[x264-devel] [PATCH 4/9] arm: x264_plane_copy_deinterleave_neon
Janne Grunau
janne-x264 at jannau.net
Sun Mar 16 23:26:41 CET 2014
plane_copy_deinterleave_c: 42988
plane_copy_deinterleave_neon: 10184
---
common/arm/mc-a.S | 26 ++++++++++++++++++++++++++
common/arm/mc-c.c | 6 ++++++
2 files changed, 32 insertions(+)
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 6267e35..179315c 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1465,3 +1465,29 @@ lowres_xloop_end:
vpop {d8-d15}
pop {r4-r10,pc}
.endfunc
+
+function x264_plane_copy_deinterleave_neon   // split byte-interleaved src rows into dstu (1st byte of each pair) and dstv (2nd byte); args per the prototype in mc-c.c: r0=dstu r1=i_dstu r2=dstv r3=i_dstv, stack: src, i_src, w, h
+ push {r4-r7, lr}                // 5 registers = 20 bytes, so the stack arguments now start at sp+20
+ ldrd r6, r7, [sp, #28]          // r6 = w, r7 = h (7th and 8th arguments)
+ ldrd r4, r5, [sp, #20]          // r4 = src, r5 = i_src (5th and 6th arguments)
+ add lr, r6, #15                 // lr = w + 15 ...
+ bic lr, lr, #15                 // ... with the low 4 bits cleared: w rounded up to a multiple of 16
+ sub r1, r1, lr                  // r1 = i_dstu - padded width: step from end of a written row to the next row start
+ sub r3, r3, lr                  // same adjustment for the dstv stride
+ sub r5, r5, lr, lsl #1          // src advances 2 bytes per output byte, so subtract 2 * padded width
+block:                           // inner loop: 32 src bytes in, 16 bytes out to each destination
+ vld2.8 {d0-d3}, [r4,:128]!      // de-interleaving load: q0 (d0-d1) = even-indexed bytes, q1 (d2-d3) = odd-indexed bytes; :128 requires r4 to be 16-byte aligned; r4 += 32
+ subs lr, lr, #16                // 16 columns done; sets the flags tested by the bgt three lines down
+ vst1.8 {q0}, [r0]!              // 16 bytes to dstu, r0 += 16 (no alignment hint on the stores)
+ vst1.8 {q1}, [r2]!              // 16 bytes to dstv, r2 += 16; the NEON stores leave the flags from subs intact
+ bgt block                       // loop while columns remain in this row
+
+ add r4, r4, r5                  // src -> start of next row
+ subs r7, r7, #1                 // one row finished; sets the flags for the final bgt (add/mov below do not touch them)
+ add r0, r0, r1                  // dstu -> start of next row
+ add r2, r2, r3                  // dstv -> start of next row
+ mov lr, r6                      // reload the column counter with unpadded w; subs #16 / bgt still runs ceil(w/16) times
+ bgt block                       // loop while rows remain
+
+ pop {r4-r7, pc}                 // restore r4-r7 and return by popping the saved lr into pc
+.endfunc                         // NOTE(review): each row is processed in whole 16-byte groups, so up to 15 bytes past w are written per dst row (and 30 read from src) - callers must provide that padding
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index d68369f..bf5e2eb 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -47,6 +47,10 @@ void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
+void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu,
+ pixel *dstv, intptr_t i_dstv,
+ pixel *src, intptr_t i_src, int w, int h );
+
#define MC_WEIGHT(func)\
void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
@@ -225,6 +229,8 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
pf->copy[PIXEL_8x8] = x264_mc_copy_w8_neon;
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon;
+ pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
+
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_neon;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_neon;
pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_neon;
--
1.9.0
More information about the x264-devel
mailing list