[x264-devel] arm: x264_plane_copy_deinterleave_neon

Janne Grunau git at videolan.org
Wed Apr 23 00:40:57 CEST 2014


x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Sat Mar 15 17:22:08 2014 +0100| [a069d4f7b46c20e6a7b59cd4ba1f99d45d9d912f] | committer: Jason Garrett-Glaser

arm: x264_plane_copy_deinterleave_neon

plane_copy_deinterleave_c: 42988
plane_copy_deinterleave_neon: 10184

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=a069d4f7b46c20e6a7b59cd4ba1f99d45d9d912f
---

 common/arm/mc-a.S |   26 ++++++++++++++++++++++++++
 common/arm/mc-c.c |    6 ++++++
 2 files changed, 32 insertions(+)

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 6267e35..179315c 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1465,3 +1465,29 @@ lowres_xloop_end:
     vpop            {d8-d15}
     pop             {r4-r10,pc}
 .endfunc
+
+function x264_plane_copy_deinterleave_neon          // split interleaved src bytes into dstu (even-indexed bytes) and dstv (odd-indexed bytes), row by row
+    push            {r4-r7, lr}                     // 5 regs = 20 bytes pushed, so the stack-passed args now start at sp+20
+    ldrd            r6, r7, [sp, #28]               // r6 = w, r7 = h (7th/8th args of the prototype in mc-c.c)
+    ldrd            r4, r5, [sp, #20]               // r4 = src, r5 = i_src (5th/6th args); r0-r3 hold dstu, i_dstu, dstv, i_dstv
+    add             lr,  r6,  #15
+    bic             lr,  lr,  #15                   // lr = w rounded up to a multiple of 16 = bytes stored per plane per row (up to 15 more than w)
+    sub             r1,  r1,  lr                    // r1 = i_dstu minus the distance the post-incrementing stores move r0 in one row
+    sub             r3,  r3,  lr                    // same adjustment for i_dstv / r2
+    sub             r5,  r5,  lr, lsl #1            // src is consumed 2 bytes per output byte, so subtract 2*lr from i_src
+block:                                              // entry for both loops; both are bottom-tested, so the body runs at least once
+    vld2.8          {d0-d3}, [r4,:128]!             // load 32 bytes: even-indexed -> q0, odd-indexed -> q1; :128 asserts r4 is 16-byte aligned; r4 += 32
+    subs            lr,  lr,  #16                   // 16 bytes per plane handled; sets the flags tested by the bgt below
+    vst1.8          {q0},    [r0]!                  // 16 bytes to dstu, r0 += 16
+    vst1.8          {q1},    [r2]!                  // 16 bytes to dstv, r2 += 16
+    bgt             block                           // continue while bytes remain in this row
+
+    add             r4,  r4,  r5                    // step src to the start of the next row
+    subs            r7,  r7,  #1                    // one row done; the add/mov below leave these flags intact for the final bgt
+    add             r0,  r0,  r1                    // step dstu to the start of the next row
+    add             r2,  r2,  r3                    // step dstv to the start of the next row
+    mov             lr,  r6                         // reload with unrounded w; inner loop runs while lr > 0, so still ceil(w/16) iterations
+    bgt             block                           // continue while rows remain
+
+    pop             {r4-r7, pc}                     // restore r4-r7 and return by popping the saved lr into pc
+.endfunc
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index d68369f..bf5e2eb 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -47,6 +47,10 @@ void x264_pixel_avg2_w8_neon ( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
 void x264_pixel_avg2_w16_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t *, int );
 
+void x264_plane_copy_deinterleave_neon(  pixel *dstu, intptr_t i_dstu,  /* dstu: receives the even-indexed src bytes; i_dstu: its row stride in bytes */
+                                         pixel *dstv, intptr_t i_dstv,  /* dstv: receives the odd-indexed src bytes; i_dstv: its row stride in bytes */
+                                         pixel *src,  intptr_t i_src, int w, int h );  /* src: interleaved input, loaded by the NEON code with a 16-byte alignment hint; i_src: its row stride in bytes; w: bytes per output row (processed in chunks of 16); h: number of rows */
+
 #define MC_WEIGHT(func)\
 void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
@@ -225,6 +229,8 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
     pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_neon;
     pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_neon;
 
+    pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
+
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_neon;
     pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_neon;
     pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_neon;



More information about the x264-devel mailing list