[x264-devel] [PATCH] x264_store_interleave_chroma_neon, x264_load_deinterleave_chroma_fdec/fenc

George Stephanos gaf.stephanos at gmail.com
Thu Feb 9 01:01:31 CET 2012


---
 common/arm/mc-a.S |   38 ++++++++++++++++++++++++++++++++++++++
 common/arm/mc-c.c |    7 +++++++
 2 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index a714e9b..07d6436 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1355,6 +1355,44 @@ lowres_xloop_end:
     vpop            {d8-d15}
     pop             {r4-r10,pc}
 .endfunc
+
+function x264_store_interleave_chroma_neon  // r0 = dst, r1 = dst stride, r2/r3 = the two source planes
+    push        {r4-r5, lr}                 // r4/r5 are callee-saved scratch registers here
+    ldr         r4, [sp, #12]               // fifth argument (row count), just above the 12 bytes pushed
+    mov         r5, #FDEC_STRIDE            // both source pointers advance by FDEC_STRIDE per row
+1:
+    vld1.64     {d0}, [r2], r5              // 8 bytes from the first plane,  r2 += FDEC_STRIDE
+    vld1.64     {d1}, [r3], r5              // 8 bytes from the second plane, r3 += FDEC_STRIDE
+    vzip.8      d0, d1                      // interleave bytewise: first0, second0, first1, second1, ...
+    subs        r4, #1                      // one row consumed; the flags survive the store below
+    vst1.64     {d0, d1}, [r0], r1          // write the 16 interleaved bytes, dst += dst stride
+    bgt         1b                          // loop while rows remain
+    pop         {r4-r5, pc}                 // restore scratch registers and return
+.endfunc
+
+.macro LOAD_DEINTERLEAVE_CHROMA, stride
+function x264_load_deinterleave_chroma_\stride\()_neon  // r0 = dst, r1 = src, r2 = src stride, r3 = row count
+    push        {r4-r5, lr}                 // r4/r5 are callee-saved scratch registers here
+.ifc \stride, fdec
+    mov         r4, #FDEC_STRIDE            // macro argument is the text "fdec" (.ifc compares strings)
+.else
+    mov         r4, #FENC_STRIDE            // any other argument (i.e. "fenc") selects the FENC stride
+.endif
+    add         r5, r0, r4, lsr #1          // r5 = dst + stride/2: where the second plane is written
+1:
+    vld1.64     {d0, d1}, [r1], r2          // 16 interleaved bytes, src += src stride
+    vuzp.8      d0, d1                      // split bytewise: d0 = even-indexed, d1 = odd-indexed bytes
+    vst1.64     {d0}, [r0], r4              // even-indexed bytes to dst,            dst += stride
+    vst1.64     {d1}, [r5], r4              // odd-indexed bytes to dst + stride/2,  advancing likewise
+    subs        r3, #1                      // one row done
+    bgt         1b                          // loop while rows remain
+    pop         {r4-r5, pc}                 // restore scratch registers and return
+.endfunc
+.endm
+
+LOAD_DEINTERLEAVE_CHROMA fenc               // emits x264_load_deinterleave_chroma_fenc_neon
+LOAD_DEINTERLEAVE_CHROMA fdec               // emits x264_load_deinterleave_chroma_fdec_neon
+
 //void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
                                    //pixel *srcu, int i_srcu,
                                    //pixel *srcv, int i_srcv, int w, int h )
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index c61d29d..55f0092 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -79,6 +79,10 @@ void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
 void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
 void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
 
+void x264_store_interleave_chroma_neon( uint8_t *, int, uint8_t*, uint8_t *, int );
+void x264_load_deinterleave_chroma_fenc_neon( uint8_t *, uint8_t *, int, int );
+void x264_load_deinterleave_chroma_fdec_neon( uint8_t *, uint8_t *, int, int );
+
 void x264_plane_copy_interleave_core_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int, int );
 void x264_plane_copy_deinterleave_neon( pixel *, int, pixel *, int, pixel *, int, int, int );
 
@@ -263,6 +267,9 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
     pf->get_ref = get_ref_neon;
     pf->hpel_filter = hpel_filter_neon;
     pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
+    pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
+    pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
+    pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
     pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
 #endif // !HIGH_BIT_DEPTH
-- 
1.7.4.1



More information about the x264-devel mailing list