[x264-devel] [PATCH] x264_store_interleave_chroma_neon, x264_load_deinterleave_chroma_fdec/fenc
George Stephanos
gaf.stephanos at gmail.com
Thu Feb 9 01:01:31 CET 2012
---
common/arm/mc-a.S | 38 ++++++++++++++++++++++++++++++++++++++
common/arm/mc-c.c | 7 +++++++
2 files changed, 45 insertions(+), 0 deletions(-)
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index a714e9b..07d6436 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1355,6 +1355,44 @@ lowres_xloop_end:
vpop {d8-d15}
pop {r4-r10,pc}
.endfunc
+
+function x264_store_interleave_chroma_neon // args: r0 = dst, r1 = dst stride, r2 = src0, r3 = src1, [stack] = rows
+    push            {r4-r5, lr}         // 12 bytes pushed, so the 5th argument is now at [sp, #12]
+    ldr             r4, [sp, #12]       // r4 = row counter
+    mov             r5, #FDEC_STRIDE    // r5 = row stride used for both source pointers
+1:
+    vld1.64         {d0}, [r2], r5      // d0 = 8 bytes from src0, then src0 += FDEC_STRIDE
+    vld1.64         {d1}, [r3], r5      // d1 = 8 bytes from src1, then src1 += FDEC_STRIDE
+    vzip.8          d0, d1              // d0:d1 = a0 b0 a1 b1 ... a7 b7 (a = src0, b = src1)
+    subs            r4, #1              // set flags for bgt; the NEON store below leaves them intact
+    vst1.64         {d0, d1}, [r0], r1  // write 16 interleaved bytes, then dst += dst stride
+    bgt             1b                  // repeat while rows remain (body always runs at least once)
+    pop             {r4-r5, pc}         // restore scratch registers and return
+.endfunc
+
+.macro LOAD_DEINTERLEAVE_CHROMA, stride // stride is the literal text fenc or fdec
+function x264_load_deinterleave_chroma_\stride\()_neon // args: r0 = dst, r1 = src, r2 = src stride, r3 = rows
+    push            {r4-r5, lr}
+.ifc \stride,fdec
+    mov             r4, #FDEC_STRIDE    // r4 = destination row stride; .ifc compares the macro argument as text
+.else
+    mov             r4, #FENC_STRIDE    // any argument other than fdec selects the fenc stride
+.endif
+    add             r5, r0, r4, lsr #1  // r5 = dst + stride/2: second output plane sits half a row further on
+1:
+    vld1.64         {d0, d1}, [r1], r2  // load 16 interleaved bytes, then src += src stride
+    vuzp.8          d0, d1              // d0 = even-indexed bytes, d1 = odd-indexed bytes
+    vst1.64         {d0}, [r0], r4      // 8 even bytes to the first plane, advance one row
+    vst1.64         {d1}, [r5], r4      // 8 odd bytes to the second plane, advance one row
+    subs            r3, #1              // r3 = rows left; body always runs at least once
+    bgt             1b
+    pop             {r4-r5, pc}         // restore scratch registers and return
+.endfunc
+.endm
+
+LOAD_DEINTERLEAVE_CHROMA fenc
+LOAD_DEINTERLEAVE_CHROMA fdec
+
//void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
//pixel *srcu, int i_srcu,
//pixel *srcv, int i_srcv, int w, int h )
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index c61d29d..55f0092 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -79,6 +79,10 @@ void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
+void x264_store_interleave_chroma_neon( uint8_t *, int, uint8_t *, uint8_t *, int ); // dst, dst stride, src0, src1, rows
+void x264_load_deinterleave_chroma_fenc_neon( uint8_t *, uint8_t *, int, int ); // dst, src, src stride, rows
+void x264_load_deinterleave_chroma_fdec_neon( uint8_t *, uint8_t *, int, int ); // dst, src, src stride, rows
+
void x264_plane_copy_interleave_core_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int, int );
void x264_plane_copy_deinterleave_neon( pixel *, int, pixel *, int, pixel *, int, int, int );
@@ -263,6 +267,9 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
pf->get_ref = get_ref_neon;
pf->hpel_filter = hpel_filter_neon;
pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
+ pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
+ pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
+ pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
#endif // !HIGH_BIT_DEPTH
--
1.7.4.1
More information about the x264-devel
mailing list