[x264-devel] arm: x264_plane_copy_interleave_neon
Janne Grunau
git at videolan.org
Wed Apr 23 00:40:57 CEST 2014
x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Sat Mar 15 19:55:50 2014 +0100| [49a8091beed182a4fb090e4752580aacbfc2f8cd] | committer: Jason Garrett-Glaser
arm: x264_plane_copy_interleave_neon
plane_copy_interleave_c: 40285
plane_copy_interleave_neon: 10137
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=49a8091beed182a4fb090e4752580aacbfc2f8cd
---
common/arm/mc-a.S | 27 +++++++++++++++++++++++++++
common/arm/mc-c.c | 4 ++++
2 files changed, 31 insertions(+)
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index e9a5f86..6274c59 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1569,3 +1569,30 @@ block4:
pop {r4-r8, r10, r11, pc}
.endfunc
+
+function x264_plane_copy_interleave_neon // interleave the bytes of srcu and srcv row by row into dst as u0 v0 u1 v1 ...
+ push {r4-r7, lr} // 5 registers = 20 bytes pushed, so the stack arguments now start at [sp, #20]
+ ldrd r6, r7, [sp, #28] // r6 = w, r7 = h (7th and 8th arguments of the C prototype)
+ ldrd r4, r5, [sp, #20] // r4 = srcv, r5 = i_srcv (5th and 6th); r0-r3 hold dst, i_dst, srcu, i_srcu
+ add lr, r6, #15
+ bic lr, lr, #15 // lr = w rounded up to a multiple of 16 = bytes consumed per source row
+ sub r1, r1, lr, lsl #1 // dst advances 2*lr bytes per row, so r1 = i_dst - 2*lr is the step to the next dst row
+ sub r3, r3, lr // likewise r3 = i_srcu - lr
+ sub r5, r5, lr // and r5 = i_srcv - lr
+blocki: // one 16-byte group per pass; tested at the bottom, so at least one group is always processed
+ vld1.8 {q0}, [r2]! // q0 = next 16 bytes of srcu, pointer post-incremented
+ vld1.8 {q1}, [r4]! // q1 = next 16 bytes of srcv, pointer post-incremented
+ subs lr, lr, #16 // bytes left in this row; the flags set here are consumed by the bgt after the stores
+ vst2.8 {d0,d2}, [r0]! // store low halves interleaved: 8 bytes of q0 alternating with 8 bytes of q1 (16 bytes)
+ vst2.8 {d1,d3}, [r0]! // same for the high halves (another 16 bytes)
+ bgt blocki // more groups in this row while the counter is still positive
+
+ subs r7, r7, #1 // one row done; the flags set here are consumed by the final bgt
+ add r0, r0, r1 // step dst to the start of the next row (add without s leaves the flags alone)
+ add r2, r2, r3 // step srcu to the next row
+ add r4, r4, r5 // step srcv to the next row
+ mov lr, r6 // reload the column counter with the raw w; the loop still runs ceil(w/16) times
+ bgt blocki // next row while rows remain (at least one row is always processed)
+
+ pop {r4-r7, pc} // restore r4-r7 and return by loading the saved lr into pc
+.endfunc
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index 48b868e..e134e96 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -54,6 +54,9 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
pixel *dstb, intptr_t i_dstb,
pixel *dstc, intptr_t i_dstc,
pixel *src, intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_interleave_neon( pixel *dst, intptr_t i_dst,
+ pixel *srcu, intptr_t i_srcu,
+ pixel *srcv, intptr_t i_srcv, int w, int h );
void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@@ -238,6 +241,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
+ pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
More information about the x264-devel
mailing list