[x264-devel] arm: x264_plane_copy_interleave_neon

Janne Grunau git at videolan.org
Wed Apr 23 00:40:57 CEST 2014


x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Sat Mar 15 19:55:50 2014 +0100| [49a8091beed182a4fb090e4752580aacbfc2f8cd] | committer: Jason Garrett-Glaser

arm: x264_plane_copy_interleave_neon

plane_copy_interleave_c: 40285
plane_copy_interleave_neon: 10137

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=49a8091beed182a4fb090e4752580aacbfc2f8cd
---

 common/arm/mc-a.S |   27 +++++++++++++++++++++++++++
 common/arm/mc-c.c |    4 ++++
 2 files changed, 31 insertions(+)

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index e9a5f86..6274c59 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1569,3 +1569,30 @@ block4:
 
     pop             {r4-r8, r10, r11, pc}
 .endfunc
+
+function x264_plane_copy_interleave_neon  // interleave bytes of srcu (r2) and srcv into dst (r0) as u,v pairs, row by row
+    push            {r4-r7, lr}  // 5 words = 20 bytes, so stack arguments now start at sp+20
+    ldrd            r6, r7, [sp, #28]  // r6 = w, r7 = h (7th and 8th arguments of the C prototype)
+    ldrd            r4, r5, [sp, #20]  // r4 = srcv, r5 = i_srcv (5th and 6th arguments)
+    add             lr,  r6,  #15
+    bic             lr,  lr,  #15  // lr = w rounded up to a multiple of 16: columns actually processed per row
+    sub             r1,  r1,  lr, lsl #1  // r1 = i_dst - 2*lr: dst advances 2 bytes per column inside the loop
+    sub             r3,  r3,  lr  // r3 = i_srcu - lr: remainder to add after a row to reach the next srcu row
+    sub             r5,  r5,  lr  // r5 = i_srcv - lr: same for srcv
+blocki:  // inner loop: 16 columns per pass; NOTE(review): always whole 16-byte chunks, so up to 15 columns past w are read and written - presumably callers provide padding, confirm
+    vld1.8          {q0}, [r2]!  // q0 = next 16 bytes of srcu, r2 += 16
+    vld1.8          {q1}, [r4]!  // q1 = next 16 bytes of srcv, r4 += 16
+    subs            lr,  lr,  #16  // count down columns early; the NEON stores below leave the flags intact for bgt
+    vst2.8          {d0,d2}, [r0]!  // store u0,v0 .. u7,v7 (low halves of q0 and q1), r0 += 16
+    vst2.8          {d1,d3}, [r0]!  // store u8,v8 .. u15,v15 (high halves), r0 += 16
+    bgt             blocki  // more columns left in this row
+
+    subs            r7,  r7,  #1  // one row finished; flags are consumed by the final bgt (nothing in between sets them)
+    add             r0,  r0,  r1  // move dst to the start of the next row
+    add             r2,  r2,  r3  // move srcu to the start of the next row
+    add             r4,  r4,  r5  // move srcv to the start of the next row
+    mov             lr,  r6  // reload column counter with the unrounded w; looping while lr > 0 gives the same pass count as the rounded width
+    bgt             blocki  // more rows left
+
+    pop             {r4-r7, pc}  // restore r4-r7 and return by popping the saved lr into pc
+.endfunc
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index 48b868e..e134e96 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -54,6 +54,9 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
                                             pixel *dstb, intptr_t i_dstb,
                                             pixel *dstc, intptr_t i_dstc,
                                             pixel *src,  intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_interleave_neon( pixel *dst,  intptr_t i_dst,
+                                      pixel *srcu, intptr_t i_srcu,
+                                      pixel *srcv, intptr_t i_srcv, int w, int h );
 
 void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
 void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@@ -238,6 +241,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
 
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
+    pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
 
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
     pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;



More information about the x264-devel mailing list