[x264-devel] arm: x264_plane_copy_deinterleave_rgb_neon
Janne Grunau
git at videolan.org
Wed Apr 23 00:40:57 CEST 2014
x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Sat Mar 15 19:21:12 2014 +0100| [156a2ea00c06e34b215ab9e66acc464204536374] | committer: Jason Garrett-Glaser
arm: x264_plane_copy_deinterleave_rgb_neon
plane_copy_deinterleave_rgb_c: 31543
plane_copy_deinterleave_rgb_neon: 8312
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=156a2ea00c06e34b215ab9e66acc464204536374
---
common/arm/mc-a.S | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
common/arm/mc-c.c | 5 +++++
2 files changed, 57 insertions(+)
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index df9e2fb..e9a5f86 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1517,3 +1517,55 @@ block:
pop {r4-r7, pc}
.endfunc
+
+function x264_plane_copy_deinterleave_rgb_neon @ Split packed 3- or 4-byte pixels from src into three separate byte planes dsta/dstb/dstc. Per the C prototype: r0 = dsta, r1 = i_dsta, r2 = dstb, r3 = i_dstb; the remaining arguments are read from the stack below.
+ push {r4-r8, r10, r11, lr} @ 8 registers = 32 bytes pushed, so the first stack argument is now at [sp, #32]
+ ldrd r4, r5, [sp, #32] @ r4 = dstc, r5 = i_dstc
+ ldrd r6, r7, [sp, #40] @ r6 = src, r7 = i_src
+ ldr r8, [sp, #48] @ r8 = pw; only compared against 3 below (presumably bytes per source pixel)
+ ldrd r10, r11, [sp, #52] @ r10 = w, r11 = h. NOTE(review): offset 52 is word- but not doubleword-aligned relative to sp; confirm every target allows word-aligned ldrd
+ add lr, r10, #7 @ lr = w + 7 (first half of rounding w up to a multiple of 8)
+ subs r8, r8, #3 @ Z flag set iff pw == 3; no instruction up to the bne below changes the flags
+ bic lr, lr, #7 @ lr = w rounded up to a multiple of 8 = number of pixels actually processed per row
+ sub r7, r7, lr, lsl #1 @ r7 = i_src - 2*lr (part of the source stride correction shared by both paths)
+ sub r1, r1, lr @ each dst pointer advances lr bytes within a row, so keep only the remainder of its stride
+ sub r3, r3, lr @ same correction for i_dstb
+ sub r5, r5, lr @ same correction for i_dstc
+ subne r7, r7, lr, lsl #1 @ pw != 3: r7 = i_src - 4*lr (4 source bytes consumed per pixel)
+ subeq r7, r7, lr @ pw == 3: r7 = i_src - 3*lr (3 source bytes consumed per pixel)
+ bne block4 @ pw != 3 takes the 4-byte path; pw == 3 falls through
+block3: @ pw == 3 path, 8 pixels per iteration
+ vld3.8 {d0,d1,d2}, [r6]! @ load 24 bytes de-interleaved: d0/d1/d2 = byte 0/1/2 of each of 8 pixels; r6 += 24
+ subs lr, lr, #8 @ 8 fewer pixels left in this row
+ vst1.8 {d0}, [r0]! @ dsta receives byte 0 of each pixel; r0 += 8
+ vst1.8 {d1}, [r2]! @ dstb receives byte 1; r2 += 8
+ vst1.8 {d2}, [r4]! @ dstc receives byte 2; r4 += 8
+ bgt block3 @ test is at the bottom: at least one group runs, and a partial last group is processed as a full 8 pixels
+
+ subs r11, r11, #1 @ one row finished; these flags feed the bgt at the end of this group
+ add r0, r0, r1 @ move dsta to the start of the next row
+ add r2, r2, r3 @ move dstb to the start of the next row
+ add r4, r4, r5 @ move dstc to the start of the next row
+ add r6, r6, r7 @ move src to the start of the next row
+ mov lr, r10 @ reload the column counter with w; mov leaves the flags alone, and for w >= 1 counting down from w in steps of 8 takes as many iterations as from the rounded-up width
+ bgt block3 @ next row while rows remain
+
+ pop {r4-r8, r10, r11, pc} @ restore the saved registers and return (the saved lr is popped into pc)
+block4: @ pw != 3 path, 8 pixels per iteration, 4 source bytes per pixel
+ vld4.8 {d0,d1,d2,d3}, [r6]! @ load 32 bytes de-interleaved: d0..d3 = byte 0..3 of each of 8 pixels; r6 += 32; d3 is never stored
+ subs lr, lr, #8 @ 8 fewer pixels left in this row
+ vst1.8 {d0}, [r0]! @ dsta receives byte 0 of each pixel; r0 += 8
+ vst1.8 {d1}, [r2]! @ dstb receives byte 1; r2 += 8
+ vst1.8 {d2}, [r4]! @ dstc receives byte 2; r4 += 8
+ bgt block4 @ test is at the bottom: at least one group runs, and a partial last group is processed as a full 8 pixels
+
+ subs r11, r11, #1 @ one row finished; these flags feed the bgt at the end of this group
+ add r0, r0, r1 @ move dsta to the start of the next row
+ add r2, r2, r3 @ move dstb to the start of the next row
+ add r4, r4, r5 @ move dstc to the start of the next row
+ add r6, r6, r7 @ move src to the start of the next row
+ mov lr, r10 @ reload the column counter with w without disturbing the flags from subs r11
+ bgt block4 @ next row while rows remain
+
+ pop {r4-r8, r10, r11, pc} @ restore the saved registers and return (the saved lr is popped into pc)
+.endfunc
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index e50d736..48b868e 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -50,6 +50,10 @@ void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu,
pixel *dstv, intptr_t i_dstv,
pixel *src, intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta, /* plane receiving byte 0 of each source pixel, and its row stride in bytes */
+ pixel *dstb, intptr_t i_dstb, /* plane receiving byte 1 of each source pixel, and its row stride in bytes */
+ pixel *dstc, intptr_t i_dstc, /* plane receiving byte 2 of each source pixel, and its row stride in bytes */
+ pixel *src, intptr_t i_src, int pw, int w, int h ); /* packed source and its row stride in bytes; pw == 3 selects the 3-bytes-per-pixel NEON path, any other value the 4-bytes-per-pixel path; w pixels per row (processed in groups of 8), h rows */
void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@@ -233,6 +237,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
+ pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
More information about the x264-devel
mailing list