[x264-devel] [PATCH 2/2] arm64: implement x264_plane_copy_swap_neon
Janne Grunau
janne-x264 at jannau.net
Fri Aug 26 19:26:56 CEST 2016
plane_copy_swap_c: 27054
plane_copy_swap_neon: 4152
---
common/aarch64/mc-a.S | 28 ++++++++++++++++++++++++++++
common/aarch64/mc-c.c | 4 ++++
2 files changed, 32 insertions(+)
diff --git a/common/aarch64/mc-a.S b/common/aarch64/mc-a.S
index fe0f870..3a99fbe 100644
--- a/common/aarch64/mc-a.S
+++ b/common/aarch64/mc-a.S
@@ -1281,6 +1281,34 @@ function x264_plane_copy_core_neon, export=1
ret
endfunc
+function x264_plane_copy_swap_core_neon, export=1 // copies h rows from src (x2) to dst (x0), swapping the two bytes of every 16-bit pair; args per the C prototype: x0=dst, x1=i_dst, x2=src, x3=i_src, w4=w, w5=h
+ lsl w4, w4, #1 // w4 = 2*w, used below as the number of bytes per row
+ sub x1, x1, x4 // x1 = i_dst - row bytes: the post-incremented stores already advance x0 by one row of data
+ sub x3, x3, x4 // x3 = i_src - row bytes, same reasoning for the post-incremented loads
+1: // start of one row
+ mov w8, w4 // w8 = bytes still to process in this row
+ tbz w4, #4, 32f // bit 4 of the byte count clear: no single 16-byte chunk to peel, go to the 32-byte loop
+ subs w8, w8, #16 // account for the 16-byte chunk; the flags set here are consumed by b.eq below
+ ld1 {v0.16b}, [x2], #16 // load 16 bytes, advance src
+ rev16 v0.16b, v0.16b // reverse the byte order inside each 16-bit element
+ st1 {v0.16b}, [x0], #16 // store 16 bytes, advance dst
+ b.eq 0f // row was exactly 16 bytes long: nothing left for the 32-byte loop
+32: // 32 bytes per iteration
+ subs w8, w8, #32 // 32 fewer bytes remaining; flags drive b.gt below
+ ld1 {v0.16b,v1.16b}, [x2], #32 // load 32 bytes, advance src
+ rev16 v0.16b, v0.16b // swap bytes in each 16-bit element of the first half
+ rev16 v1.16b, v1.16b // swap bytes in each 16-bit element of the second half
+ st1 {v0.16b,v1.16b}, [x0], #32 // store 32 bytes, advance dst
+ b.gt 32b // loop while bytes remain; NOTE(review): looks like this relies on the row byte count being a non-zero multiple of 16, otherwise it reads/writes past the row - confirm the caller guarantees that
+0: // end of row
+ subs w5, w5, #1 // one row done; flags drive b.gt 1b below (the adds do not set flags)
+ add x2, x2, x3 // move src to the start of the next row
+ add x0, x0, x1 // move dst to the start of the next row
+ b.gt 1b // next row while rows remain
+
+ ret
+endfunc
+
function x264_plane_copy_deinterleave_neon, export=1
add w9, w6, #15
and w9, w9, #0xfffffff0
diff --git a/common/aarch64/mc-c.c b/common/aarch64/mc-c.c
index 4f93965..09794d8 100644
--- a/common/aarch64/mc-c.c
+++ b/common/aarch64/mc-c.c
@@ -51,6 +51,8 @@ void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h );
+void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
+ pixel *src, intptr_t i_src, int w, int h );
void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu,
pixel *dstv, intptr_t i_dstv,
pixel *src, intptr_t i_src, int w, int h );
@@ -208,6 +210,7 @@ void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
int height, int16_t *buf );
PLANE_COPY(16, neon)
+PLANE_COPY_SWAP(16, neon)
PLANE_INTERLEAVE(neon)
#endif // !HIGH_BIT_DEPTH
@@ -232,6 +235,7 @@ void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf )
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon;
pf->plane_copy = x264_plane_copy_neon;
+ pf->plane_copy_swap = x264_plane_copy_swap_neon;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
--
2.9.3
More information about the x264-devel mailing list