[x264-devel] [PATCH 09/24] arm: Add x264_nal_escape_neon
Janne Grunau
janne-x264 at jannau.net
Tue Aug 18 11:11:27 CEST 2015
On 2015-08-13 23:59:30 +0300, Martin Storsjö wrote:
> checkasm timing Cortex-A7 A8 A9
> nal_escape_c 908338 878032 633692
> nal_escape_neon 379946 451936 373471
> ---
> Makefile | 2 +-
> common/arm/bitstream-a.S | 89 ++++++++++++++++++++++++++++++++++++++++++++++
> common/bitstream.c | 4 +++
> 3 files changed, 94 insertions(+), 1 deletion(-)
> create mode 100644 common/arm/bitstream-a.S
>
> diff --git a/Makefile b/Makefile
> index 6193c59..4403a11 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -119,7 +119,7 @@ ifeq ($(SYS_ARCH),ARM)
> ifneq ($(AS),)
> ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \
> common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \
> - common/arm/predict-a.S
> + common/arm/predict-a.S common/arm/bitstream-a.S
> SRCS += common/arm/mc-c.c common/arm/predict-c.c
> OBJASM = $(ASMSRC:%.S=%.o)
> endif
> diff --git a/common/arm/bitstream-a.S b/common/arm/bitstream-a.S
> new file mode 100644
> index 0000000..62f9c96
> --- /dev/null
> +++ b/common/arm/bitstream-a.S
> @@ -0,0 +1,89 @@
> +/*****************************************************************************
> + * bitstream-a.S: arm bitstream functions
> + *****************************************************************************
> + * Copyright (C) 2014-2015 x264 project
> + *
> + * Authors: Janne Grunau <janne-x264 at jannau.net>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at licensing at x264.com.
> + *****************************************************************************/
> +
> +#include "asm.S"
> +
> +function x264_nal_escape_neon
> + push {r4-r9}
I'm not quite sure you need all those registers. I certainly only
used that many because arm64 has enough caller-saved registers. Also, lr
is usually included in the register list when registers are pushed to or
popped from the stack. The function return then becomes
pop {rx-ry, pc} instead of pop {rx-ry}; bx lr
> + vpush {q4-q7}
Please use q8-q15 instead: they are caller-saved, so the vpush/vpop is no
longer needed. I know these register number conflicts are annoying when
porting NEON code between arm and arm64 (I have endured it both ways).
> + vmov.u8 q0, #0xff
> + vmov.u8 q4, #4
> + mov r3, #3
> + subs r6, r1, r2
> + beq 99f
> +0:
> + cmn r6, #15
> + blt 16f
> + mov r1, r2
> + b 100f
> +16:
> + vld1.8 {q1}, [r1]!
> + vext.8 q2, q0, q1, #14
> + vext.8 q3, q0, q1, #15
> + vcgt.u8 q7, q4, q1
> + vceq.u8 q5, q2, #0
> + vceq.u8 q6, q3, #0
> + vand q5, q5, q7
> + vand q5, q5, q6
> + vshrn.u16 d14, q5, #4
> + vmov r7, r8, d14
> + orrs r7, r7, r8
> + beq 16f
> + mov r6, #-16
> +100:
> + vmov.u8 r5, d1[6]
> + vmov.u8 r4, d1[7]
> + orr r5, r4, r5, lsl #8
> +101:
> + ldrb r4, [r1, r6]
> + orr r9, r4, r5, lsl #16
> + cmp r9, #3
> + bhi 102f
> + strb r3, [r0], #1
> + orr r5, r3, r5, lsl #8
> +102:
> + adds r6, r6, #1
> + strb r4, [r0], #1
> + orr r5, r4, r5, lsl #8
> + blt 101b
> + subs r6, r1, r2
> + lsr r9, r5, #8
> + vmov.u8 d1[6], r9
> + vmov.u8 d1[7], r5
> + blt 0b
> +
> + vpop {q4-q7}
> + pop {r4-r9}
> + bx lr
> +16:
> + subs r6, r1, r2
> + vst1.8 {q1}, [r0]!
> + vmov q0, q1
> + blt 0b
> +99:
> + vpop {q4-q7}
> + pop {r4-r9}
> + bx lr
> +endfunc
> diff --git a/common/bitstream.c b/common/bitstream.c
> index 6ca1f44..ec9836a 100644
> --- a/common/bitstream.c
> +++ b/common/bitstream.c
> @@ -144,6 +144,10 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
> }
> #endif
> #endif
> +#if HAVE_ARMV6
> + if( cpu&X264_CPU_NEON )
> + pf->nal_escape = x264_nal_escape_neon;
> +#endif
> #if ARCH_AARCH64
> if( cpu&X264_CPU_NEON )
> pf->nal_escape = x264_nal_escape_neon;
otherwise ok
Janne
More information about the x264-devel
mailing list