[x264-devel] [PATCH 03/11] arm: Add x264_nal_escape_neon
Martin Storsjö
martin at martin.st
Tue Aug 25 13:38:12 CEST 2015
checkasm timing     Cortex-A7       A8       A9
nal_escape_c           852758   879566   655497
nal_escape_neon        376831   450678   371673
---
Avoid clobbering neon registers needlessly; reduce the number
of used GPRs, return using pop {..,pc}.
---
Makefile | 2 +-
common/arm/bitstream-a.S | 84 ++++++++++++++++++++++++++++++++++++++++++++++
common/bitstream.c | 4 +++
3 files changed, 89 insertions(+), 1 deletion(-)
create mode 100644 common/arm/bitstream-a.S
diff --git a/Makefile b/Makefile
index 6193c59..4403a11 100644
--- a/Makefile
+++ b/Makefile
@@ -119,7 +119,7 @@ ifeq ($(SYS_ARCH),ARM)
ifneq ($(AS),)
ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \
common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \
- common/arm/predict-a.S
+ common/arm/predict-a.S common/arm/bitstream-a.S
SRCS += common/arm/mc-c.c common/arm/predict-c.c
OBJASM = $(ASMSRC:%.S=%.o)
endif
diff --git a/common/arm/bitstream-a.S b/common/arm/bitstream-a.S
new file mode 100644
index 0000000..5b0a171
--- /dev/null
+++ b/common/arm/bitstream-a.S
@@ -0,0 +1,84 @@
+/*****************************************************************************
+ * bitstream-a.S: arm bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2014-2015 x264 project
+ *
+ * Authors: Janne Grunau <janne-x264 at jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+function x264_nal_escape_neon // copy bytes [r1, r2) to [r0...], writing an extra 0x03 before any byte <= 3 that follows two zero bytes
+ push {r4-r5,lr} // r0 = dst, r1 = src, r2 = src end as used below; C prototype is not visible here - TODO confirm
+ vmov.u8 q0, #0xff // q0 = "previous 16 bytes"; all 0xff so nothing before the first byte counts as a zero
+ vmov.u8 q8, #4 // q8 = threshold: a byte is a candidate when 4 > byte (byte <= 3)
+ mov r3, #3 // r3 = the 0x03 byte inserted by the scalar loop
+ subs lr, r1, r2 // lr = src - end: minus the number of bytes left (assumes src <= end)
+ beq 99f // src == end: nothing to copy
+0: // outer loop: pick vector or scalar handling for what is left
+ cmn lr, #15 // flags for lr + 15
+ blt 16f // lr < -15: at least 16 bytes left, take the vector path
+ mov r1, r2 // fewer than 16 left: r1 = end so that [r1, lr] addresses the tail
+ b 100f // handle the tail bytewise
+16: // vector path: examine 16 source bytes at once
+ vld1.8 {q1}, [r1]! // q1 = next 16 bytes, r1 += 16
+ vext.8 q2, q0, q1, #14 // q2[i] = byte two positions before q1[i] (first two lanes come from q0)
+ vext.8 q3, q0, q1, #15 // q3[i] = byte one position before q1[i]
+ vcgt.u8 q11, q8, q1 // q11[i] = 0xff where q1[i] <= 3
+ vceq.u8 q9, q2, #0 // q9[i] = 0xff where the byte two back is zero
+ vceq.u8 q10, q3, #0 // q10[i] = 0xff where the byte one back is zero
+ vand q9, q9, q11
+ vand q9, q9, q10 // q9[i] = 0xff where all three conditions hold
+ vshrn.u16 d22, q9, #4 // squeeze the 128-bit mask into 64 bits (d22 overlaps q11, which is dead here)
+ vmov ip, lr, d22 // move the 64-bit mask into two core registers
+ orrs ip, ip, lr // Z set when no lane matched
+ beq 16f // no match: forward to the later 16: label and store the block unchanged
+ mov lr, #-16 // match found: r1 is already past the block, so redo its 16 bytes via [r1, lr]
+100: // scalar path entry
+ vmov.u8 r5, d1[6] // q0 lane 14: the byte two back
+ vmov.u8 r4, d1[7] // q0 lane 15: the byte one back
+ orr r5, r4, r5, lsl #8 // r5 low 16 bits = the last two bytes written
+101: // scalar loop: one source byte per iteration
+ ldrb r4, [r1, lr] // r4 = current source byte
+ orr ip, r4, r5, lsl #16 // ip = (two back << 24) | (one back << 16) | current
+ cmp ip, #3
+ bhi 102f // ip > 3: previous two are not both zero, or current byte > 3
+ strb r3, [r0], #1 // write the extra 0x03
+ orr r5, r3, r5, lsl #8 // and record it in the history
+102:
+ adds lr, lr, #1 // advance the index; these flags feed the blt below (strb/orr leave them alone)
+ strb r4, [r0], #1 // write the source byte itself
+ orr r5, r4, r5, lsl #8 // shift it into the history
+ blt 101b // index still negative: more bytes in this run
+ subs lr, r1, r2 // lr = minus the bytes left; flags feed the blt below
+ lsr ip, r5, #8 // ip low byte = second to last byte written
+ vmov.u8 d1[6], ip // put the last two written bytes back into q0 lanes 14 and 15
+ vmov.u8 d1[7], r5 // so the next vector block sees them as its predecessors
+ blt 0b // input remains: back to the outer loop
+
+ pop {r4-r5,pc} // done: restore r4-r5 and return; r0 is the advanced destination pointer
+16: // vector block that needs no inserted bytes
+ subs lr, r1, r2 // lr = minus the bytes left; flags feed the blt below
+ vst1.8 {q1}, [r0]! // store the 16 bytes unchanged, r0 += 16
+ vmov q0, q1 // this block becomes the "previous" block
+ blt 0b // input remains: back to the outer loop
+99:
+ pop {r4-r5,pc} // restore r4-r5 and return; r0 is the advanced destination pointer
+endfunc
diff --git a/common/bitstream.c b/common/bitstream.c
index 6ca1f44..ec9836a 100644
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -144,6 +144,10 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
}
#endif
#endif
+#if HAVE_ARMV6
+ if( cpu&X264_CPU_NEON )
+ pf->nal_escape = x264_nal_escape_neon;
+#endif
#if ARCH_AARCH64
if( cpu&X264_CPU_NEON )
pf->nal_escape = x264_nal_escape_neon;
--
1.7.10.4
More information about the x264-devel
mailing list