[x264-devel] [PATCH 09/24] arm: Add x264_nal_escape_neon

Martin Storsjö martin at martin.st
Thu Aug 13 22:59:30 CEST 2015


checkasm timing      Cortex-A7      A8      A9
nal_escape_c                908338  878032  633692
nal_escape_neon             379946  451936  373471
---
 Makefile                 |    2 +-
 common/arm/bitstream-a.S |   89 ++++++++++++++++++++++++++++++++++++++++++++++
 common/bitstream.c       |    4 +++
 3 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 common/arm/bitstream-a.S

diff --git a/Makefile b/Makefile
index 6193c59..4403a11 100644
--- a/Makefile
+++ b/Makefile
@@ -119,7 +119,7 @@ ifeq ($(SYS_ARCH),ARM)
 ifneq ($(AS),)
 ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \
           common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \
-          common/arm/predict-a.S
+          common/arm/predict-a.S common/arm/bitstream-a.S
 SRCS   += common/arm/mc-c.c common/arm/predict-c.c
 OBJASM  = $(ASMSRC:%.S=%.o)
 endif
diff --git a/common/arm/bitstream-a.S b/common/arm/bitstream-a.S
new file mode 100644
index 0000000..62f9c96
--- /dev/null
+++ b/common/arm/bitstream-a.S
@@ -0,0 +1,89 @@
+/*****************************************************************************
+ * bitstream-a.S: arm bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2014-2015 x264 project
+ *
+ * Authors: Janne Grunau <janne-x264 at jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+function x264_nal_escape_neon              // copy bytes [r1, r2) to r0, inserting 0x03 before a byte <= 3 that follows two zero bytes
+    push        {r4-r9}                    // r4-r9 are used as scratch below (callee-saved under AAPCS)
+    vpush       {q4-q7}                    // q4-q7 are used as scratch below (d8-d15 are callee-saved under AAPCS)
+    vmov.u8     q0,  #0xff                 // q0 = previous 16 bytes; only its last two bytes are read. 0xff: no match before the first load
+    vmov.u8     q4,  #4                    // constant for the "4 > byte" compare
+    mov         r3,  #3                    // r3 = escape byte to insert
+    subs        r6,  r1,  r2               // r6 = src - end (negative while input remains)
+    beq         99f                        // empty input: restore and return
+0:                                         // top of loop: choose vector or bytewise path
+    cmn         r6,  #15                   // flags from r6 + 15
+    blt         16f                        // at least 16 bytes left: vector path
+    mov         r1,  r2                    // tail: address remaining bytes as [end + r6]
+    b           100f
+16:
+    vld1.8      {q1}, [r1]!                // q1 = next 16 input bytes, r1 += 16
+    vext.8      q2,  q0,  q1, #14          // q2[i] = byte two positions before q1[i]
+    vext.8      q3,  q0,  q1, #15          // q3[i] = byte one position before q1[i]
+    vcgt.u8     q7,  q4,  q1               // q7[i] = all-ones where q1[i] <= 3
+    vceq.u8     q5,  q2,  #0               // q5[i] = all-ones where the byte two back is zero
+    vceq.u8     q6,  q3,  #0               // q6[i] = all-ones where the byte one back is zero
+    vand        q5,  q5,  q7
+    vand        q5,  q5,  q6               // q5[i] set where all three conditions hold
+    vshrn.u16   d14, q5,  #4               // narrow the 128-bit mask to 64 bits (one nibble per byte lane)
+    vmov        r7,  r8,  d14
+    orrs        r7,  r7,  r8               // nonzero if any lane matched
+    beq         16f                        // no match: store the block unchanged (second "16" label below)
+    mov         r6,  #-16                  // match: redo these 16 bytes one at a time, starting at [r1 - 16]
+100:
+    vmov.u8     r5,  d1[6]                 // byte 14 of q0
+    vmov.u8     r4,  d1[7]                 // byte 15 of q0
+    orr         r5,  r4,  r5, lsl #8       // r5 low 16 bits = last two bytes of q0 (history)
+101:
+    ldrb        r4,  [r1, r6]              // r4 = next input byte
+    orr         r9,  r4,  r5, lsl #16      // r9 = (two most recent history bytes << 16) | current byte
+    cmp         r9,  #3
+    bhi         102f                       // skip unless both history bytes are zero and current byte <= 3
+    strb        r3,  [r0], #1              // emit the escape byte
+    orr         r5,  r3,  r5, lsl #8       // shift the escape byte into the history
+102:
+    adds        r6,  r6,  #1               // advance index; these flags feed the blt below
+    strb        r4,  [r0], #1              // emit the input byte
+    orr         r5,  r4,  r5, lsl #8       // shift the input byte into the history
+    blt         101b                       // loop while r6 < 0
+    subs        r6,  r1,  r2               // r6 = src - end; these flags feed the blt 0b below
+    lsr         r9,  r5,  #8
+    vmov.u8     d1[6],  r9                 // write the last two emitted bytes back into
+    vmov.u8     d1[7],  r5                 // bytes 14 and 15 of q0 for the next vector step
+    blt         0b                         // more input remains
+
+    vpop        {q4-q7}                    // done: r0 points just past the last byte written
+    pop         {r4-r9}
+    bx          lr
+16:                                        // vector path, no escape needed in this block
+    subs        r6,  r1,  r2               // r6 = src - end; these flags feed the blt below
+    vst1.8      {q1}, [r0]!                // store the 16 bytes unchanged, r0 += 16
+    vmov        q0, q1                     // this block becomes the "previous" block
+    blt         0b                         // more input remains
+99:
+    vpop        {q4-q7}                    // done: r0 points just past the last byte written
+    pop         {r4-r9}
+    bx          lr
+endfunc
diff --git a/common/bitstream.c b/common/bitstream.c
index 6ca1f44..ec9836a 100644
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -144,6 +144,10 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
     }
 #endif
 #endif
+#if HAVE_ARMV6
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
+#endif
 #if ARCH_AARCH64
     if( cpu&X264_CPU_NEON )
         pf->nal_escape = x264_nal_escape_neon;
-- 
1.7.10.4



More information about the x264-devel mailing list