[x264-devel] [PATCH 03/11] arm: Add x264_nal_escape_neon

Martin Storsjö martin at martin.st
Tue Aug 25 13:38:12 CEST 2015


checkasm timing      Cortex-A7      A8      A9
nal_escape_c                852758  879566  655497
nal_escape_neon             376831  450678  371673
---
Avoid clobbering neon registers needlessly; reduce the number
of used GPRs, return using pop {..,pc}.
---
 Makefile                 |    2 +-
 common/arm/bitstream-a.S |   84 ++++++++++++++++++++++++++++++++++++++++++++++
 common/bitstream.c       |    4 +++
 3 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 common/arm/bitstream-a.S

diff --git a/Makefile b/Makefile
index 6193c59..4403a11 100644
--- a/Makefile
+++ b/Makefile
@@ -119,7 +119,7 @@ ifeq ($(SYS_ARCH),ARM)
 ifneq ($(AS),)
 ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \
           common/arm/dct-a.S common/arm/quant-a.S common/arm/deblock-a.S \
-          common/arm/predict-a.S
+          common/arm/predict-a.S common/arm/bitstream-a.S
 SRCS   += common/arm/mc-c.c common/arm/predict-c.c
 OBJASM  = $(ASMSRC:%.S=%.o)
 endif
diff --git a/common/arm/bitstream-a.S b/common/arm/bitstream-a.S
new file mode 100644
index 0000000..5b0a171
--- /dev/null
+++ b/common/arm/bitstream-a.S
@@ -0,0 +1,84 @@
+/*****************************************************************************
+ * bitstream-a.S: arm bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2014-2015 x264 project
+ *
+ * Authors: Janne Grunau <janne-x264 at jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+function x264_nal_escape_neon   // copy bytes [r1, r2) to r0, storing 0x03 before any byte <= 3 that follows two zero bytes
+    push        {r4-r5,lr}      // r4, r5 and lr are used as scratch below
+    vmov.u8     q0,  #0xff      // q0 = previous 16 bytes; all 0xff so no zero run precedes the first byte
+    vmov.u8     q8,  #4         // threshold: bytes below 4 are escape candidates
+    mov         r3,  #3         // r3 = escape byte stored by the scalar loop
+    subs        lr,  r1,  r2    // lr = r1 - r2, the negated number of bytes left
+    beq         99f             // r1 == r2: nothing to copy
+0:                              // top of main loop; lr = negated bytes left
+    cmn         lr,  #15        // flags from lr + 15
+    blt         16f             // lr < -15: at least 16 bytes left, use the vector path
+    mov         r1,  r2         // tail: address input as [r2 + lr] with the negative lr
+    b           100f            // handle the remaining bytes one at a time
+16:                             // vector path: examine 16 bytes at once
+    vld1.8      {q1}, [r1]!     // q1 = next 16 input bytes, r1 += 16
+    vext.8      q2,  q0,  q1, #14   // q2[i] = byte two positions before q1[i]
+    vext.8      q3,  q0,  q1, #15   // q3[i] = byte one position before q1[i]
+    vcgt.u8     q11, q8,  q1    // q11[i] = (q1[i] < 4)
+    vceq.u8     q9,  q2,  #0    // q9[i]  = (byte at i-2 == 0)
+    vceq.u8     q10, q3,  #0    // q10[i] = (byte at i-1 == 0)
+    vand        q9,  q9,  q11
+    vand        q9,  q9,  q10   // q9 = mask of positions that need an escape byte
+    vshrn.u16   d22, q9,  #4    // narrow the 128-bit mask to 64 bits (4 bits per byte); d22 is the low half of q11, already folded into q9
+    vmov        ip,  lr,  d22   // mask -> ip:lr; this overwrites the byte count in lr
+    orrs        ip,  ip,  lr    // Z set when no position in the block needs escaping
+    beq         16f             // none: store the block unchanged (second 16: label below)
+    mov         lr,  #-16       // redo these 16 bytes in the scalar loop; r1 already points past them
+100:                            // rebuild the two-byte history from q0
+    vmov.u8     r5,  d1[6]      // d1 is the high half of q0: byte 14 = second-to-last history byte
+    vmov.u8     r4,  d1[7]      // byte 15 = last history byte
+    orr         r5,  r4,  r5, lsl #8    // low 16 bits of r5 = last two bytes, most recent lowest
+101:                            // scalar loop: one input byte per iteration
+    ldrb        r4,  [r1, lr]   // r4 = current input byte
+    orr         ip,  r4,  r5, lsl #16   // ip = last two history bytes in bits 31..16, current byte in bits 7..0
+    cmp         ip,  #3
+    bhi         102f            // ip <= 3 only if both history bytes are 0 and current byte <= 3
+    strb        r3,  [r0], #1   // emit the 0x03 escape byte
+    orr         r5,  r3,  r5, lsl #8    // shift the emitted 0x03 into the history
+102:
+    adds        lr,  lr,  #1    // advance; these flags are consumed by the blt below (strb/orr leave them alone)
+    strb        r4,  [r0], #1   // copy the input byte
+    orr         r5,  r4,  r5, lsl #8    // shift the copied byte into the history
+    blt         101b            // loop while lr < 0
+    subs        lr,  r1,  r2    // recompute negated bytes left; flags used by the blt below
+    lsr         ip,  r5,  #8    // ip low byte = second-to-last byte written
+    vmov.u8     d1[6],  ip      // write the history back into q0 bytes 14 and 15
+    vmov.u8     d1[7],  r5      //   so the vext in the vector path sees it
+    blt         0b              // input left: back to the main loop
+
+    pop         {r4-r5,pc}      // done; r0 points past the last byte written (presumably the return value - confirm against the C prototype)
+16:                             // vector block contained nothing to escape
+    subs        lr,  r1,  r2    // recompute negated bytes left (lr was overwritten by the mask)
+    vst1.8      {q1}, [r0]!     // store the 16 bytes unchanged, r0 += 16
+    vmov        q0, q1          // this block becomes the history for the next one
+    blt         0b              // input left: back to the main loop
+99:
+    pop         {r4-r5,pc}      // restore r4-r5 and return via the saved lr
+endfunc
diff --git a/common/bitstream.c b/common/bitstream.c
index 6ca1f44..ec9836a 100644
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -144,6 +144,10 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
     }
 #endif
 #endif
+#if HAVE_ARMV6
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
+#endif
 #if ARCH_AARCH64
     if( cpu&X264_CPU_NEON )
         pf->nal_escape = x264_nal_escape_neon;
-- 
1.7.10.4



More information about the x264-devel mailing list