[x264-devel] aarch64: nal_escape_neon

Janne Grunau git at videolan.org
Sat Dec 20 21:10:48 CET 2014


x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Wed Nov  5 11:35:13 2014 +0100| [fa7e9d3d082327ceeacfaf85da6cde4c50fb4e5b] | committer: Anton Mitrofanov

aarch64: nal_escape_neon

3-4 times faster.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=fa7e9d3d082327ceeacfaf85da6cde4c50fb4e5b
---

 Makefile                     |    3 +-
 common/aarch64/bitstream-a.S |   82 ++++++++++++++++++++++++++++++++++++++++++
 common/bitstream.c           |    6 ++++
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index fd72fcd..f293542 100644
--- a/Makefile
+++ b/Makefile
@@ -128,7 +128,8 @@ endif
 # AArch64 NEON optims
 ifeq ($(ARCH),AARCH64)
 ifneq ($(AS),)
-ASMSRC += common/aarch64/dct-a.S     \
+ASMSRC += common/aarch64/bitstream-a.S \
+          common/aarch64/dct-a.S     \
           common/aarch64/deblock-a.S \
           common/aarch64/mc-a.S      \
           common/aarch64/pixel-a.S   \
diff --git a/common/aarch64/bitstream-a.S b/common/aarch64/bitstream-a.S
new file mode 100644
index 0000000..81f9ad8
--- /dev/null
+++ b/common/aarch64/bitstream-a.S
@@ -0,0 +1,82 @@
+/*****************************************************************************
+ * bitstream-a.S: aarch64 bitstream functions
+ *****************************************************************************
+ * Copyright (C) 2014 x264 project
+ *
+ * Authors: Janne Grunau <janne-x264 at jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+function x264_nal_escape_neon, export=1 // copy src..end to dst, inserting 0x03 before any byte <= 3 that follows two zero bytes
+    movi        v0.16b,  #0xff      // v0 = previous 16 bytes; all 0xff at start so nothing before the first byte compares equal to zero
+    movi        v4.16b,  #4         // v4 = 4 in every lane, threshold for the byte <= 3 test
+    mov         w3,  #3             // w3 = escape byte 0x03
+    subs        x6,  x1,  x2        // x6 = src - end (x0 = dst, x1 = src, x2 = end per the C prototype); negative while input remains
+    cbz         x6,  99f            // empty input: return with dst untouched
+0:                                  // main loop, entered with x6 = x1 - x2 < 0
+    cmn         x6,  #15            // set flags for x6 + 15
+    b.lt        16f                 // x6 < -15: at least 16 bytes left, take the vector path
+    mov         x1,  x2             // short tail: point x1 at end so [x1, x6] addresses the remaining bytes
+    b           100f                // handle the tail byte by byte
+16:
+    ld1         {v1.16b}, [x1], #16     // v1 = next 16 source bytes, x1 += 16
+    ext         v2.16b, v0.16b, v1.16b, #14     // v2 lane i = byte two positions before v1 lane i
+    ext         v3.16b, v0.16b, v1.16b, #15     // v3 lane i = byte one position before v1 lane i
+    cmhi        v7.16b, v4.16b, v1.16b  // v7 = lanes where 4 > v1 (unsigned), i.e. byte <= 3
+    cmeq        v5.16b, v2.16b, #0      // v5 = lanes whose byte two back is zero
+    cmeq        v6.16b, v3.16b, #0      // v6 = lanes whose previous byte is zero
+    and         v5.16b, v5.16b, v7.16b  // combine: two back is zero and current <= 3 ...
+    and         v5.16b, v5.16b, v6.16b  // ... and previous is zero
+    shrn        v7.8b,  v5.8h,  #4      // narrow the 16-byte mask to 64 bits (4 bits per lane)
+    mov         x7,  v7.d[0]            // x7 != 0 iff at least one lane matched
+    cbz         x7,  16f                // no match: store the block unchanged (second 16: label, further down)
+    mov         x6,  #-16               // rescan the 16 bytes just loaded; x1 already points past them
+100:                                // scalar path over the bytes at [x1 + x6 .. x1)
+    umov        w5,  v0.b[14]       // byte two positions before the current one
+    umov        w4,  v0.b[15]       // byte one position before the current one
+    orr         w5,  w4,  w5, lsl #8    // w5 = history, most recent byte in bits 7:0
+101:
+    ldrb        w4,  [x1, x6]       // w4 = next source byte
+    orr         w9,  w4,  w5, lsl #16   // w9 = two newest history bytes in bits 31:16, w4 in bits 7:0; older history is shifted out
+    cmp         w9,  #3
+    b.hi        102f                // w9 > 3: not two zero bytes followed by a byte <= 3, no escape
+    strb        w3,  [x0], #1       // emit the escape byte 0x03
+    orr         w5,  w3,  w5, lsl #8    // and push it into the history
+102:
+    adds        x6,  x6,  #1        // step the negative index; these flags feed the b.lt below
+    strb        w4,  [x0], #1       // copy the source byte
+    orr         w5,  w4,  w5, lsl #8    // push it into the history
+    b.lt        101b                // loop while x6 < 0 (strb and orr do not modify the flags)
+    subs        x6,  x1,  x2        // x6 = x1 - end; these flags feed the b.lt below
+    lsr         w9,  w5,  #8        // second newest byte written
+    mov         v0.b[14],  w9       // store the last two bytes written back into v0 lanes 14 and 15,
+    mov         v0.b[15],  w5       // the only v0 lanes the ext instructions above read
+    b.lt        0b                  // input remains: back to the main loop
+
+    ret                             // x0 points just past the last byte written
+16:                                 // vector block needed no escape
+    subs        x6,  x1,  x2        // x6 = x1 - end; these flags feed the b.lt below
+    st1         {v1.16b}, [x0], #16     // store the 16 bytes unchanged, x0 += 16
+    mov         v0.16b, v1.16b      // this block becomes the history for the next one
+    b.lt        0b                  // input remains: back to the main loop
+99:
+    ret                             // x0 points just past the last byte written (unchanged for empty input)
+endfunc
diff --git a/common/bitstream.c b/common/bitstream.c
index ed3ad5e..85dddb6 100644
--- a/common/bitstream.c
+++ b/common/bitstream.c
@@ -54,6 +54,8 @@ void x264_cabac_block_residual_internal_sse2       ( dctcoef *l, int b_interlace
 void x264_cabac_block_residual_internal_sse2_lzcnt ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
 void x264_cabac_block_residual_internal_avx2_bmi2 ( dctcoef *l, int b_interlaced, intptr_t ctx_block_cat, x264_cabac_t *cb );
 
+uint8_t *x264_nal_escape_neon( uint8_t *dst, uint8_t *src, uint8_t *end );
+
 /****************************************************************************
  * x264_nal_encode:
  ****************************************************************************/
@@ -142,4 +144,8 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
     }
 #endif
 #endif
+#if ARCH_AARCH64
+    if( cpu&X264_CPU_NEON )
+        pf->nal_escape = x264_nal_escape_neon;
+#endif
 }



More information about the x264-devel mailing list