[x264-devel] checkasm: aarch64: Check register clobbering

Martin Storsjö git at videolan.org
Sun Oct 11 19:01:06 CEST 2015


x264 | branch: master | Martin Storsjö <martin at martin.st> | Tue Aug 25 14:38:20 2015 +0300| [59683a97b50b34c6282457a959bb6b3e9e7f8c0d] | committer: Henrik Gramner

checkasm: aarch64: Check register clobbering

Disable this on iOS, since it has got a slightly different ABI
for vararg parameters.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=59683a97b50b34c6282457a959bb6b3e9e7f8c0d
---

 Makefile                 |    1 +
 tools/checkasm-aarch64.S |  156 ++++++++++++++++++++++++++++++++++++++++++++++
 tools/checkasm.c         |    6 +-
 3 files changed, 162 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 4403a11..4feef33 100644
--- a/Makefile
+++ b/Makefile
@@ -140,6 +140,7 @@ SRCS   += common/aarch64/asm-offsets.c \
           common/aarch64/mc-c.c        \
           common/aarch64/predict-c.c
 OBJASM  = $(ASMSRC:%.S=%.o)
+OBJCHK += tools/checkasm-aarch64.o
 endif
 endif
 
diff --git a/tools/checkasm-aarch64.S b/tools/checkasm-aarch64.S
new file mode 100644
index 0000000..515c727
--- /dev/null
+++ b/tools/checkasm-aarch64.S
@@ -0,0 +1,156 @@
+/****************************************************************************
+ * checkasm-aarch64.S: assembly check tool
+ *****************************************************************************
+ * Copyright (C) 2015 x264 project
+ *
+ * Authors: Martin Storsjo <martin at martin.st>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "../common/aarch64/asm.S"
+
+.section .rodata
+.align 4
+register_init:
+.quad 0x21f86d66c8ca00ce
+.quad 0x75b6ba21077c48ad
+.quad 0xed56bb2dcb3c7736
+.quad 0x8bda43d3fd1a7e06
+.quad 0xb64a9c9e5d318408
+.quad 0xdf9a54b303f1d3a3
+.quad 0x4a75479abd64e097
+.quad 0x249214109d5d1c88
+.quad 0x1a1b2550a612b48c
+.quad 0x79445c159ce79064
+.quad 0x2eed899d5a28ddcd
+.quad 0x86b2536fcd8cf636
+.quad 0xb0856806085e7943
+.quad 0x3f2bf84fc0fcca4e
+.quad 0xacbd382dcf5b8de2
+.quad 0xd229e1f5b281303f
+.quad 0x71aeaff20b095fd9
+.quad 0xab63e2e11fa38ed9
+
+
+error_message:
+.asciz "failed to preserve register"
+
+.text
+
+// max number of args used by any x264 asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK ((8*(MAX_ARGS - 6) + 15) & ~15)
+
+function x264_checkasm_call, export=1
+    stp         x29, x30, [sp, #-16]!
+    mov         x29, sp
+    stp         x19, x20, [sp, #-16]!
+    stp         x21, x22, [sp, #-16]!
+    stp         x23, x24, [sp, #-16]!
+    stp         x25, x26, [sp, #-16]!
+    stp         x27, x28, [sp, #-16]!
+    stp         d8,  d9,  [sp, #-16]!
+    stp         d10, d11, [sp, #-16]!
+    stp         d12, d13, [sp, #-16]!
+    stp         d14, d15, [sp, #-16]!
+
+    movrel      x9, register_init
+    ldp         d8,  d9,  [x9], #16
+    ldp         d10, d11, [x9], #16
+    ldp         d12, d13, [x9], #16
+    ldp         d14, d15, [x9], #16
+    ldp         x19, x20, [x9], #16
+    ldp         x21, x22, [x9], #16
+    ldp         x23, x24, [x9], #16
+    ldp         x25, x26, [x9], #16
+    ldp         x27, x28, [x9], #16
+
+    str         x1,  [sp, #-16]!
+
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+// first two stacked args are copied to x6, x7
+.rept MAX_ARGS-6
+    ldr         x9, [x29, #16 + 16 + pos]
+    str         x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+
+    mov         x12, x0
+    mov         x0,  x2
+    mov         x1,  x3
+    mov         x2,  x4
+    mov         x3,  x5
+    mov         x4,  x6
+    mov         x5,  x7
+    ldp         x6,  x7,  [x29, #16]
+    blr         x12
+    add         sp,  sp,  #ARG_STACK
+    ldr         x2,  [sp]
+    stp         x0,  x1, [sp]
+    movrel      x9, register_init
+    movi        v3.8h,  #0
+
+.macro check_reg_neon reg1, reg2
+    ldr         q0,  [x9], #16
+    uzp1        v1.2d,  v\reg1\().2d, v\reg2\().2d
+    eor         v0.16b, v0.16b, v1.16b
+    orr         v3.16b, v3.16b, v0.16b
+.endm
+    check_reg_neon  8,  9
+    check_reg_neon  10, 11
+    check_reg_neon  12, 13
+    check_reg_neon  14, 15
+    uqxtn       v3.8b,  v3.8h
+    umov        x3,  v3.d[0]
+
+.macro check_reg reg1, reg2
+    ldp         x0,  x1,  [x9], #16
+    eor         x0,  x0,  \reg1
+    eor         x1,  x1,  \reg2
+    orr         x3,  x3,  x0
+    orr         x3,  x3,  x1
+.endm
+    check_reg   x19, x20
+    check_reg   x21, x22
+    check_reg   x23, x24
+    check_reg   x25, x26
+    check_reg   x27, x28
+
+    cbz         x3,  0f
+
+    mov         w9,  #0
+    str         w9,  [x2]
+    movrel      x0, error_message
+    bl          puts
+0:
+    ldp         x0,  x1,  [sp], #16
+    ldp         d14, d15, [sp], #16
+    ldp         d12, d13, [sp], #16
+    ldp         d10, d11, [sp], #16
+    ldp         d8,  d9,  [sp], #16
+    ldp         x27, x28, [sp], #16
+    ldp         x25, x26, [sp], #16
+    ldp         x23, x24, [sp], #16
+    ldp         x21, x22, [sp], #16
+    ldp         x19, x20, [sp], #16
+    ldp         x29, x30, [sp], #16
+    ret
+endfunc
diff --git a/tools/checkasm.c b/tools/checkasm.c
index f4971df..183cef5 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -227,6 +227,10 @@ intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
 #define x264_stack_pagealign( func, align ) func()
 #endif
 
+#if ARCH_AARCH64
+intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
+#endif
+
 #define call_c1(func,...) func(__VA_ARGS__)
 
 #if ARCH_X86_64
@@ -244,7 +248,7 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
     uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
     x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
     x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
-#elif ARCH_X86
+#elif ARCH_X86 || (ARCH_AARCH64 && !defined(__APPLE__))
 #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
 #else
 #define call_a1 call_c1



More information about the x264-devel mailing list