[x264-devel] [PATCH 11/11] checkasm: aarch64: Check register clobbering

Martin Storsjö martin at martin.st
Tue Aug 25 13:38:20 CEST 2015


Disable this on iOS, since iOS uses a slightly different ABI
for vararg parameters.
---
Applied Janne's fixes and cleanups, with uqxtn instead of xtn.

I'm holding off resending the arm register clobbering patch,
pending conclusion on how to handle 64 bit return values for the
checkasm_call function on 32 bit platforms.
---
 Makefile                 |    1 +
 tools/checkasm-aarch64.S |  156 ++++++++++++++++++++++++++++++++++++++++++++++
 tools/checkasm.c         |    6 +-
 3 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 tools/checkasm-aarch64.S

diff --git a/Makefile b/Makefile
index 4403a11..4feef33 100644
--- a/Makefile
+++ b/Makefile
@@ -140,6 +140,7 @@ SRCS   += common/aarch64/asm-offsets.c \
           common/aarch64/mc-c.c        \
           common/aarch64/predict-c.c
 OBJASM  = $(ASMSRC:%.S=%.o)
+OBJCHK += tools/checkasm-aarch64.o
 endif
 endif
 
diff --git a/tools/checkasm-aarch64.S b/tools/checkasm-aarch64.S
new file mode 100644
index 0000000..515c727
--- /dev/null
+++ b/tools/checkasm-aarch64.S
@@ -0,0 +1,156 @@
+/****************************************************************************
+ * checkasm-aarch64.S: assembly check tool
+ *****************************************************************************
+ * Copyright (C) 2015 x264 project
+ *
+ * Authors: Martin Storsjo <martin at martin.st>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "../common/aarch64/asm.S"
+
+.section .rodata
+.align 4
+register_init:                  // 18 marker values: loaded into d8-d15 and x19-x28 before the call, compared again afterwards
+.quad 0x21f86d66c8ca00ce        // d8
+.quad 0x75b6ba21077c48ad        // d9
+.quad 0xed56bb2dcb3c7736        // d10
+.quad 0x8bda43d3fd1a7e06        // d11
+.quad 0xb64a9c9e5d318408        // d12
+.quad 0xdf9a54b303f1d3a3        // d13
+.quad 0x4a75479abd64e097        // d14
+.quad 0x249214109d5d1c88        // d15
+.quad 0x1a1b2550a612b48c        // x19
+.quad 0x79445c159ce79064        // x20
+.quad 0x2eed899d5a28ddcd        // x21
+.quad 0x86b2536fcd8cf636        // x22
+.quad 0xb0856806085e7943        // x23
+.quad 0x3f2bf84fc0fcca4e        // x24
+.quad 0xacbd382dcf5b8de2        // x25
+.quad 0xd229e1f5b281303f        // x26
+.quad 0x71aeaff20b095fd9        // x27
+.quad 0xab63e2e11fa38ed9        // x28
+
+
+error_message:                  // passed to puts when any of the registers above no longer holds its marker value
+.asciz "failed to preserve register"
+
+.text
+
+// max number of args used by any x264 asm function.
+#define MAX_ARGS 15
+// stack bytes for the MAX_ARGS - 6 copied argument slots (8 bytes each), rounded up to a multiple of 16
+#define ARG_STACK ((8*(MAX_ARGS - 6) + 15) & ~15)
+// x264_checkasm_call( func, ok, ... ): calls func with the trailing arguments and returns its x0/x1; if d8-d15 or x19-x28 come back changed, stores 0 through ok and prints error_message.
+function x264_checkasm_call, export=1
+    stp         x29, x30, [sp, #-16]!          // save frame pointer and return address
+    mov         x29, sp                        // x29 + 16 is the value sp had on entry
+    stp         x19, x20, [sp, #-16]!          // save x19-x28 and d8-d15; they are overwritten below and restored before ret
+    stp         x21, x22, [sp, #-16]!
+    stp         x23, x24, [sp, #-16]!
+    stp         x25, x26, [sp, #-16]!
+    stp         x27, x28, [sp, #-16]!
+    stp         d8,  d9,  [sp, #-16]!
+    stp         d10, d11, [sp, #-16]!
+    stp         d12, d13, [sp, #-16]!
+    stp         d14, d15, [sp, #-16]!
+    // fill d8-d15 and x19-x28 with the marker values from register_init
+    movrel      x9, register_init
+    ldp         d8,  d9,  [x9], #16
+    ldp         d10, d11, [x9], #16
+    ldp         d12, d13, [x9], #16
+    ldp         d14, d15, [x9], #16
+    ldp         x19, x20, [x9], #16
+    ldp         x21, x22, [x9], #16
+    ldp         x23, x24, [x9], #16
+    ldp         x25, x26, [x9], #16
+    ldp         x27, x28, [x9], #16
+    // keep the ok pointer (x1) on the stack across the call; this 16-byte slot later holds the return value
+    str         x1,  [sp, #-16]!
+    // reserve ARG_STACK bytes and copy MAX_ARGS - 6 eight-byte slots from above the frame into them
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+// first two stacked args are copied to x6, x7
+.rept MAX_ARGS-6
+    ldr         x9, [x29, #16 + 16 + pos]      // skip saved x29/x30 (16 bytes) and the two slots loaded into x6, x7 (16 bytes)
+    str         x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+    // shift the arguments down by two registers: func receives the incoming x2-x7 as x0-x5
+    mov         x12, x0                        // x12 = func, the call target
+    mov         x0,  x2
+    mov         x1,  x3
+    mov         x2,  x4
+    mov         x3,  x5
+    mov         x4,  x6
+    mov         x5,  x7
+    ldp         x6,  x7,  [x29, #16]           // the two slots at the entry sp become the last two register arguments
+    blr         x12
+    add         sp,  sp,  #ARG_STACK           // drop the copied argument area
+    ldr         x2,  [sp]                      // x2 = ok pointer saved before the call
+    stp         x0,  x1, [sp]                  // save the x0/x1 returned by func in the same slot
+    movrel      x9, register_init
+    movi        v3.8h,  #0                     // v3 accumulates any difference found in d8-d15
+    // check_reg_neon: compare the 2d lane 0 of two vector registers with the next 16 bytes of register_init
+.macro check_reg_neon reg1, reg2
+    ldr         q0,  [x9], #16                 // expected pair of values, then advance x9
+    uzp1        v1.2d,  v\reg1\().2d, v\reg2\().2d
+    eor         v0.16b, v0.16b, v1.16b         // nonzero bits wherever a register differs from its marker
+    orr         v3.16b, v3.16b, v0.16b
+.endm
+    check_reg_neon  8,  9
+    check_reg_neon  10, 11
+    check_reg_neon  12, 13
+    check_reg_neon  14, 15
+    uqxtn       v3.8b,  v3.8h                  // saturating narrow: a nonzero 16-bit lane stays nonzero in the low 64 bits
+    umov        x3,  v3.d[0]                   // x3 = combined difference bits for d8-d15
+    // check_reg: compare two general registers with the next two values of register_init, OR differences into x3
+.macro check_reg reg1, reg2
+    ldp         x0,  x1,  [x9], #16            // x0, x1 are free here: the return value is already saved on the stack
+    eor         x0,  x0,  \reg1
+    eor         x1,  x1,  \reg2
+    orr         x3,  x3,  x0
+    orr         x3,  x3,  x1
+.endm
+    check_reg   x19, x20
+    check_reg   x21, x22
+    check_reg   x23, x24
+    check_reg   x25, x26
+    check_reg   x27, x28
+
+    cbz         x3,  0f                        // x3 == 0: every checked register still holds its marker value
+
+    mov         w9,  #0
+    str         w9,  [x2]                      // store 0 through the ok pointer to flag the failure
+    movrel      x0, error_message
+    bl          puts
+0:
+    ldp         x0,  x1,  [sp], #16            // reload the saved return value of func
+    ldp         d14, d15, [sp], #16            // restore the saved registers in reverse order of the pushes
+    ldp         d12, d13, [sp], #16
+    ldp         d10, d11, [sp], #16
+    ldp         d8,  d9,  [sp], #16
+    ldp         x27, x28, [sp], #16
+    ldp         x25, x26, [sp], #16
+    ldp         x23, x24, [sp], #16
+    ldp         x21, x22, [sp], #16
+    ldp         x19, x20, [sp], #16
+    ldp         x29, x30, [sp], #16
+    ret
+endfunc
diff --git a/tools/checkasm.c b/tools/checkasm.c
index f4971df..183cef5 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -227,6 +227,10 @@ intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
 #define x264_stack_pagealign( func, align ) func()
 #endif
 
+#if ARCH_AARCH64
+intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... ); /* defined in tools/checkasm-aarch64.S; stores 0 to *ok on a clobbered register */
+#endif /* ARCH_AARCH64 */
+
 #define call_c1(func,...) func(__VA_ARGS__)
 
 #if ARCH_X86_64
@@ -244,7 +248,7 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
     uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
     x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
     x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
-#elif ARCH_X86
+#elif ARCH_X86 || (ARCH_AARCH64 && !defined(__APPLE__))
 #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
 #else
 #define call_a1 call_c1
-- 
1.7.10.4



More information about the x264-devel mailing list