[x264-devel] [PATCH 24/24] RFC: checkasm: aarch64: Check register clobbering

Martin Storsjö martin at martin.st
Thu Aug 13 22:59:45 CEST 2015


---
 Makefile                 |    1 +
 tools/checkasm-aarch64.S |  162 ++++++++++++++++++++++++++++++++++++++++++++++
 tools/checkasm.c         |    4 +-
 3 files changed, 165 insertions(+), 2 deletions(-)
 create mode 100644 tools/checkasm-aarch64.S

diff --git a/Makefile b/Makefile
index 435b3b1..d0b1633 100644
--- a/Makefile
+++ b/Makefile
@@ -141,6 +141,7 @@ SRCS   += common/aarch64/asm-offsets.c \
           common/aarch64/mc-c.c        \
           common/aarch64/predict-c.c
 OBJASM  = $(ASMSRC:%.S=%.o)
+OBJCHK += tools/checkasm-aarch64.o
 endif
 endif
 
diff --git a/tools/checkasm-aarch64.S b/tools/checkasm-aarch64.S
new file mode 100644
index 0000000..2b86d62
--- /dev/null
+++ b/tools/checkasm-aarch64.S
@@ -0,0 +1,162 @@
+/****************************************************************************
+ * checkasm-aarch64.S: assembly check tool
+ *****************************************************************************
+ * Copyright (C) 2015 x264 project
+ *
+ * Authors: Martin Storsjo <martin at martin.st>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "../common/aarch64/asm.S"
+
+.section .rodata
+.align 4                   // NOTE(review): presumably a power-of-two exponent here (16 bytes) - confirm
+register_init:             // 18 sentinel patterns: 8 loaded into d8-d15, then 10 into x19-x28
+.quad 0x21f86d66c8ca00ce   // d8
+.quad 0x75b6ba21077c48ad   // d9
+.quad 0xed56bb2dcb3c7736   // d10
+.quad 0x8bda43d3fd1a7e06   // d11
+.quad 0xb64a9c9e5d318408   // d12
+.quad 0xdf9a54b303f1d3a3   // d13
+.quad 0x4a75479abd64e097   // d14
+.quad 0x249214109d5d1c88   // d15
+.quad 0x1a1b2550a612b48c   // x19
+.quad 0x79445c159ce79064   // x20
+.quad 0x2eed899d5a28ddcd   // x21
+.quad 0x86b2536fcd8cf636   // x22
+.quad 0xb0856806085e7943   // x23
+.quad 0x3f2bf84fc0fcca4e   // x24
+.quad 0xacbd382dcf5b8de2   // x25
+.quad 0xd229e1f5b281303f   // x26
+.quad 0x71aeaff20b095fd9   // x27
+.quad 0xab63e2e11fa38ed9   // x28
+
+// Message passed to puts when a callee-saved register comes back changed.
+error_message:
+.asciz "failed to preserve register"
+
+.text
+
+// Max number of args used by any x264 asm function (8 in x0-x7, rest on stack).
+#define MAX_ARGS 15
+// Stack bytes for the args that do not fit in x0-x7, rounded up to 16.
+#define ARG_STACK ((8*(MAX_ARGS - 8) + 15) & ~15)
+// intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... ): calls func
+// with the variadic args and zeroes *ok if x19-x28 or d8-d15 were clobbered.
+function x264_checkasm_call, export=1
+    stp         x29, x30, [sp, #-16]!   // paired push keeps sp 16-byte aligned
+    mov         x29, sp                 // incoming stack args start at x29 + 16
+    stp         x19, x20, [sp, #-16]!
+    stp         x21, x22, [sp, #-16]!
+    stp         x23, x24, [sp, #-16]!
+    stp         x25, x26, [sp, #-16]!
+    stp         x27, x28, [sp, #-16]!
+    stp         d8,  d9,  [sp, #-16]!
+    stp         d10, d11, [sp, #-16]!
+    stp         d12, d13, [sp, #-16]!
+    stp         d14, d15, [sp, #-16]!
+    // Fill every checked callee-saved register with its known pattern.
+    movrel      x9, register_init
+    ldp         d8,  d9,  [x9], #16
+    ldp         d10, d11, [x9], #16
+    ldp         d12, d13, [x9], #16
+    ldp         d14, d15, [x9], #16
+    ldp         x19, x20, [x9], #16
+    ldp         x21, x22, [x9], #16
+    ldp         x23, x24, [x9], #16
+    ldp         x25, x26, [x9], #16
+    ldp         x27, x28, [x9], #16
+    // Keep the ok pointer across the call; a 16-byte slot preserves alignment.
+    str         x1,  [sp, #-16]!
+    // Copy the 9th and later func args to the outgoing stack area.
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+.rept MAX_ARGS-8
+    ldr         x9, [x29, #16 + 16 + pos]
+    str         x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+    // Shift the register args down by two to drop func and ok.
+    mov         x12, x0
+    mov         x0,  x2
+    mov         x1,  x3
+    mov         x2,  x4
+    mov         x3,  x5
+    mov         x4,  x6
+    mov         x5,  x7
+    ldp         x6,  x7,  [x29, #16]    // first two incoming stack args
+    blr         x12
+    add         sp,  sp,  #ARG_STACK
+    ldr         x2,  [sp]               // x2 = ok pointer
+    // Reuse the 16-byte slot to hold the return values during the checks.
+    stp         x0,  x1,  [sp]
+    movrel      x9, register_init
+    ldp         d0,  d1,  [x9], #16
+    ldp         d2,  d3,  [x9], #16
+    ldp         d4,  d5,  [x9], #16
+    ldp         d6,  d7,  [x9], #16
+    eor         v0.8b,  v0.8b,  v8.8b
+    eor         v1.8b,  v1.8b,  v9.8b
+    eor         v2.8b,  v2.8b,  v10.8b
+    eor         v3.8b,  v3.8b,  v11.8b
+    eor         v4.8b,  v4.8b,  v12.8b
+    eor         v5.8b,  v5.8b,  v13.8b
+    eor         v6.8b,  v6.8b,  v14.8b
+    eor         v7.8b,  v7.8b,  v15.8b
+    orr         v0.8b,  v0.8b,  v1.8b
+    orr         v0.8b,  v0.8b,  v2.8b
+    orr         v0.8b,  v0.8b,  v3.8b
+    orr         v0.8b,  v0.8b,  v4.8b
+    orr         v0.8b,  v0.8b,  v5.8b
+    orr         v0.8b,  v0.8b,  v6.8b
+    orr         v0.8b,  v0.8b,  v7.8b
+    fmov        x3,  d0                 // x3 = nonzero if any of d8-d15 changed
+    // check_reg: fold (expected xor actual) of two x registers into x3.
+.macro check_reg reg1, reg2
+    ldp         x0,  x1,  [x9], #16
+    eor         x0,  x0,  \reg1
+    eor         x1,  x1,  \reg2
+    orr         x3,  x3,  x0
+    orr         x3,  x3,  x1
+.endm
+    check_reg   x19, x20
+    check_reg   x21, x22
+    check_reg   x23, x24
+    check_reg   x25, x26
+    check_reg   x27, x28
+
+    cbz         x3,  0f
+    // Mismatch: ok points to a 32-bit int, so store only 32 bits of zero.
+    str         wzr, [x2]
+    movrel      x0, error_message
+    bl          puts
+0:
+    ldp         x0,  x1,  [sp], #16     // return values of func
+    ldp         d14, d15, [sp], #16
+    ldp         d12, d13, [sp], #16
+    ldp         d10, d11, [sp], #16
+    ldp         d8,  d9,  [sp], #16
+    ldp         x27, x28, [sp], #16
+    ldp         x25, x26, [sp], #16
+    ldp         x23, x24, [sp], #16
+    ldp         x21, x22, [sp], #16
+    ldp         x19, x20, [sp], #16
+    ldp         x29, x30, [sp], #16
+    ret
+endfunc
diff --git a/tools/checkasm.c b/tools/checkasm.c
index bc19297..d6af41e 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -227,7 +227,7 @@ intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
 #define x264_stack_pagealign( func, align ) func()
 #endif
 
-#if ARCH_ARM
+#if ARCH_ARM || ARCH_AARCH64
 intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
 #endif
 
@@ -248,7 +248,7 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
     uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
     x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
     x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
-#elif ARCH_X86 || ARCH_ARM
+#elif ARCH_X86 || ARCH_ARM || ARCH_AARCH64
 #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
 #else
 #define call_a1 call_c1
-- 
1.7.10.4



More information about the x264-devel mailing list