[x264-devel] [PATCH 24/24] RFC: checkasm: aarch64: Check register clobbering
Martin Storsjö
martin at martin.st
Thu Aug 13 22:59:45 CEST 2015
---
Makefile | 1 +
tools/checkasm-aarch64.S | 162 ++++++++++++++++++++++++++++++++++++++++++++++
tools/checkasm.c | 4 +-
3 files changed, 165 insertions(+), 2 deletions(-)
create mode 100644 tools/checkasm-aarch64.S
diff --git a/Makefile b/Makefile
index 435b3b1..d0b1633 100644
--- a/Makefile
+++ b/Makefile
@@ -141,6 +141,7 @@ SRCS += common/aarch64/asm-offsets.c \
common/aarch64/mc-c.c \
common/aarch64/predict-c.c
OBJASM = $(ASMSRC:%.S=%.o)
+OBJCHK += tools/checkasm-aarch64.o
endif
endif
diff --git a/tools/checkasm-aarch64.S b/tools/checkasm-aarch64.S
new file mode 100644
index 0000000..2b86d62
--- /dev/null
+++ b/tools/checkasm-aarch64.S
@@ -0,0 +1,162 @@
+/****************************************************************************
+ * checkasm-aarch64.S: assembly check tool
+ *****************************************************************************
+ * Copyright (C) 2015 x264 project
+ *
+ * Authors: Martin Storsjo <martin at martin.st>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "../common/aarch64/asm.S"
+
+.section .rodata // constants read by x264_checkasm_call
+.align 4
+register_init: // seed values, loaded in this order into d8-d15 and then x19-x28
+.quad 0x21f86d66c8ca00ce // d8
+.quad 0x75b6ba21077c48ad // d9
+.quad 0xed56bb2dcb3c7736 // d10
+.quad 0x8bda43d3fd1a7e06 // d11
+.quad 0xb64a9c9e5d318408 // d12
+.quad 0xdf9a54b303f1d3a3 // d13
+.quad 0x4a75479abd64e097 // d14
+.quad 0x249214109d5d1c88 // d15
+.quad 0x1a1b2550a612b48c // x19
+.quad 0x79445c159ce79064 // x20
+.quad 0x2eed899d5a28ddcd // x21
+.quad 0x86b2536fcd8cf636 // x22
+.quad 0xb0856806085e7943 // x23
+.quad 0x3f2bf84fc0fcca4e // x24
+.quad 0xacbd382dcf5b8de2 // x25
+.quad 0xd229e1f5b281303f // x26
+.quad 0x71aeaff20b095fd9 // x27
+.quad 0xab63e2e11fa38ed9 // x28
+
+
+error_message: // passed to puts when a seeded register comes back changed
+.asciz "failed to preserve register"
+
+.text
+
+// max number of args used by any x264 asm function.
+#define MAX_ARGS 15
+// Bytes reserved for the forwarded stack args, rounded up to a multiple of
+// 16 so that sp stays 16-byte aligned while the checked function runs.
+#define ARG_STACK ((8*(MAX_ARGS - 6) + 15) & ~15)
+
+// intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
+// Seeds d8-d15 and x19-x28 from register_init, calls func with the variadic args and
+// returns its x0/x1; if a seed was changed, stores 0 to *ok and prints error_message.
+function x264_checkasm_call, export=1
+    stp         x29, x30, [sp, #-16]!   // pushed as a pair so sp stays 16-byte aligned
+    mov         x29, sp                 // x29 + 16 = sp at entry = first stacked arg
+    stp         x19, x20, [sp, #-16]!
+    stp         x21, x22, [sp, #-16]!
+    stp         x23, x24, [sp, #-16]!
+    stp         x25, x26, [sp, #-16]!
+    stp         x27, x28, [sp, #-16]!
+    stp         d8,  d9,  [sp, #-16]!
+    stp         d10, d11, [sp, #-16]!
+    stp         d12, d13, [sp, #-16]!
+    stp         d14, d15, [sp, #-16]!
+
+    movrel      x9,  register_init      // fill the registers under test with known values
+    ldp         d8,  d9,  [x9], #16
+    ldp         d10, d11, [x9], #16
+    ldp         d12, d13, [x9], #16
+    ldp         d14, d15, [x9], #16
+    ldp         x19, x20, [x9], #16
+    ldp         x21, x22, [x9], #16
+    ldp         x23, x24, [x9], #16
+    ldp         x25, x26, [x9], #16
+    ldp         x27, x28, [x9], #16
+
+    str         x1,  [sp, #-16]!        // keep ok across the call; 16-byte slot, reused for x0/x1 below
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+.rept MAX_ARGS-6
+    ldr         x9,  [x29, #16 + 16 + pos]  // incoming stacked args from the third one on
+    str         x9,  [sp, #pos]
+.equ pos, pos + 8
+.endr
+
+    mov         x12, x0                 // func
+    mov         x0,  x2                 // register args shift down by two (func and ok are dropped)
+    mov         x1,  x3
+    mov         x2,  x4
+    mov         x3,  x5
+    mov         x4,  x6
+    mov         x5,  x7
+    ldp         x6,  x7,  [x29, #16]    // the first two stacked args move into registers
+    blr         x12
+    add         sp,  sp,  #ARG_STACK
+    ldr         x2,  [sp]               // x2 = ok
+    stp         x0,  x1,  [sp]          // func's return value replaces ok in the slot
+    movrel      x9,  register_init
+    ldp         d0,  d1,  [x9], #16
+    ldp         d2,  d3,  [x9], #16
+    ldp         d4,  d5,  [x9], #16
+    ldp         d6,  d7,  [x9], #16
+    eor         v0.8b, v0.8b, v8.8b     // seed xor current value: non-zero bits mark a clobber
+    eor         v1.8b, v1.8b, v9.8b
+    eor         v2.8b, v2.8b, v10.8b
+    eor         v3.8b, v3.8b, v11.8b
+    eor         v4.8b, v4.8b, v12.8b
+    eor         v5.8b, v5.8b, v13.8b
+    eor         v6.8b, v6.8b, v14.8b
+    eor         v7.8b, v7.8b, v15.8b
+    orr         v0.8b, v0.8b, v1.8b     // fold all differences into v0
+    orr         v0.8b, v0.8b, v2.8b
+    orr         v0.8b, v0.8b, v3.8b
+    orr         v0.8b, v0.8b, v4.8b
+    orr         v0.8b, v0.8b, v5.8b
+    orr         v0.8b, v0.8b, v6.8b
+    orr         v0.8b, v0.8b, v7.8b
+    fmov        x3,  d0                 // x3 accumulates every difference found
+
+.macro check_reg reg1, reg2
+    ldp         x0,  x1,  [x9], #16     // next two seeds; x9 continues past the d8-d15 seeds
+    eor         x0,  x0,  \reg1
+    eor         x1,  x1,  \reg2
+    orr         x3,  x3,  x0
+    orr         x3,  x3,  x1
+.endm
+    check_reg   x19, x20
+    check_reg   x21, x22
+    check_reg   x23, x24
+    check_reg   x25, x26
+    check_reg   x27, x28
+
+    cbz         x3,  0f                 // nothing clobbered: skip the failure report
+    str         wzr, [x2]               // *ok = 0; 32-bit store because ok is an int *
+    movrel      x0,  error_message
+    bl          puts
+0:
+    ldp         x0,  x1,  [sp], #16     // func's return value
+    ldp         d14, d15, [sp], #16
+    ldp         d12, d13, [sp], #16
+    ldp         d10, d11, [sp], #16
+    ldp         d8,  d9,  [sp], #16
+    ldp         x27, x28, [sp], #16
+    ldp         x25, x26, [sp], #16
+    ldp         x23, x24, [sp], #16
+    ldp         x21, x22, [sp], #16
+    ldp         x19, x20, [sp], #16
+    ldp         x29, x30, [sp], #16
+    ret
+endfunc
diff --git a/tools/checkasm.c b/tools/checkasm.c
index bc19297..d6af41e 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -227,7 +227,7 @@ intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
#define x264_stack_pagealign( func, align ) func()
#endif
-#if ARCH_ARM
+#if ARCH_ARM || ARCH_AARCH64
intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
#endif
@@ -248,7 +248,7 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
-#elif ARCH_X86 || ARCH_ARM
+#elif ARCH_X86 || ARCH_ARM || ARCH_AARCH64
#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
#else
#define call_a1 call_c1
--
1.7.10.4
More information about the x264-devel
mailing list