[x264-devel] checkasm: aarch64: Check register clobbering
Martin Storsjö
git at videolan.org
Sun Oct 11 19:01:06 CEST 2015
x264 | branch: master | Martin Storsjö <martin at martin.st> | Tue Aug 25 14:38:20 2015 +0300| [59683a97b50b34c6282457a959bb6b3e9e7f8c0d] | committer: Henrik Gramner
checkasm: aarch64: Check register clobbering
Disable this on iOS, since it has got a slightly different ABI
for vararg parameters.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=59683a97b50b34c6282457a959bb6b3e9e7f8c0d
---
Makefile | 1 +
tools/checkasm-aarch64.S | 156 ++++++++++++++++++++++++++++++++++++++++++++++
tools/checkasm.c | 6 +-
3 files changed, 162 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index 4403a11..4feef33 100644
--- a/Makefile
+++ b/Makefile
@@ -140,6 +140,7 @@ SRCS += common/aarch64/asm-offsets.c \
common/aarch64/mc-c.c \
common/aarch64/predict-c.c
OBJASM = $(ASMSRC:%.S=%.o)
+OBJCHK += tools/checkasm-aarch64.o
endif
endif
diff --git a/tools/checkasm-aarch64.S b/tools/checkasm-aarch64.S
new file mode 100644
index 0000000..515c727
--- /dev/null
+++ b/tools/checkasm-aarch64.S
@@ -0,0 +1,156 @@
+/****************************************************************************
+ * checkasm-aarch64.S: assembly check tool
+ *****************************************************************************
+ * Copyright (C) 2015 x264 project
+ *
+ * Authors: Martin Storsjo <martin at martin.st>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at x264.com.
+ *****************************************************************************/
+
+#include "../common/aarch64/asm.S"
+
+.section .rodata // read-only data: register seed table and the failure message
+.align 4
+register_init: // 18 quadwords, consumed in order by x264_checkasm_call: 8 for d8-d15, then 10 for x19-x28
+.quad 0x21f86d66c8ca00ce // d8
+.quad 0x75b6ba21077c48ad // d9
+.quad 0xed56bb2dcb3c7736 // d10
+.quad 0x8bda43d3fd1a7e06 // d11
+.quad 0xb64a9c9e5d318408 // d12
+.quad 0xdf9a54b303f1d3a3 // d13
+.quad 0x4a75479abd64e097 // d14
+.quad 0x249214109d5d1c88 // d15
+.quad 0x1a1b2550a612b48c // x19
+.quad 0x79445c159ce79064 // x20
+.quad 0x2eed899d5a28ddcd // x21
+.quad 0x86b2536fcd8cf636 // x22
+.quad 0xb0856806085e7943 // x23
+.quad 0x3f2bf84fc0fcca4e // x24
+.quad 0xacbd382dcf5b8de2 // x25
+.quad 0xd229e1f5b281303f // x26
+.quad 0x71aeaff20b095fd9 // x27
+.quad 0xab63e2e11fa38ed9 // x28
+
+// NUL-terminated text passed to puts when a register check fails.
+error_message:
+.asciz "failed to preserve register"
+
+.text
+
+// max number of args used by any x264 asm function.
+#define MAX_ARGS 15
+// ARG_STACK: bytes reserved below sp for the stack arguments handed to the callee (8 per copied slot, rounded up to a multiple of 16).
+#define ARG_STACK ((8*(MAX_ARGS - 6) + 15) & ~15)
+// x264_checkasm_call: call func with x19-x28 and d8-d15 preloaded from register_init, then verify that the callee left them unchanged.
+function x264_checkasm_call, export=1 // in: x0 = func, x1 = ok pointer, x2-x7 and the stack = arguments for func; out: x0/x1 as returned by func
+ stp x29, x30, [sp, #-16]! // save frame pointer and link register
+ mov x29, sp // x29 = frame base; the incoming stack arguments start at [x29, #16]
+ stp x19, x20, [sp, #-16]! // save x19-x28 of our caller, since they are overwritten below
+ stp x21, x22, [sp, #-16]!
+ stp x23, x24, [sp, #-16]!
+ stp x25, x26, [sp, #-16]!
+ stp x27, x28, [sp, #-16]!
+ stp d8, d9, [sp, #-16]! // likewise save d8-d15 of our caller
+ stp d10, d11, [sp, #-16]!
+ stp d12, d13, [sp, #-16]!
+ stp d14, d15, [sp, #-16]!
+// Seed every register under test with its value from register_init (x9 walks the table).
+ movrel x9, register_init
+ ldp d8, d9, [x9], #16
+ ldp d10, d11, [x9], #16
+ ldp d12, d13, [x9], #16
+ ldp d14, d15, [x9], #16
+ ldp x19, x20, [x9], #16
+ ldp x21, x22, [x9], #16
+ ldp x23, x24, [x9], #16
+ ldp x25, x26, [x9], #16
+ ldp x27, x28, [x9], #16
+// Park the ok pointer in its own 16-byte stack slot so it is still available after the call.
+ str x1, [sp, #-16]!
+// Build the outgoing stack-argument area for the callee by copying slots from our own incoming stack arguments.
+ sub sp, sp, #ARG_STACK
+.equ pos, 0 // pos = byte offset of the slot currently being copied
+// first two stacked args are copied to x6, x7
+.rept MAX_ARGS-6 // NOTE(review): two slots already go to x6/x7, so MAX_ARGS-8 copies look sufficient - confirm
+ ldr x9, [x29, #16 + 16 + pos] // skip saved x29/x30 (16 bytes) and the two slots destined for x6/x7 (16 bytes)
+ str x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+// Drop func and ok from the argument list: incoming x2-x7 become x0-x5 for the callee.
+ mov x12, x0 // x12 = func
+ mov x0, x2
+ mov x1, x3
+ mov x2, x4
+ mov x3, x5
+ mov x4, x6
+ mov x5, x7
+ ldp x6, x7, [x29, #16] // x6, x7 = first two incoming stack arguments
+ blr x12 // call func
+ add sp, sp, #ARG_STACK // release the outgoing argument area
+ ldr x2, [sp] // x2 = ok pointer stored before the call
+ stp x0, x1, [sp] // reuse that slot to hold the return value (x0, x1) while the checks run
+ movrel x9, register_init // rewind the table pointer for the comparison pass
+ movi v3.8h, #0 // v3 accumulates mismatch bits for d8-d15
+// check_reg_neon: OR into v3 the XOR of the next 16 table bytes with the low 64 bits of the two given v registers; uses v0, v1 and advances x9.
+.macro check_reg_neon reg1, reg2
+ ldr q0, [x9], #16 // q0 = expected pair from the table
+ uzp1 v1.2d, v\reg1\().2d, v\reg2\().2d // v1 = { low 64 bits of reg1, low 64 bits of reg2 }
+ eor v0.16b, v0.16b, v1.16b // nonzero bits mark differences
+ orr v3.16b, v3.16b, v0.16b
+.endm
+ check_reg_neon 8, 9
+ check_reg_neon 10, 11
+ check_reg_neon 12, 13
+ check_reg_neon 14, 15
+ uqxtn v3.8b, v3.8h // saturating narrow: each 16-bit lane becomes a byte that is nonzero iff the lane was nonzero
+ umov x3, v3.d[0] // x3 = combined mismatch flags for d8-d15
+// check_reg: OR into x3 the XOR of the next two table quadwords with the two given registers; uses x0, x1 and advances x9.
+.macro check_reg reg1, reg2
+ ldp x0, x1, [x9], #16 // x0, x1 = expected pair from the table
+ eor x0, x0, \reg1
+ eor x1, x1, \reg2
+ orr x3, x3, x0
+ orr x3, x3, x1
+.endm
+ check_reg x19, x20
+ check_reg x21, x22
+ check_reg x23, x24
+ check_reg x25, x26
+ check_reg x27, x28
+// x3 is zero only if every checked register still holds its table value.
+ cbz x3, 0f
+// Mismatch: store 0 through the ok pointer and print error_message.
+ mov w9, #0
+ str w9, [x2]
+ movrel x0, error_message
+ bl puts
+0: // common exit: hand back the return value of func and restore the registers of our caller
+ ldp x0, x1, [sp], #16 // reload the saved return value and pop its slot
+ ldp d14, d15, [sp], #16 // restore in reverse order of the saves in the prologue
+ ldp d12, d13, [sp], #16
+ ldp d10, d11, [sp], #16
+ ldp d8, d9, [sp], #16
+ ldp x27, x28, [sp], #16
+ ldp x25, x26, [sp], #16
+ ldp x23, x24, [sp], #16
+ ldp x21, x22, [sp], #16
+ ldp x19, x20, [sp], #16
+ ldp x29, x30, [sp], #16
+ ret
+endfunc
diff --git a/tools/checkasm.c b/tools/checkasm.c
index f4971df..183cef5 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -227,6 +227,10 @@ intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
#define x264_stack_pagealign( func, align ) func()
#endif
+#if ARCH_AARCH64
+intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... ); /* implemented in tools/checkasm-aarch64.S: calls func with the trailing arguments and stores 0 to *ok if x19-x28 or d8-d15 were not preserved */
+#endif
+
#define call_c1(func,...) func(__VA_ARGS__)
#if ARCH_X86_64
@@ -244,7 +248,7 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
-#elif ARCH_X86
+#elif ARCH_X86 || (ARCH_AARCH64 && !defined(__APPLE__))
#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
#else
#define call_a1 call_c1
More information about the x264-devel
mailing list