[x264-devel] [PATCH] checkasm: arm: Use a macro to share code between the neon/noneon versions
Martin Storsjö
martin at martin.st
Mon Aug 31 21:36:45 CEST 2015
---
This can be squashed into the earlier "checkasm: arm: Check register
clobbering" patch if you want to.
Compared to the previous version of this patch, the macro parameters have been simplified, as suggested by Janne.
---
tools/checkasm-arm.S | 76 ++++++++++++--------------------------------------
1 file changed, 18 insertions(+), 58 deletions(-)
diff --git a/tools/checkasm-arm.S b/tools/checkasm-arm.S
index 93c96d9..35de22c 100644
--- a/tools/checkasm-arm.S
+++ b/tools/checkasm-arm.S
@@ -46,14 +46,20 @@ error_message:
#define MAX_ARGS 15
#define ARG_STACK 4*(MAX_ARGS - 2)
-#define PUSHED 16*4 + 4*10
-function x264_checkasm_call_neon
+.macro clobbercheck variant
+.equ pushed, 4*10
+function x264_checkasm_call_\variant
push {r4-r11, lr}
+.ifc \variant, neon
vpush {q4-q7}
+.equ pushed, pushed + 16*4
+.endif
movrel r12, register_init
+.ifc \variant, neon
vldm r12, {q4-q7}
+.endif
ldm r12, {r4-r11}
push {r1}
@@ -61,7 +67,7 @@ function x264_checkasm_call_neon
sub sp, sp, #ARG_STACK
.equ pos, 0
.rept MAX_ARGS-2
- ldr r12, [sp, #ARG_STACK + PUSHED + 8 + pos]
+ ldr r12, [sp, #ARG_STACK + pushed + 8 + pos]
str r12, [sp, #pos]
.equ pos, pos + 4
.endr
@@ -69,13 +75,14 @@ function x264_checkasm_call_neon
mov r12, r0
mov r0, r2
mov r1, r3
- ldrd r2, r3, [sp, #ARG_STACK + PUSHED]
+ ldrd r2, r3, [sp, #ARG_STACK + pushed]
blx r12
add sp, sp, #ARG_STACK
pop {r2}
push {r0, r1}
movrel r12, register_init
+.ifc \variant, neon
vldm r12, {q0-q3}
veor q0, q0, q4
veor q1, q1, q5
@@ -88,6 +95,9 @@ function x264_checkasm_call_neon
vrev64.32 d1, d0
vorr d0, d0, d1
vmov.32 r3, d0[0]
+.else
+ mov r3, #0
+.endif
.macro check_reg reg1, reg2
ldrd r0, r1, [r12], #8
@@ -111,62 +121,12 @@ function x264_checkasm_call_neon
bl puts
0:
pop {r0, r1}
+.ifc \variant, neon
vpop {q4-q7}
+.endif
pop {r4-r11, pc}
endfunc
-
-#undef PUSHED
-#define PUSHED 4*10
-
-function x264_checkasm_call_noneon
- push {r4-r11, lr}
-
- movrel r12, register_init
- ldm r12, {r4-r11}
-
- push {r1}
-
- sub sp, sp, #ARG_STACK
-.equ pos, 0
-.rept MAX_ARGS-2
- ldr r12, [sp, #ARG_STACK + PUSHED + 8 + pos]
- str r12, [sp, #pos]
-.equ pos, pos + 4
-.endr
-
- mov r12, r0
- mov r0, r2
- mov r1, r3
- ldrd r2, r3, [sp, #ARG_STACK + PUSHED]
- blx r12
- add sp, sp, #ARG_STACK
- pop {r2}
-
- push {r0, r1}
- movrel r12, register_init
- mov r3, #0
-
-.macro check_reg reg1, reg2
- ldrd r0, r1, [r12], #8
- eor r0, r0, \reg1
- eor r1, r1, \reg2
- orr r3, r3, r0
- orr r3, r3, r1
.endm
- check_reg r4, r5
- check_reg r6, r7
- check_reg r8, r9
- check_reg r10, r11
-.purgem check_reg
-
- cmp r3, #0
- beq 0f
- mov r12, #0
- str r12, [r2]
- movrel r0, error_message
- bl puts
-0:
- pop {r0, r1}
- pop {r4-r11, pc}
-endfunc
+clobbercheck neon
+clobbercheck noneon
--
1.7.10.4
More information about the x264-devel
mailing list