[x264-devel] [PATCH] checkasm: arm: Use a macro to share code between the neon/noneon versions

Martin Storsjö martin at martin.st
Mon Aug 31 21:36:45 CEST 2015


---
This can be squashed into the earlier "checkasm: arm: Check register
clobbering" patch if you prefer.

Compared to the previous version of this patch, the macro parameters
have been simplified, as suggested by Janne.
---
 tools/checkasm-arm.S |   76 ++++++++++++--------------------------------------
 1 file changed, 18 insertions(+), 58 deletions(-)

diff --git a/tools/checkasm-arm.S b/tools/checkasm-arm.S
index 93c96d9..35de22c 100644
--- a/tools/checkasm-arm.S
+++ b/tools/checkasm-arm.S
@@ -46,14 +46,20 @@ error_message:
 #define MAX_ARGS 15
 
 #define ARG_STACK 4*(MAX_ARGS - 2)
-#define PUSHED 16*4 + 4*10
 
-function x264_checkasm_call_neon
+.macro clobbercheck variant
+.equ pushed, 4*10
+function x264_checkasm_call_\variant
     push        {r4-r11, lr}
+.ifc \variant, neon
     vpush       {q4-q7}
+.equ pushed, pushed + 16*4
+.endif
 
     movrel      r12, register_init
+.ifc \variant, neon
     vldm        r12, {q4-q7}
+.endif
     ldm         r12, {r4-r11}
 
     push        {r1}
@@ -61,7 +67,7 @@ function x264_checkasm_call_neon
     sub         sp,  sp,  #ARG_STACK
 .equ pos, 0
 .rept MAX_ARGS-2
-    ldr         r12, [sp, #ARG_STACK + PUSHED + 8 + pos]
+    ldr         r12, [sp, #ARG_STACK + pushed + 8 + pos]
     str         r12, [sp, #pos]
 .equ pos, pos + 4
 .endr
@@ -69,13 +75,14 @@ function x264_checkasm_call_neon
     mov         r12, r0
     mov         r0,  r2
     mov         r1,  r3
-    ldrd        r2,  r3,  [sp, #ARG_STACK + PUSHED]
+    ldrd        r2,  r3,  [sp, #ARG_STACK + pushed]
     blx         r12
     add         sp,  sp,  #ARG_STACK
     pop         {r2}
 
     push        {r0, r1}
     movrel      r12, register_init
+.ifc \variant, neon
     vldm        r12, {q0-q3}
     veor        q0,  q0,  q4
     veor        q1,  q1,  q5
@@ -88,6 +95,9 @@ function x264_checkasm_call_neon
     vrev64.32   d1,  d0
     vorr        d0,  d0,  d1
     vmov.32     r3,  d0[0]
+.else
+    mov         r3,  #0
+.endif
 
 .macro check_reg reg1, reg2
     ldrd        r0,  r1,  [r12], #8
@@ -111,62 +121,12 @@ function x264_checkasm_call_neon
     bl          puts
 0:
     pop         {r0, r1}
+.ifc \variant, neon
     vpop        {q4-q7}
+.endif
     pop         {r4-r11, pc}
 endfunc
-
-#undef PUSHED
-#define PUSHED 4*10
-
-function x264_checkasm_call_noneon
-    push        {r4-r11, lr}
-
-    movrel      r12, register_init
-    ldm         r12, {r4-r11}
-
-    push        {r1}
-
-    sub         sp,  sp,  #ARG_STACK
-.equ pos, 0
-.rept MAX_ARGS-2
-    ldr         r12, [sp, #ARG_STACK + PUSHED + 8 + pos]
-    str         r12, [sp, #pos]
-.equ pos, pos + 4
-.endr
-
-    mov         r12, r0
-    mov         r0,  r2
-    mov         r1,  r3
-    ldrd        r2,  r3,  [sp, #ARG_STACK + PUSHED]
-    blx         r12
-    add         sp,  sp,  #ARG_STACK
-    pop         {r2}
-
-    push        {r0, r1}
-    movrel      r12, register_init
-    mov         r3,  #0
-
-.macro check_reg reg1, reg2
-    ldrd        r0,  r1,  [r12], #8
-    eor         r0,  r0, \reg1
-    eor         r1,  r1, \reg2
-    orr         r3,  r3, r0
-    orr         r3,  r3, r1
 .endm
-    check_reg   r4,  r5
-    check_reg   r6,  r7
-    check_reg   r8,  r9
-    check_reg   r10, r11
-.purgem check_reg
-
-    cmp         r3,  #0
-    beq         0f
 
-    mov         r12, #0
-    str         r12, [r2]
-    movrel      r0, error_message
-    bl          puts
-0:
-    pop         {r0, r1}
-    pop         {r4-r11, pc}
-endfunc
+clobbercheck neon
+clobbercheck noneon
-- 
1.7.10.4



More information about the x264-devel mailing list