[x264-devel] [PATCH 2/3] x32 x86inc: new macros and suffix for pointer sized operands

Matthias Räncker theonetruecamper at gmx.de
Sun Jan 27 00:00:09 CET 2013


new macros IF, IFNIDN - to avoid cluttering the code with %directives
           every other line
new ARCH-macros ARCH_X86_32, ARCH_X86_64_X64, ARCH_X86_64_X32
new macros for pointer operands
- ptrsize  8 for x64, 4 otherwise
- pword    either qword or dword
- dp:      either dq or dd
- resp:    either resq or resd
- new p suffix for pointer-sized registers/arguments
- preg(x)  gives the corresponding pointer-sized register if used with
           a native register

Programming for the x32-abi is odd in that while pointers are only 32 bit wide,
by default addressing still uses 64 bit. Using 32 bit would require an
additional address-size override prefix (67h), which is undesirable.
Conversely, operations with pointers themselves should use 32 bits where
feasible to save a REX prefix. Since this works only with legacy registers,
changing the allocation of registers for some functions should be considered.

Signed-off-by: Matthias Räncker <theonetruecamper at gmx.de>
---
 common/x86/x86inc.asm | 93 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 84 insertions(+), 9 deletions(-)

diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index f45bdb4..57b8fe0 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -38,6 +38,32 @@
     %define program_name x264
 %endif
 
+; convenience single-line conditional; expands to at most one line
+; 2 operands: cc, a            - cc ? a : (nothing)
+; 3 operands: cc, a, b         - cc ? a : b
+; 4 operands: cc, foo, a, b    - cc ? foo a : foo b
+%macro IF 2-4
+    %if %1
+        %if %0 < 4 ; 2 or 3 operands: second operand is the whole line
+            %2
+        %else ; 4 operands: second operand is the shared prefix
+            %2 %3
+        %endif
+    %elif %0 > 2 ; condition false: the 2-operand form emits nothing
+        %if %0 < 4
+            %3
+        %else
+            %2 %4
+        %endif
+    %endif
+%endmacro
+
+%macro IFNIDN 3+ ; a, b, line... - emit line only if a and b are not identical
+    %ifnidn %1, %2
+        %3
+    %endif
+%endmacro
+
 %define WIN64  0
 %define UNIX64 0
 %if ARCH_X86_64
@@ -50,6 +76,18 @@
     %endif
 %endif
 
+%ifndef ARCH_X86_32 ; default: 32-bit exactly when not building for x86-64
+    %assign ARCH_X86_32 ARCH_X86_64 ^ 1
+%endif
+
+%ifndef ARCH_X86_64_X32 ; x32 is opt-in: off unless defined beforehand
+    %define ARCH_X86_64_X32 0
+%endif
+
+%ifndef ARCH_X86_64_X64 ; evaluated once so it is a plain 0/1 in any expression
+    %assign ARCH_X86_64_X64 ARCH_X86_64 ^ ARCH_X86_64_X32
+%endif
+
 %ifdef PREFIX
     %define mangle(x) _ %+ x
 %else
@@ -129,26 +167,37 @@ CPU amdnop
 
 ; registers:
 ; rN and rNq are the native-size register holding function argument N
-; rNd, rNw, rNb are dword, word, and byte size
+; rNp, rNd, rNw, rNb are pointer, dword, word, and byte size
 ; rNh is the high 8 bits of the word size
-; rNm is the original location of arg N (a register or on the stack), dword
-; rNmp is native size
+; rNm is the original location of arg N
+;  without size attribute if on the stack, dword otherwise
+; rNmq, rNmp, rNmd, rNmw, rNmh, rNmb are native, pointer, dword, word, high byte
+;  and byte size of the original location of arg N (a register or on the stack)
 
 %macro DECLARE_REG 2-3
     %define r%1q %2
+    IF ARCH_X86_64_X64, %define r%1p, %2, %2d ; pointer size: full register on x64, dword register otherwise
     %define r%1d %2d
     %define r%1w %2w
     %define r%1b %2b
     %define r%1h %2h
     %if %0 == 2
         %define r%1m  %2d
-        %define r%1mp %2
-    %elif ARCH_X86_64 ; memory
-        %define r%1m [rstk + stack_offset + %3]
-        %define r%1mp qword r %+ %1 %+ m
+        %define r%1mq %2
+        IF ARCH_X86_64_X64, %define r%1mp, %2, %2d ; argument lives in a register: same choice as the p suffix above
+        %define r%1md %2d
+        %define r%1mw %2w
+        %define r%1mh %2h
+        %define r%1mb %2b
     %else
         %define r%1m [rstk + stack_offset + %3]
-        %define r%1mp dword r %+ %1 %+ m
+        IF ARCH_X86_64, %define r%1mq, qword [rstk + stack_offset + %3], \
+                                       dword [rstk + stack_offset + %3]
+        %define r%1mp pword [rstk + stack_offset + %3] ; pword resolves to qword or dword when the macro is used
+        %define r%1md dword [rstk + stack_offset + %3]
+        %define r%1mw word [rstk + stack_offset + %3]
+        %define r%1mh byte [rstk + stack_offset + %3 + 1] ; byte at offset 1 of the stack slot
+        %define r%1mb byte [rstk + stack_offset + %3]
     %endif
     %define r%1  %2
 %endmacro
@@ -156,6 +205,8 @@ CPU amdnop
 %macro DECLARE_REG_SIZE 3
     %define r%1q r%1
     %define e%1q r%1
+    IF ARCH_X86_64_X64, %define r%1p, r%1, e%1
+    %define e%1p e%1
     %define r%1d e%1
     %define e%1d e%1
     %define r%1w %1
@@ -191,6 +242,7 @@ DECLARE_REG_SIZE bp, bpl, null
 %macro DECLARE_REG_TMP_SIZE 0-*
     %rep %0
         %define t%1q t%1 %+ q
+        IF ARCH_X86_64_X64, %define t%1p, t%1, t%1 %+ d
         %define t%1d t%1 %+ d
         %define t%1w t%1 %+ w
         %define t%1h t%1 %+ h
@@ -207,6 +259,17 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
     %define gprsize 4
 %endif
 
+IF ARCH_X86_64_X64, %define ptrsize, 8, 4 ; pointer size in bytes
+IF ARCH_X86_64_X64, %define pword, qword, dword ; size keyword for a pointer-sized operand
+IF ARCH_X86_64_X64, %define dp, dq, dd ; data directive emitting one pointer
+IF ARCH_X86_64_X64, %define resp, resq, resd ; reserve directive for one pointer
+
+%if ARCH_X86_64_X32 ; x32: take the dword view of the given register
+    %define preg(reg) reg %+ d
+%else ; otherwise the register name is passed through unchanged
+    %define preg(reg) reg
+%endif
+
 %macro PUSH 1
     push %1
     %ifidn rstk, rsp
@@ -242,7 +305,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 %macro LOAD_IF_USED 1-*
     %rep %0
         %if %1 < num_args
-            mov r%1, r %+ %1 %+ mp
+            mov r%1, r %+ %1 %+ mq
         %endif
         %rotate 1
     %endrep
@@ -285,12 +348,18 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
         %assign %%i 0
         %rep n_arg_names
             CAT_UNDEF arg_name %+ %%i, q
+            CAT_UNDEF arg_name %+ %%i, p
             CAT_UNDEF arg_name %+ %%i, d
             CAT_UNDEF arg_name %+ %%i, w
             CAT_UNDEF arg_name %+ %%i, h
             CAT_UNDEF arg_name %+ %%i, b
             CAT_UNDEF arg_name %+ %%i, m
+            CAT_UNDEF arg_name %+ %%i, mq
             CAT_UNDEF arg_name %+ %%i, mp
+            CAT_UNDEF arg_name %+ %%i, md
+            CAT_UNDEF arg_name %+ %%i, mw
+            CAT_UNDEF arg_name %+ %%i, mh
+            CAT_UNDEF arg_name %+ %%i, mb
             CAT_UNDEF arg_name, %%i
             %assign %%i %%i+1
         %endrep
@@ -301,12 +370,18 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
     %assign %%i 0
     %rep %0
         %xdefine %1q r %+ %%i %+ q
+        %xdefine %1p r %+ %%i %+ p
         %xdefine %1d r %+ %%i %+ d
         %xdefine %1w r %+ %%i %+ w
         %xdefine %1h r %+ %%i %+ h
         %xdefine %1b r %+ %%i %+ b
         %xdefine %1m r %+ %%i %+ m
+        %xdefine %1mq r %+ %%i %+ mq
         %xdefine %1mp r %+ %%i %+ mp
+        %xdefine %1md r %+ %%i %+ md
+        %xdefine %1mw r %+ %%i %+ mw
+        %xdefine %1mh r %+ %%i %+ mh
+        %xdefine %1mb r %+ %%i %+ mb
         CAT_XDEFINE arg_name, %%i, %1
         %assign %%i %%i+1
         %rotate 1
-- 
1.8.1.1



More information about the x264-devel mailing list