[x264-devel] commit: Cosmetics: cleaner syntax for defining temporary registers in asm ( Loren Merritt )

Tue Dec 30 04:16:02 CET 2008

x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Mon Dec 29 21:52:25 2008 -0500| [648e132f7135c7e18625198e3ffe2c6c7d824df6] | committer: Jason Garrett-Glaser 

Cosmetics: cleaner syntax for defining temporary registers in asm
Globally define t#[qdwb], so that only t# needs to be locally defined when reorganizing registers

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=648e132f7135c7e18625198e3ffe2c6c7d824df6
---

 common/x86/cabac-a.asm |   13 ++-----------
 common/x86/mc-a.asm    |   25 +++++--------------------
 common/x86/pixel-a.asm |   28 ++++++++++------------------
 common/x86/quant-a.asm |   14 ++------------
 common/x86/x86inc.asm  |   23 +++++++++++++++++++++++
 5 files changed, 42 insertions(+), 61 deletions(-)

diff --git a/common/x86/cabac-a.asm b/common/x86/cabac-a.asm
index ea35b0c..4bfb330 100644
--- a/common/x86/cabac-a.asm
+++ b/common/x86/cabac-a.asm
@@ -31,21 +31,12 @@ cextern x264_cabac_range_lps
 cextern x264_cabac_transition
 cextern x264_cabac_renorm_shift
 
-%macro DEF_TMP 16
-    %rep 8
-        %define t%1d r%9d
-        %define t%1b r%9b
-        %define t%1  r%9
-        %rotate 1
-    %endrep
-%endmacro
-
 ; t3 must be ecx, since it's used for shift.
 %ifdef ARCH_X86_64
-    DEF_TMP 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,10
+    DECLARE_REG_TMP 0,1,2,3,4,5,6,10
     %define pointer resq
 %else
-    DEF_TMP 0,1,2,3,4,5,6,7, 0,3,2,1,4,5,6,3
+    DECLARE_REG_TMP 0,3,2,1,4,5,6,3
     %define pointer resd
 %endif
 
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 0580f5d..3b0ffda 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -41,27 +41,13 @@ SECTION .text
 ; implicit bipred only:
 ; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64
 %ifdef ARCH_X86_64
-    %define t0 r0
-    %define t1 r1
-    %define t2 r2
-    %define t3 r3
-    %define t4 r4
-    %define t5 r5
-    %define t6d r10d
-    %define t7d r11d
+    DECLARE_REG_TMP 0,1,2,3,4,5,10,11
     %macro AVG_START 0
         PROLOGUE 6,7
         .height_loop:
     %endmacro
 %else
-    %define t0 r1
-    %define t1 r2
-    %define t2 r3
-    %define t3 r4
-    %define t4 r5
-    %define t5 r6
-    %define t6d r1d
-    %define t7d r2d
+    DECLARE_REG_TMP 1,2,3,4,5,6,1,2
     %macro AVG_START 0
         PROLOGUE 0,7
         mov t0, r0m
@@ -690,12 +676,11 @@ cglobal x264_prefetch_ref_mmxext, 3,3
 ; chroma MC
 ;=============================================================================
 
-    %define t0d  eax
-    %define t0   rax
+    %define t0 rax
 %ifdef ARCH_X86_64
-    %define t1d  r10d
+    %define t1 r10
 %else
-    %define t1d  r1d
+    %define t1 r1
 %endif
 
 %macro MC_CHROMA_START 0
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index 6314e56..42f9113 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -230,9 +230,9 @@ cglobal x264_pixel_ssd_4x4_sse4, 4,4
     pxor  m6, m6    ; sum squared
     pxor  m7, m7    ; zero
 %ifdef ARCH_X86_64
-    %define t3d r3d
+    %define t3 r3
 %else
-    %define t3d r2d
+    %define t3 r2
 %endif
 %endmacro
 
@@ -1028,15 +1028,13 @@ cglobal x264_intra_satd_x3_4x4_%1, 2,6
     ; stack is 16 byte aligned because abi says so
     %define  top_1d  rsp-8  ; size 8
     %define  left_1d rsp-16 ; size 8
-    %define  t0  r10
-    %define  t0d r10d
+    %define  t0 r10
 %else
     ; stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned
     SUB         esp, 16
     %define  top_1d  esp+8
     %define  left_1d esp
-    %define  t0  r2
-    %define  t0d r2d
+    %define  t0 r2
 %endif
 
     call load_hadamard
@@ -1068,17 +1066,11 @@ cglobal x264_intra_satd_x3_4x4_%1, 2,6
     RET
 
 %ifdef ARCH_X86_64
-    %define  t0  r10
-    %define  t0d r10d
-    %define  t2  r11
-    %define  t2w r11w
-    %define  t2d r11d
+    %define  t0 r10
+    %define  t2 r11
 %else
-    %define  t0  r0
-    %define  t0d r0d
-    %define  t2  r2
-    %define  t2w r2w
-    %define  t2d r2d
+    %define  t0 r0
+    %define  t2 r2
 %endif
 
 ;-----------------------------------------------------------------------------
@@ -1731,10 +1723,10 @@ cglobal x264_pixel_ssim_end4_sse2, 3,3
 
 %macro ADS_START 1 ; unroll_size
 %ifdef ARCH_X86_64
-    %define t0  r6
+    %define t0 r6
     mov     r10, rsp
 %else
-    %define t0  r4
+    %define t0 r4
     mov     rbp, rsp
 %endif
     mov     r0d, r5m
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index a039414..8addcad 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -241,19 +241,9 @@ QUANT_DC x264_quant_2x2_dc_ssse3, 1
 %endmacro
 
 %ifdef ARCH_X86_64
-    %define t0  r4
-    %define t0d r4d
-    %define t1  r3
-    %define t1d r3d
-    %define t2  r2
-    %define t2d r2d
+    DECLARE_REG_TMP 4,3,2
 %else
-    %define t0  r2
-    %define t0d r2d
-    %define t1  r0
-    %define t1d r0d
-    %define t2  r1
-    %define t2d r1d
+    DECLARE_REG_TMP 2,0,1
 %endif
 
 %macro DEQUANT_START 2
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 9a4a92b..b2aee3f 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -116,6 +116,29 @@ DECLARE_REG_SIZE si, sil
 DECLARE_REG_SIZE di, dil
 DECLARE_REG_SIZE bp, bpl
 
+; t# defines for when per-arch register allocation is more complex than just function arguments
+
+%macro DECLARE_REG_TMP 1-* ; args: register numbers; the k-th argument n maps temporary t<k-1> to r<n>
+    %assign %%i 0 ; index of the next t# to define
+    %rep %0 ; one iteration per argument
+        CAT_XDEFINE t, %%i, r%1 ; presumably defines t<i> as r<first arg>; CAT_XDEFINE is declared outside this hunk
+        %assign %%i %%i+1
+        %rotate 1 ; shift the argument list so the next register number becomes %1
+    %endrep
+%endmacro
+
+%macro DECLARE_REG_TMP_SIZE 0-* ; args: indices n of the t<n> names that get size-suffixed aliases
+    %rep %0 ; per index: t<n>q/d/w/b are each defined as t<n> with that suffix pasted on via %+
+        %define t%1q t%1 %+ q
+        %define t%1d t%1 %+ d
+        %define t%1w t%1 %+ w
+        %define t%1b t%1 %+ b
+        %rotate 1 ; advance to the next index
+    %endrep
+%endmacro
+
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7 ; declare the suffixed aliases for t0..t7 once, so files only define t# itself
+
 %ifdef ARCH_X86_64
     %define gprsize 8
 %else