[x264-devel] commit: Cosmetics: cleaner syntax for defining temporary registers in asm ( Loren Merritt )
git version control
git at videolan.org
Tue Dec 30 04:16:02 CET 2008
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Mon Dec 29 21:52:25 2008 -0500| [648e132f7135c7e18625198e3ffe2c6c7d824df6] | committer: Jason Garrett-Glaser
Cosmetics: cleaner syntax for defining temporary registers in asm
Globally define t#[qdwb], so that only t# needs to be locally defined when reorganizing registers
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=648e132f7135c7e18625198e3ffe2c6c7d824df6
---
common/x86/cabac-a.asm | 13 ++-----------
common/x86/mc-a.asm | 25 +++++--------------------
common/x86/pixel-a.asm | 28 ++++++++++------------------
common/x86/quant-a.asm | 14 ++------------
common/x86/x86inc.asm | 23 +++++++++++++++++++++++
5 files changed, 42 insertions(+), 61 deletions(-)
diff --git a/common/x86/cabac-a.asm b/common/x86/cabac-a.asm
index ea35b0c..4bfb330 100644
--- a/common/x86/cabac-a.asm
+++ b/common/x86/cabac-a.asm
@@ -31,21 +31,12 @@ cextern x264_cabac_range_lps
cextern x264_cabac_transition
cextern x264_cabac_renorm_shift
-%macro DEF_TMP 16
- %rep 8
- %define t%1d r%9d
- %define t%1b r%9b
- %define t%1 r%9
- %rotate 1
- %endrep
-%endmacro
-
; t3 must be ecx, since it's used for shift.
%ifdef ARCH_X86_64
- DEF_TMP 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,10
+ DECLARE_REG_TMP 0,1,2,3,4,5,6,10
%define pointer resq
%else
- DEF_TMP 0,1,2,3,4,5,6,7, 0,3,2,1,4,5,6,3
+ DECLARE_REG_TMP 0,3,2,1,4,5,6,3
%define pointer resd
%endif
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 0580f5d..3b0ffda 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -41,27 +41,13 @@ SECTION .text
; implicit bipred only:
; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64
%ifdef ARCH_X86_64
- %define t0 r0
- %define t1 r1
- %define t2 r2
- %define t3 r3
- %define t4 r4
- %define t5 r5
- %define t6d r10d
- %define t7d r11d
+ DECLARE_REG_TMP 0,1,2,3,4,5,10,11
%macro AVG_START 0
PROLOGUE 6,7
.height_loop:
%endmacro
%else
- %define t0 r1
- %define t1 r2
- %define t2 r3
- %define t3 r4
- %define t4 r5
- %define t5 r6
- %define t6d r1d
- %define t7d r2d
+ DECLARE_REG_TMP 1,2,3,4,5,6,1,2
%macro AVG_START 0
PROLOGUE 0,7
mov t0, r0m
@@ -690,12 +676,11 @@ cglobal x264_prefetch_ref_mmxext, 3,3
; chroma MC
;=============================================================================
- %define t0d eax
- %define t0 rax
+ %define t0 rax
%ifdef ARCH_X86_64
- %define t1d r10d
+ %define t1 r10
%else
- %define t1d r1d
+ %define t1 r1
%endif
%macro MC_CHROMA_START 0
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index 6314e56..42f9113 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -230,9 +230,9 @@ cglobal x264_pixel_ssd_4x4_sse4, 4,4
pxor m6, m6 ; sum squared
pxor m7, m7 ; zero
%ifdef ARCH_X86_64
- %define t3d r3d
+ %define t3 r3
%else
- %define t3d r2d
+ %define t3 r2
%endif
%endmacro
@@ -1028,15 +1028,13 @@ cglobal x264_intra_satd_x3_4x4_%1, 2,6
; stack is 16 byte aligned because abi says so
%define top_1d rsp-8 ; size 8
%define left_1d rsp-16 ; size 8
- %define t0 r10
- %define t0d r10d
+ %define t0 r10
%else
; stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned
SUB esp, 16
%define top_1d esp+8
%define left_1d esp
- %define t0 r2
- %define t0d r2d
+ %define t0 r2
%endif
call load_hadamard
@@ -1068,17 +1066,11 @@ cglobal x264_intra_satd_x3_4x4_%1, 2,6
RET
%ifdef ARCH_X86_64
- %define t0 r10
- %define t0d r10d
- %define t2 r11
- %define t2w r11w
- %define t2d r11d
+ %define t0 r10
+ %define t2 r11
%else
- %define t0 r0
- %define t0d r0d
- %define t2 r2
- %define t2w r2w
- %define t2d r2d
+ %define t0 r0
+ %define t2 r2
%endif
;-----------------------------------------------------------------------------
@@ -1731,10 +1723,10 @@ cglobal x264_pixel_ssim_end4_sse2, 3,3
%macro ADS_START 1 ; unroll_size
%ifdef ARCH_X86_64
- %define t0 r6
+ %define t0 r6
mov r10, rsp
%else
- %define t0 r4
+ %define t0 r4
mov rbp, rsp
%endif
mov r0d, r5m
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index a039414..8addcad 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -241,19 +241,9 @@ QUANT_DC x264_quant_2x2_dc_ssse3, 1
%endmacro
%ifdef ARCH_X86_64
- %define t0 r4
- %define t0d r4d
- %define t1 r3
- %define t1d r3d
- %define t2 r2
- %define t2d r2d
+ DECLARE_REG_TMP 4,3,2
%else
- %define t0 r2
- %define t0d r2d
- %define t1 r0
- %define t1d r0d
- %define t2 r1
- %define t2d r1d
+ DECLARE_REG_TMP 2,0,1
%endif
%macro DEQUANT_START 2
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 9a4a92b..b2aee3f 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -116,6 +116,29 @@ DECLARE_REG_SIZE si, sil
DECLARE_REG_SIZE di, dil
DECLARE_REG_SIZE bp, bpl
+; t# defines for when per-arch register allocation is more complex than just function arguments
+
+%macro DECLARE_REG_TMP 1-*
+ %assign %%i 0
+ %rep %0
+ CAT_XDEFINE t, %%i, r%1
+ %assign %%i %%i+1
+ %rotate 1
+ %endrep
+%endmacro
+
+%macro DECLARE_REG_TMP_SIZE 0-*
+ %rep %0
+ %define t%1q t%1 %+ q
+ %define t%1d t%1 %+ d
+ %define t%1w t%1 %+ w
+ %define t%1b t%1 %+ b
+ %rotate 1
+ %endrep
+%endmacro
+
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
+
%ifdef ARCH_X86_64
%define gprsize 8
%else
More information about the x264-devel mailing list