[x264-devel] x86inc: add TAIL_CALL macro to abstract a common asm idiom
Loren Merritt
git at videolan.org
Sat Feb 4 21:10:53 CET 2012
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Fri Feb 3 06:27:18 2012 +0000| [efef20090a06a38f9d95755588d7830fb92a2a02] | committer: Jason Garrett-Glaser
x86inc: add TAIL_CALL macro to abstract a common asm idiom
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=efef20090a06a38f9d95755588d7830fb92a2a02
---
common/x86/dct-64.asm | 30 ++++++------------------------
common/x86/dct-a.asm | 24 ++++--------------------
common/x86/mc-a.asm | 7 +------
common/x86/x86inc.asm | 47 +++++++++++++++++++++++------------------------
4 files changed, 34 insertions(+), 74 deletions(-)
diff --git a/common/x86/dct-64.asm b/common/x86/dct-64.asm
index 2a2c386..8f40222 100644
--- a/common/x86/dct-64.asm
+++ b/common/x86/dct-64.asm
@@ -141,10 +141,7 @@ cextern hsub_mul
%macro SUB8x8_DCT8 0
cglobal sub8x8_dct8, 3,3,14
-%if WIN64
- call .skip_prologue
- RET
-%endif
+ TAIL_CALL .skip_prologue, 0
global current_function %+ .skip_prologue
.skip_prologue:
LOAD_DIFF8x4 0,1,2,3, none,none, r1, r2
@@ -194,10 +191,7 @@ SUB8x8_DCT8
%macro ADD8x8_IDCT8 0
cglobal add8x8_idct8, 2,2,16
add r1, 128
-%if WIN64
- call .skip_prologue
- RET
-%endif
+ TAIL_CALL .skip_prologue, 0
global current_function %+ .skip_prologue
.skip_prologue:
mova m0, [r1-128]
@@ -260,10 +254,7 @@ cglobal sub8x8_dct, 3,3,10
%if cpuflag(ssse3)
mova m7, [hsub_mul]
%endif
-%if WIN64
- call .skip_prologue
- RET
-%endif
+ TAIL_CALL .skip_prologue, 0
global current_function %+ .skip_prologue
.skip_prologue:
SWAP 7, 9
@@ -287,10 +278,7 @@ cglobal sub8x8_dct8, 3,3,11
%if cpuflag(ssse3)
mova m7, [hsub_mul]
%endif
-%if WIN64
- call .skip_prologue
- RET
-%endif
+ TAIL_CALL .skip_prologue, 0
global current_function %+ .skip_prologue
.skip_prologue:
SWAP 7, 10
@@ -330,10 +318,7 @@ DCT_SUB8
cglobal add8x8_idct8, 2,2,11
add r0, 4*FDEC_STRIDE
pxor m7, m7
-%if WIN64
- call .skip_prologue
- RET
-%endif
+ TAIL_CALL .skip_prologue, 0
global current_function %+ .skip_prologue
.skip_prologue:
SWAP 7, 9
@@ -369,10 +354,7 @@ ADD8x8_IDCT8
cglobal add8x8_idct, 2,2,11
add r0, 4*FDEC_STRIDE
pxor m7, m7
-%if WIN64
- call .skip_prologue
- RET
-%endif
+ TAIL_CALL .skip_prologue, 0
global current_function %+ .skip_prologue
.skip_prologue:
SWAP 7, 9
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index 7b355c7..1928604 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -406,12 +406,7 @@ cglobal %1, 3,3,%7
add r0, %3
add r1, %4-%5-%6*FENC_STRIDE
add r2, %4-%5-%6*FDEC_STRIDE
-%if WIN64
- call %2.skip_prologue
- RET
-%else
- jmp %2.skip_prologue
-%endif
+ TAIL_CALL %2.skip_prologue, 1
%endmacro
;-----------------------------------------------------------------------------
@@ -440,12 +435,7 @@ cglobal %1, 2,2,11
call %2.skip_prologue
add r0, %4-%5-%6*FDEC_STRIDE
add r1, %3
-%if WIN64
- call %2.skip_prologue
- RET
-%else
- jmp %2.skip_prologue
-%endif
+ TAIL_CALL %2.skip_prologue, 1
%endmacro
%if HIGH_BIT_DEPTH
@@ -680,10 +670,7 @@ INIT_XMM sse2
cglobal add16x16_idct_dc, 2,2,8
call .loop
add r0, FDEC_STRIDE*4
-%if WIN64
- call .loop
- RET
-%endif
+ TAIL_CALL .loop, 0
.loop:
add r0, FDEC_STRIDE*4
movq m0, [r1+0]
@@ -712,10 +699,7 @@ cglobal add16x16_idct_dc, 2,2,8
cglobal add16x16_idct_dc, 2,2,8
call .loop
add r0, FDEC_STRIDE*4
-%if WIN64
- call .loop
- RET
-%endif
+ TAIL_CALL .loop, 0
.loop:
add r0, FDEC_STRIDE*4
mova m0, [r1]
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 3d8423f..e460aa2 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -1142,12 +1142,7 @@ cglobal pixel_avg2_w16_cache64_ssse3
%else
lea r6, [avg_w16_addr + r6]
%endif
-%if UNIX64
- jmp r6
-%else
- call r6
- RET
-%endif
+ TAIL_CALL r6, 1
%assign j 0
%assign k 1
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 4eb13a6..57ebc85 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -368,20 +368,14 @@ DECLARE_REG 14, R15, 120
%assign xmm_regs_used 0
%endmacro
+%define has_epilogue regs_used > 7 || xmm_regs_used > 6
+
%macro RET 0
WIN64_RESTORE_XMM_INTERNAL rsp
POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
ret
%endmacro
-%macro REP_RET 0
- %if regs_used > 7 || xmm_regs_used > 6
- RET
- %else
- rep ret
- %endif
-%endmacro
-
%elif ARCH_X86_64 ; *nix x64 ;=============================================
DECLARE_REG 0, rdi
@@ -410,19 +404,13 @@ DECLARE_REG 14, R15, 72
DEFINE_ARGS %4
%endmacro
+%define has_epilogue regs_used > 9
+
%macro RET 0
POP_IF_USED 14, 13, 12, 11, 10, 9
ret
%endmacro
-%macro REP_RET 0
- %if regs_used > 9
- RET
- %else
- rep ret
- %endif
-%endmacro
-
%else ; X86_32 ;==============================================================
DECLARE_REG 0, eax, 4
@@ -456,19 +444,13 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
DEFINE_ARGS %4
%endmacro
+%define has_epilogue regs_used > 3
+
%macro RET 0
POP_IF_USED 6, 5, 4, 3
ret
%endmacro
-%macro REP_RET 0
- %if regs_used > 3
- RET
- %else
- rep ret
- %endif
-%endmacro
-
%endif ;======================================================================
%if WIN64 == 0
@@ -478,6 +460,23 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%endmacro
%endif
+%macro REP_RET 0 ; return from the function, choosing between the RET macro and a bare rep ret
+ %if has_epilogue ; has_epilogue is defined per ABI next to each RET macro as a test on the register counts
+ RET
+ %else
+ rep ret ; the epilogue condition is false, so emit the rep-prefixed return directly
+ %endif
+%endmacro
+
+%macro TAIL_CALL 2 ; callee, is_nonadjacent (pass 0 when the callee label follows this macro directly)
+ %if has_epilogue ; the epilogue must still run after the callee, so a plain jump cannot be used
+ call %1
+ RET ; perform the epilogue and return once the callee comes back
+ %elif %2 ; no epilogue and the callee is located elsewhere: jump to it
+ jmp %1
+ %endif ; no epilogue and adjacent callee: emit nothing and fall through into it
+%endmacro
+
;=============================================================================
; arch-independent part
;=============================================================================
More information about the x264-devel mailing list