[x264-devel] x86inc: add TAIL_CALL macro to abstract a common asm idiom

Loren Merritt git at videolan.org
Sat Feb 4 21:10:53 CET 2012


x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Fri Feb  3 06:27:18 2012 +0000| [efef20090a06a38f9d95755588d7830fb92a2a02] | committer: Jason Garrett-Glaser

x86inc: add TAIL_CALL macro to abstract a common asm idiom

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=efef20090a06a38f9d95755588d7830fb92a2a02
---

 common/x86/dct-64.asm |   30 ++++++------------------------
 common/x86/dct-a.asm  |   24 ++++--------------------
 common/x86/mc-a.asm   |    7 +------
 common/x86/x86inc.asm |   47 +++++++++++++++++++++++------------------------
 4 files changed, 34 insertions(+), 74 deletions(-)

diff --git a/common/x86/dct-64.asm b/common/x86/dct-64.asm
index 2a2c386..8f40222 100644
--- a/common/x86/dct-64.asm
+++ b/common/x86/dct-64.asm
@@ -141,10 +141,7 @@ cextern hsub_mul
 
 %macro SUB8x8_DCT8 0
 cglobal sub8x8_dct8, 3,3,14
-%if WIN64
-    call .skip_prologue
-    RET
-%endif
+    TAIL_CALL .skip_prologue, 0
 global current_function %+ .skip_prologue
 .skip_prologue:
     LOAD_DIFF8x4 0,1,2,3, none,none, r1, r2
@@ -194,10 +191,7 @@ SUB8x8_DCT8
 %macro ADD8x8_IDCT8 0
 cglobal add8x8_idct8, 2,2,16
     add r1, 128
-%if WIN64
-    call .skip_prologue
-    RET
-%endif
+    TAIL_CALL .skip_prologue, 0
 global current_function %+ .skip_prologue
 .skip_prologue:
     mova     m0, [r1-128]
@@ -260,10 +254,7 @@ cglobal sub8x8_dct, 3,3,10
 %if cpuflag(ssse3)
     mova m7, [hsub_mul]
 %endif
-%if WIN64
-    call .skip_prologue
-    RET
-%endif
+    TAIL_CALL .skip_prologue, 0
 global current_function %+ .skip_prologue
 .skip_prologue:
     SWAP 7, 9
@@ -287,10 +278,7 @@ cglobal sub8x8_dct8, 3,3,11
 %if cpuflag(ssse3)
     mova m7, [hsub_mul]
 %endif
-%if WIN64
-    call .skip_prologue
-    RET
-%endif
+    TAIL_CALL .skip_prologue, 0
 global current_function %+ .skip_prologue
 .skip_prologue:
     SWAP 7, 10
@@ -330,10 +318,7 @@ DCT_SUB8
 cglobal add8x8_idct8, 2,2,11
     add r0, 4*FDEC_STRIDE
     pxor m7, m7
-%if WIN64
-    call .skip_prologue
-    RET
-%endif
+    TAIL_CALL .skip_prologue, 0
 global current_function %+ .skip_prologue
 .skip_prologue:
     SWAP 7, 9
@@ -369,10 +354,7 @@ ADD8x8_IDCT8
 cglobal add8x8_idct, 2,2,11
     add  r0, 4*FDEC_STRIDE
     pxor m7, m7
-%if WIN64
-    call .skip_prologue
-    RET
-%endif
+    TAIL_CALL .skip_prologue, 0
 global current_function %+ .skip_prologue
 .skip_prologue:
     SWAP 7, 9
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index 7b355c7..1928604 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -406,12 +406,7 @@ cglobal %1, 3,3,%7
     add  r0, %3
     add  r1, %4-%5-%6*FENC_STRIDE
     add  r2, %4-%5-%6*FDEC_STRIDE
-%if WIN64
-    call %2.skip_prologue
-    RET
-%else
-    jmp  %2.skip_prologue
-%endif
+    TAIL_CALL %2.skip_prologue, 1
 %endmacro
 
 ;-----------------------------------------------------------------------------
@@ -440,12 +435,7 @@ cglobal %1, 2,2,11
     call %2.skip_prologue
     add  r0, %4-%5-%6*FDEC_STRIDE
     add  r1, %3
-%if WIN64
-    call %2.skip_prologue
-    RET
-%else
-    jmp  %2.skip_prologue
-%endif
+    TAIL_CALL %2.skip_prologue, 1
 %endmacro
 
 %if HIGH_BIT_DEPTH
@@ -680,10 +670,7 @@ INIT_XMM sse2
 cglobal add16x16_idct_dc, 2,2,8
     call .loop
     add       r0, FDEC_STRIDE*4
-%if WIN64
-    call .loop
-    RET
-%endif
+    TAIL_CALL .loop, 0
 .loop:
     add       r0, FDEC_STRIDE*4
     movq      m0, [r1+0]
@@ -712,10 +699,7 @@ cglobal add16x16_idct_dc, 2,2,8
 cglobal add16x16_idct_dc, 2,2,8
     call .loop
     add      r0, FDEC_STRIDE*4
-%if WIN64
-    call .loop
-    RET
-%endif
+    TAIL_CALL .loop, 0
 .loop:
     add      r0, FDEC_STRIDE*4
     mova     m0, [r1]
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 3d8423f..e460aa2 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -1142,12 +1142,7 @@ cglobal pixel_avg2_w16_cache64_ssse3
 %else
     lea    r6, [avg_w16_addr + r6]
 %endif
-%if UNIX64
-    jmp    r6
-%else
-    call   r6
-    RET
-%endif
+    TAIL_CALL r6, 1
 
 %assign j 0
 %assign k 1
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 4eb13a6..57ebc85 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -368,20 +368,14 @@ DECLARE_REG 14, R15, 120
     %assign xmm_regs_used 0
 %endmacro
 
+%define has_epilogue regs_used > 7 || xmm_regs_used > 6
+
 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL rsp
     POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
     ret
 %endmacro
 
-%macro REP_RET 0
-    %if regs_used > 7 || xmm_regs_used > 6
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %elif ARCH_X86_64 ; *nix x64 ;=============================================
 
 DECLARE_REG 0,  rdi
@@ -410,19 +404,13 @@ DECLARE_REG 14, R15, 72
     DEFINE_ARGS %4
 %endmacro
 
+%define has_epilogue regs_used > 9
+
 %macro RET 0
     POP_IF_USED 14, 13, 12, 11, 10, 9
     ret
 %endmacro
 
-%macro REP_RET 0
-    %if regs_used > 9
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %else ; X86_32 ;==============================================================
 
 DECLARE_REG 0, eax, 4
@@ -456,19 +444,13 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
     DEFINE_ARGS %4
 %endmacro
 
+%define has_epilogue regs_used > 3
+
 %macro RET 0
     POP_IF_USED 6, 5, 4, 3
     ret
 %endmacro
 
-%macro REP_RET 0
-    %if regs_used > 3
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %endif ;======================================================================
 
 %if WIN64 == 0
@@ -478,6 +460,23 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %endmacro
 %endif
 
+%macro REP_RET 0 ; return from the current function, running the epilogue only when one is needed
+    %if has_epilogue ; per-ABI predicate defined above (depends on regs_used / xmm_regs_used)
+        RET
+    %else ; nothing was saved by the prologue, so there is nothing to restore
+        rep ret
+    %endif
+%endmacro
+
+%macro TAIL_CALL 2 ; %1 = callee, %2 = is_nonadjacent (0 when the callee's code immediately follows the macro)
+    %if has_epilogue ; the epilogue must run after the callee returns, so a real call is required
+        call %1
+        RET
+    %elif %2 ; no epilogue and the callee lives elsewhere: a plain jump is enough
+        jmp %1
+    %endif ; no epilogue and adjacent callee: emit nothing and fall through into it
+%endmacro
+
 ;=============================================================================
 ; arch-independent part
 ;=============================================================================



More information about the x264-devel mailing list