[x264-devel] Windows x64 support

Loren Merritt lorenm at u.washington.edu
Thu Dec 18 19:23:41 CET 2008


On Thu, 18 Dec 2008, BugMaster wrote:
> On Thu, 18 Dec 2008 00:34:51 +0000 (UTC), Loren Merritt wrote:
>
>>> +x264_dequant_%2x%2_%1.skip_prologue:
>>> [...]
>>> +    jl x264_dequant_%2x%2_%1.skip_prologue
>
>> .skip_prologue:
>> [...]
>>      jl x264_dequant_%2x%2_%1 %+ .skip_prologue
>
> It wouldn't compile with defined PREFIX because yasm for some reason
> will not replace x264_dequant_%2x%2_%1 with _x264_dequant_%2x%2_%1.

Huh, %+ works with literal names, but for names containing macro arguments
yasm expands the defines in an inconvenient order.
How about this instead?


By the way, I assume all the movsxd instructions can go away if I change the
function prototypes to take stride as an intptr_t? (That would be a separate
patch and would need to be benchmarked, but it would be nice.)

--Loren Merritt
-------------- next part --------------
diff --git a/common/x86/dct-32.asm b/common/x86/dct-32.asm
index be0008a..987c938 100644
--- a/common/x86/dct-32.asm
+++ b/common/x86/dct-32.asm
@@ -189,7 +189,7 @@ dct8_mmx:
 ; void x264_sub8x8_dct8_mmx( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
 ;-----------------------------------------------------------------------------
 cglobal x264_sub8x8_dct8_mmx, 3,3
-global x264_sub8x8_dct8_mmx %+ .skip_prologue
+global x264_sub8x8_dct8_mmx.skip_prologue
 .skip_prologue:
     INIT_MMX
     call load_diff_4x8_mmx
@@ -255,7 +255,7 @@ idct8_mmx:
 ; void x264_add8x8_idct8_mmx( uint8_t *dst, int16_t dct[8][8] )
 ;-----------------------------------------------------------------------------
 cglobal x264_add8x8_idct8_mmx, 2,2
-global x264_add8x8_idct8_mmx %+ .skip_prologue
+global x264_add8x8_idct8_mmx.skip_prologue
 .skip_prologue:
     INIT_MMX
     add word [r1], 32
@@ -348,7 +348,7 @@ INIT_XMM
 ; void x264_sub8x8_dct8_sse2( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
 ;-----------------------------------------------------------------------------
 cglobal x264_sub8x8_dct8_sse2, 3,3
-global x264_sub8x8_dct8_sse2 %+ .skip_prologue
+global x264_sub8x8_dct8_sse2.skip_prologue
 .skip_prologue:
     LOAD_DIFF m0, m7, none, [r1+0*FENC_STRIDE], [r2+0*FDEC_STRIDE]
     LOAD_DIFF m1, m7, none, [r1+1*FENC_STRIDE], [r2+1*FDEC_STRIDE]
@@ -372,7 +372,7 @@ global x264_sub8x8_dct8_sse2 %+ .skip_prologue
 ; void x264_add8x8_idct8_sse2( uint8_t *p_dst, int16_t dct[8][8] )
 ;-----------------------------------------------------------------------------
 cglobal x264_add8x8_idct8_sse2, 2,2
-global x264_add8x8_idct8_sse2 %+ .skip_prologue
+global x264_add8x8_idct8_sse2.skip_prologue
 .skip_prologue:
     UNSPILL r1, 1,2,3,5,6,7
     IDCT8_1D   0,1,2,3,4,5,6,7,r1
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index 012e25a..2f60e02 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -223,21 +223,21 @@ cglobal %1, 2,2
 %endmacro
 
 %ifndef ARCH_X86_64
-SUB_NxN_DCT  x264_sub8x8_dct_mmx,    x264_sub4x4_dct_mmx  %+ .skip_prologue, 32, 4, 0, 0
-ADD_NxN_IDCT x264_add8x8_idct_mmx,   x264_add4x4_idct_mmx %+ .skip_prologue, 32, 4, 0, 0
-SUB_NxN_DCT  x264_sub16x16_dct_mmx,  x264_sub8x8_dct_mmx  %+ .skip_prologue, 32, 8, 4, 4
-ADD_NxN_IDCT x264_add16x16_idct_mmx, x264_add8x8_idct_mmx %+ .skip_prologue, 32, 8, 4, 4
+SUB_NxN_DCT  x264_sub8x8_dct_mmx,    x264_sub4x4_dct_mmx.skip_prologue,  32, 4, 0, 0
+ADD_NxN_IDCT x264_add8x8_idct_mmx,   x264_add4x4_idct_mmx.skip_prologue, 32, 4, 0, 0
+SUB_NxN_DCT  x264_sub16x16_dct_mmx,  x264_sub8x8_dct_mmx.skip_prologue,  32, 8, 4, 4
+ADD_NxN_IDCT x264_add16x16_idct_mmx, x264_add8x8_idct_mmx.skip_prologue, 32, 8, 4, 4
 
 cextern x264_sub8x8_dct8_mmx.skip_prologue
 cextern x264_add8x8_idct8_mmx.skip_prologue
-SUB_NxN_DCT  x264_sub16x16_dct8_mmx,  x264_sub8x8_dct8_mmx  %+ .skip_prologue, 128, 8, 0, 0
-ADD_NxN_IDCT x264_add16x16_idct8_mmx, x264_add8x8_idct8_mmx %+ .skip_prologue, 128, 8, 0, 0
+SUB_NxN_DCT  x264_sub16x16_dct8_mmx,  x264_sub8x8_dct8_mmx.skip_prologue,  128, 8, 0, 0
+ADD_NxN_IDCT x264_add16x16_idct8_mmx, x264_add8x8_idct8_mmx.skip_prologue, 128, 8, 0, 0
 %define x264_sub8x8_dct8_sse2 x264_sub8x8_dct8_sse2.skip_prologue
 %define x264_add8x8_idct8_sse2 x264_add8x8_idct8_sse2.skip_prologue
 %endif
 
-SUB_NxN_DCT  x264_sub16x16_dct_sse2,  x264_sub8x8_dct_sse2  %+ .skip_prologue, 64, 8, 0, 4
-ADD_NxN_IDCT x264_add16x16_idct_sse2, x264_add8x8_idct_sse2 %+ .skip_prologue, 64, 8, 0, 4
+SUB_NxN_DCT  x264_sub16x16_dct_sse2,  x264_sub8x8_dct_sse2.skip_prologue,  64, 8, 0, 4
+ADD_NxN_IDCT x264_add16x16_idct_sse2, x264_add8x8_idct_sse2.skip_prologue, 64, 8, 0, 4
 
 cextern x264_sub8x8_dct8_sse2
 cextern x264_add8x8_idct8_sse2
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 0580f5d..b43d5d1 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -723,7 +723,7 @@ cglobal x264_prefetch_ref_mmxext, 3,3
 cglobal x264_mc_chroma_%1, 0,6
 %if mmsize == 16
     cmp dword r6m, 4
-    jle x264_mc_chroma_mmxext %+ .skip_prologue
+    jle x264_mc_chroma_mmxext.skip_prologue
 %endif
 .skip_prologue:
     MC_CHROMA_START
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 9a4a92b..33a2f09 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -303,20 +303,14 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
 
 ; Symbol prefix for C linkage
 %macro cglobal 1-2+
+    %ifdef PREFIX
+        %xdefine %1 _%1
+        %xdefine %1.skip_prologue _%1.skip_prologue
+    %endif
     %ifidn __OUTPUT_FORMAT__,elf
-        %ifdef PREFIX
-            global _%1:function hidden
-            %define %1 _%1
-        %else
-            global %1:function hidden
-        %endif
+        global %1:function hidden
     %else
-        %ifdef PREFIX
-            global _%1
-            %define %1 _%1
-        %else
-            global %1
-        %endif
+        global %1
     %endif
     align function_align
     %1:
@@ -328,11 +322,9 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
 
 %macro cextern 1
     %ifdef PREFIX
-        extern _%1
-        %define %1 _%1
-    %else
-        extern %1
+        %xdefine %1 _%1
     %endif
+    extern %1
 %endmacro
 
 ; This is needed for ELF, otherwise the GNU linker assumes the stack is


More information about the x264-devel mailing list