[x264-devel] Windows x64 support
Loren Merritt
lorenm at u.washington.edu
Thu Dec 18 19:23:41 CET 2008
On Thu, 18 Dec 2008, BugMaster wrote:
> On Thu, 18 Dec 2008 00:34:51 +0000 (UTC), Loren Merritt wrote:
>
>>> +x264_dequant_%2x%2_%1.skip_prologue:
>>> [...]
>>> + jl x264_dequant_%2x%2_%1.skip_prologue
>
>> .skip_prologue:
>> [...]
>> jl x264_dequant_%2x%2_%1 %+ .skip_prologue
>
> It wouldn't compile with PREFIX defined because yasm, for some reason,
> will not replace x264_dequant_%2x%2_%1 with _x264_dequant_%2x%2_%1.
Huh, %+ works with literal names, but for names containing macro arguments
it evaluates the defines in an inconvenient order.
How about this instead?
btw, I assume all the movsxd instructions can go away if I change the
function prototypes to take the stride as an intptr_t? (That's a separate
patch and needs to be benchmarked, but it would be nice.)
--Loren Merritt
-------------- next part --------------
diff --git a/common/x86/dct-32.asm b/common/x86/dct-32.asm
index be0008a..987c938 100644
--- a/common/x86/dct-32.asm
+++ b/common/x86/dct-32.asm
@@ -189,7 +189,7 @@ dct8_mmx:
; void x264_sub8x8_dct8_mmx( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
;-----------------------------------------------------------------------------
cglobal x264_sub8x8_dct8_mmx, 3,3
-global x264_sub8x8_dct8_mmx %+ .skip_prologue
+global x264_sub8x8_dct8_mmx.skip_prologue
.skip_prologue:
INIT_MMX
call load_diff_4x8_mmx
@@ -255,7 +255,7 @@ idct8_mmx:
; void x264_add8x8_idct8_mmx( uint8_t *dst, int16_t dct[8][8] )
;-----------------------------------------------------------------------------
cglobal x264_add8x8_idct8_mmx, 2,2
-global x264_add8x8_idct8_mmx %+ .skip_prologue
+global x264_add8x8_idct8_mmx.skip_prologue
.skip_prologue:
INIT_MMX
add word [r1], 32
@@ -348,7 +348,7 @@ INIT_XMM
; void x264_sub8x8_dct8_sse2( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
;-----------------------------------------------------------------------------
cglobal x264_sub8x8_dct8_sse2, 3,3
-global x264_sub8x8_dct8_sse2 %+ .skip_prologue
+global x264_sub8x8_dct8_sse2.skip_prologue
.skip_prologue:
LOAD_DIFF m0, m7, none, [r1+0*FENC_STRIDE], [r2+0*FDEC_STRIDE]
LOAD_DIFF m1, m7, none, [r1+1*FENC_STRIDE], [r2+1*FDEC_STRIDE]
@@ -372,7 +372,7 @@ global x264_sub8x8_dct8_sse2 %+ .skip_prologue
; void x264_add8x8_idct8_sse2( uint8_t *p_dst, int16_t dct[8][8] )
;-----------------------------------------------------------------------------
cglobal x264_add8x8_idct8_sse2, 2,2
-global x264_add8x8_idct8_sse2 %+ .skip_prologue
+global x264_add8x8_idct8_sse2.skip_prologue
.skip_prologue:
UNSPILL r1, 1,2,3,5,6,7
IDCT8_1D 0,1,2,3,4,5,6,7,r1
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index 012e25a..2f60e02 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -223,21 +223,21 @@ cglobal %1, 2,2
%endmacro
%ifndef ARCH_X86_64
-SUB_NxN_DCT x264_sub8x8_dct_mmx, x264_sub4x4_dct_mmx %+ .skip_prologue, 32, 4, 0, 0
-ADD_NxN_IDCT x264_add8x8_idct_mmx, x264_add4x4_idct_mmx %+ .skip_prologue, 32, 4, 0, 0
-SUB_NxN_DCT x264_sub16x16_dct_mmx, x264_sub8x8_dct_mmx %+ .skip_prologue, 32, 8, 4, 4
-ADD_NxN_IDCT x264_add16x16_idct_mmx, x264_add8x8_idct_mmx %+ .skip_prologue, 32, 8, 4, 4
+SUB_NxN_DCT x264_sub8x8_dct_mmx, x264_sub4x4_dct_mmx.skip_prologue, 32, 4, 0, 0
+ADD_NxN_IDCT x264_add8x8_idct_mmx, x264_add4x4_idct_mmx.skip_prologue, 32, 4, 0, 0
+SUB_NxN_DCT x264_sub16x16_dct_mmx, x264_sub8x8_dct_mmx.skip_prologue, 32, 8, 4, 4
+ADD_NxN_IDCT x264_add16x16_idct_mmx, x264_add8x8_idct_mmx.skip_prologue, 32, 8, 4, 4
cextern x264_sub8x8_dct8_mmx.skip_prologue
cextern x264_add8x8_idct8_mmx.skip_prologue
-SUB_NxN_DCT x264_sub16x16_dct8_mmx, x264_sub8x8_dct8_mmx %+ .skip_prologue, 128, 8, 0, 0
-ADD_NxN_IDCT x264_add16x16_idct8_mmx, x264_add8x8_idct8_mmx %+ .skip_prologue, 128, 8, 0, 0
+SUB_NxN_DCT x264_sub16x16_dct8_mmx, x264_sub8x8_dct8_mmx.skip_prologue, 128, 8, 0, 0
+ADD_NxN_IDCT x264_add16x16_idct8_mmx, x264_add8x8_idct8_mmx.skip_prologue, 128, 8, 0, 0
%define x264_sub8x8_dct8_sse2 x264_sub8x8_dct8_sse2.skip_prologue
%define x264_add8x8_idct8_sse2 x264_add8x8_idct8_sse2.skip_prologue
%endif
-SUB_NxN_DCT x264_sub16x16_dct_sse2, x264_sub8x8_dct_sse2 %+ .skip_prologue, 64, 8, 0, 4
-ADD_NxN_IDCT x264_add16x16_idct_sse2, x264_add8x8_idct_sse2 %+ .skip_prologue, 64, 8, 0, 4
+SUB_NxN_DCT x264_sub16x16_dct_sse2, x264_sub8x8_dct_sse2.skip_prologue, 64, 8, 0, 4
+ADD_NxN_IDCT x264_add16x16_idct_sse2, x264_add8x8_idct_sse2.skip_prologue, 64, 8, 0, 4
cextern x264_sub8x8_dct8_sse2
cextern x264_add8x8_idct8_sse2
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 0580f5d..b43d5d1 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -723,7 +723,7 @@ cglobal x264_prefetch_ref_mmxext, 3,3
cglobal x264_mc_chroma_%1, 0,6
%if mmsize == 16
cmp dword r6m, 4
- jle x264_mc_chroma_mmxext %+ .skip_prologue
+ jle x264_mc_chroma_mmxext.skip_prologue
%endif
.skip_prologue:
MC_CHROMA_START
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 9a4a92b..33a2f09 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -303,20 +303,14 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
; Symbol prefix for C linkage
%macro cglobal 1-2+
+ %ifdef PREFIX
+ %xdefine %1 _%1
+ %xdefine %1.skip_prologue _%1.skip_prologue
+ %endif
%ifidn __OUTPUT_FORMAT__,elf
- %ifdef PREFIX
- global _%1:function hidden
- %define %1 _%1
- %else
- global %1:function hidden
- %endif
+ global %1:function hidden
%else
- %ifdef PREFIX
- global _%1
- %define %1 _%1
- %else
- global %1
- %endif
+ global %1
%endif
align function_align
%1:
@@ -328,11 +322,9 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
%macro cextern 1
%ifdef PREFIX
- extern _%1
- %define %1 _%1
- %else
- extern %1
+ %xdefine %1 _%1
%endif
+ extern %1
%endmacro
; This is needed for ELF, otherwise the GNU linker assumes the stack is
More information about the x264-devel
mailing list