[x264-devel] Windows x64 support

Loren Merritt lorenm at u.washington.edu
Tue Jan 27 13:52:43 CET 2009


On Tue, 27 Jan 2009, BugMaster wrote:

> http://komisar.gin.by/x.patch/BugMaster/20090126/independent/x264_win64_support.03.r1089.diff

Fixed a crash in x264_pixel_avg_weight_*_ssse3 x86_32.
The rest of my changes are cosmetic, but you should still check whether 
they work.

Yes, `yasm -f win32 -m amd64` is the same as `-f win64`, and I think the 
former is more convenient for ffmpeg's configure.

--Loren Merritt
-------------- next part --------------
diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
index f8f22bc..0929cf5 100644
--- a/common/x86/cpu-a.asm
+++ b/common/x86/cpu-a.asm
@@ -26,39 +26,22 @@
 
 SECTION .text
 
-%ifdef WIN64
+%ifdef ARCH_X86_64
 
 ;-----------------------------------------------------------------------------
 ; int x264_cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
 ;-----------------------------------------------------------------------------
 cglobal x264_cpu_cpuid
     push    rbx
-    mov     r10,   rdx
-    mov     r11,   [rsp+48]
-    mov     eax,   ecx
-    cpuid
-    mov     [r10], eax
-    mov     [r8],  ebx
-    mov     [r9],  ecx
-    mov     [r11], edx
-    pop     rbx
-    ret
-
-%elifdef ARCH_X86_64
-
-;-----------------------------------------------------------------------------
-; int x264_cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
-;-----------------------------------------------------------------------------
-cglobal x264_cpu_cpuid
-    push    rbx
-    mov     r10,   r3
-    mov     r11,   r2
-    mov     r9,    r1
+    mov     r11,   r1
+    mov     r10,   r2
+    mov     r9,    r3
+    mov     r8,    r4
     mov     eax,   r0d
     cpuid
-    mov     [r9],  eax
-    mov     [r11], ebx
-    mov     [r10], ecx
+    mov     [r11], eax
+    mov     [r10], ebx
+    mov     [r9],  ecx
     mov     [r8],  edx
     pop     rbx
     ret
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 6a58700..1e70c64 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -73,30 +73,30 @@ SECTION .text
 %macro BIWEIGHT_MMX 2
     movh      m0, %1
     movh      m1, %2
-    punpcklbw m0, m7
-    punpcklbw m1, m7
-    pmullw    m0, m4
-    pmullw    m1, m5
+    punpcklbw m0, m5
+    punpcklbw m1, m5
+    pmullw    m0, m2
+    pmullw    m1, m3
     paddw     m0, m1
-    paddw     m0, m6
+    paddw     m0, m4
     psraw     m0, 6
 %endmacro
 
 %macro BIWEIGHT_START_MMX 0
-    movd    m4, r6m
-    SPLATW  m4, m4   ; weight_dst
-    mova    m5, [pw_64 GLOBAL]
-    psubw   m5, m4   ; weight_src
-    mova    m6, [pw_32 GLOBAL] ; rounding
-    pxor    m7, m7
+    movd    m2, r6m
+    SPLATW  m2, m2   ; weight_dst
+    mova    m3, [pw_64 GLOBAL]
+    psubw   m3, m2   ; weight_src
+    mova    m4, [pw_32 GLOBAL] ; rounding
+    pxor    m5, m5
 %endmacro
 
 %macro BIWEIGHT_SSSE3 2
     movh      m0, %1
     movh      m1, %2
     punpcklbw m0, m1
-    pmaddubsw m0, m5
-    paddw     m0, m6
+    pmaddubsw m0, m3
+    paddw     m0, m4
     psraw     m0, 6
 %endmacro
 
@@ -106,9 +106,9 @@ SECTION .text
     sub    t7d, t6d
     shl    t7d, 8
     add    t6d, t7d
-    movd    m5, t6d
-    mova    m6, [pw_32 GLOBAL]
-    SPLATW  m5, m5   ; weight_dst,src
+    movd    m3, t6d
+    mova    m4, [pw_32 GLOBAL]
+    SPLATW  m3, m3   ; weight_dst,src
 %endmacro
 
 %macro BIWEIGHT_ROW 4
@@ -117,10 +117,10 @@ SECTION .text
     packuswb   m0, m0
     movh     [%1], m0
 %else
-    SWAP 0, 2
+    SWAP 0, 6
     BIWEIGHT [%2+mmsize/2], [%3+mmsize/2]
-    packuswb   m2, m0
-    mova     [%1], m2
+    packuswb   m6, m0
+    mova     [%1], m6
 %endif
 %endmacro
 
@@ -129,16 +129,16 @@ SECTION .text
 ;-----------------------------------------------------------------------------
 %macro AVG_WEIGHT 3
 cglobal x264_pixel_avg_weight_w%2_%1
-    AVG_START %3
     BIWEIGHT_START
+    AVG_START %3
 .height_loop:
 %if %2==8 && mmsize==16
     BIWEIGHT [t2], [t4]
-    SWAP 0, 2
+    SWAP 0, 6
     BIWEIGHT [t2+t3], [t4+t5]
-    packuswb m2, m0
-    movlps   [t0], m2
-    movhps   [t0+t1], m2
+    packuswb m6, m0
+    movlps   [t0], m6
+    movhps   [t0+t1], m6
 %else
 %assign x 0
 %rep 1+%2/(mmsize*2)
@@ -163,15 +163,15 @@ AVG_WEIGHT mmxext, 8,  0
 AVG_WEIGHT mmxext, 16, 0
 INIT_XMM
 %define x264_pixel_avg_weight_w4_sse2 x264_pixel_avg_weight_w4_mmxext
-AVG_WEIGHT sse2, 8,  8
-AVG_WEIGHT sse2, 16, 8
+AVG_WEIGHT sse2, 8,  7
+AVG_WEIGHT sse2, 16, 7
 %define BIWEIGHT BIWEIGHT_SSSE3
 %define BIWEIGHT_START BIWEIGHT_START_SSSE3
 INIT_MMX
 AVG_WEIGHT ssse3, 4,  0
 INIT_XMM
-AVG_WEIGHT ssse3, 8,  8
-AVG_WEIGHT ssse3, 16, 8
+AVG_WEIGHT ssse3, 8,  7
+AVG_WEIGHT ssse3, 16, 7
 
 
 
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index 4426e33..3c29c31 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -1635,13 +1635,10 @@ cglobal x264_pixel_ssim_4x4x2_core_sse2, 4,4,8
     punpckldq m3, m4
     punpckhdq m5, m4
 
-%ifdef WIN64
-    %define t0 rax
-    mov t0, r4mp
-%elifdef ARCH_X86_64
+%ifdef UNIX64
     %define t0 r4
 %else
-    %define t0 eax
+    %define t0 rax
     mov t0, r4mp
 %endif
 
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index cececbe..7451a31 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -271,7 +271,7 @@ QUANT_DC x264_quant_2x2_dc_ssse3, 1
 ;-----------------------------------------------------------------------------
 %macro DEQUANT 4
 cglobal x264_dequant_%2x%2_%1, 0,3
-x264_dequant_%2x%2_%1.skip_prologue:
+.skip_prologue:
     DEQUANT_START %3+2, %3
 
 .lshift:
diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
index 6b850ff..dc8b985 100644
--- a/common/x86/sad-a.asm
+++ b/common/x86/sad-a.asm
@@ -538,12 +538,7 @@ INTRA_SAD16 ssse3, 8
 %endmacro
 
 %macro SAD_X3_END 0
-%ifdef WIN64
-    mov     r0, r5mp
-    movd    [r0+0], mm0
-    movd    [r0+4], mm1
-    movd    [r0+8], mm2
-%elifdef ARCH_X86_64
+%ifdef UNIX64
     movd    [r5+0], mm0
     movd    [r5+4], mm1
     movd    [r5+8], mm2
@@ -572,11 +567,8 @@ INTRA_SAD16 ssse3, 8
 %macro SAD_X 3
 cglobal x264_pixel_sad_x%1_%2x%3_mmxext, %1+2, %1+2
 %ifdef WIN64
-    %if %1 == 3
-        movsxd r4, r4d
-    %elif %1 == 4
-        movsxd r5, r5d
-    %endif
+    %assign i %1+1
+    movsxd r %+ i, r %+ i %+ d
 %endif
     SAD_X%1_2x%2P 1
 %rep %3/2-1
@@ -815,12 +807,7 @@ SAD_X 4,  4,  4
     paddw   xmm0, xmm4
     paddw   xmm1, xmm5
     paddw   xmm2, xmm6
-%ifdef WIN64
-    mov      r0, r5mp
-    movd [r0+0], xmm0
-    movd [r0+4], xmm1
-    movd [r0+8], xmm2
-%elifdef ARCH_X86_64
+%ifdef UNIX64
     movd [r5+0], xmm0
     movd [r5+4], xmm1
     movd [r5+8], xmm2
@@ -929,11 +916,8 @@ SAD_X 4,  4,  4
 %macro SAD_X_SSE2 4
 cglobal x264_pixel_sad_x%1_%2x%3_%4, 2+%1,2+%1,9
 %ifdef WIN64
-    %if %1 == 3
-        movsxd r4, r4d
-    %elif %1 == 4
-        movsxd r5, r5d
-    %endif
+    %assign i %1+1
+    movsxd r %+ i, r %+ i %+ d
 %endif
     SAD_X%1_2x%2P_SSE2 1
 %rep %3/2-1
@@ -945,11 +929,8 @@ cglobal x264_pixel_sad_x%1_%2x%3_%4, 2+%1,2+%1,9
 %macro SAD_X_SSE2_MISALIGN 4
 cglobal x264_pixel_sad_x%1_%2x%3_%4_misalign, 2+%1,2+%1,9
 %ifdef WIN64
-    %if %1 == 3
-        movsxd r4, r4d
-    %elif %1 == 4
-        movsxd r5, r5d
-    %endif
+    %assign i %1+1
+    movsxd r %+ i, r %+ i %+ d
 %endif
     SAD_X%1_2x%2P_SSE2_MISALIGN 1
 %rep %3/2-1
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index bd4d154..e420532 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -19,8 +19,10 @@
 ;*****************************************************************************
 
 %ifdef ARCH_X86_64
-    %ifidn __OUTPUT_FORMAT__,win64
+    %ifidn __OUTPUT_FORMAT__,win32
         %define WIN64
+    %else
+        %define UNIX64
     %endif
 %endif
 
diff --git a/configure b/configure
index 7f4daa1..897ad78 100755
--- a/configure
+++ b/configure
@@ -254,7 +254,7 @@ case $host_cpu in
       CFLAGS="$CFLAGS -arch x86_64"
       LDFLAGS="$LDFLAGS -arch x86_64"
     elif [ "$SYS" = MINGW ]; then
-      ASFLAGS="-f win64 -m amd64 -DPREFIX"
+      ASFLAGS="-f win32 -m amd64 -DPREFIX"
     else
       ASFLAGS="-f elf -m amd64"
     fi


More information about the x264-devel mailing list