[x264-devel] Windows x64 support
Loren Merritt
lorenm at u.washington.edu
Tue Jan 27 13:52:43 CET 2009
On Tue, 27 Jan 2009, BugMaster wrote:
> http://komisar.gin.by/x.patch/BugMaster/20090126/independent/x264_win64_support.03.r1089.diff
Fixed a crash in x264_pixel_avg_weight_*_ssse3 on x86_32.
The rest of my changes are cosmetic, but you should still check whether
they work.
Yes, `yasm -f win32 -m amd64` is the same as `yasm -f win64`, and I think
the former is more convenient for ffmpeg's configure.
--Loren Merritt
-------------- next part --------------
diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm
index f8f22bc..0929cf5 100644
--- a/common/x86/cpu-a.asm
+++ b/common/x86/cpu-a.asm
@@ -26,39 +26,22 @@
SECTION .text
-%ifdef WIN64
+%ifdef ARCH_X86_64
;-----------------------------------------------------------------------------
; int x264_cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
;-----------------------------------------------------------------------------
cglobal x264_cpu_cpuid
push rbx
- mov r10, rdx
- mov r11, [rsp+48]
- mov eax, ecx
- cpuid
- mov [r10], eax
- mov [r8], ebx
- mov [r9], ecx
- mov [r11], edx
- pop rbx
- ret
-
-%elifdef ARCH_X86_64
-
-;-----------------------------------------------------------------------------
-; int x264_cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
-;-----------------------------------------------------------------------------
-cglobal x264_cpu_cpuid
- push rbx
- mov r10, r3
- mov r11, r2
- mov r9, r1
+ mov r11, r1
+ mov r10, r2
+ mov r9, r3
+ mov r8, r4
mov eax, r0d
cpuid
- mov [r9], eax
- mov [r11], ebx
- mov [r10], ecx
+ mov [r11], eax
+ mov [r10], ebx
+ mov [r9], ecx
mov [r8], edx
pop rbx
ret
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 6a58700..1e70c64 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -73,30 +73,30 @@ SECTION .text
%macro BIWEIGHT_MMX 2
movh m0, %1
movh m1, %2
- punpcklbw m0, m7
- punpcklbw m1, m7
- pmullw m0, m4
- pmullw m1, m5
+ punpcklbw m0, m5
+ punpcklbw m1, m5
+ pmullw m0, m2
+ pmullw m1, m3
paddw m0, m1
- paddw m0, m6
+ paddw m0, m4
psraw m0, 6
%endmacro
%macro BIWEIGHT_START_MMX 0
- movd m4, r6m
- SPLATW m4, m4 ; weight_dst
- mova m5, [pw_64 GLOBAL]
- psubw m5, m4 ; weight_src
- mova m6, [pw_32 GLOBAL] ; rounding
- pxor m7, m7
+ movd m2, r6m
+ SPLATW m2, m2 ; weight_dst
+ mova m3, [pw_64 GLOBAL]
+ psubw m3, m2 ; weight_src
+ mova m4, [pw_32 GLOBAL] ; rounding
+ pxor m5, m5
%endmacro
%macro BIWEIGHT_SSSE3 2
movh m0, %1
movh m1, %2
punpcklbw m0, m1
- pmaddubsw m0, m5
- paddw m0, m6
+ pmaddubsw m0, m3
+ paddw m0, m4
psraw m0, 6
%endmacro
@@ -106,9 +106,9 @@ SECTION .text
sub t7d, t6d
shl t7d, 8
add t6d, t7d
- movd m5, t6d
- mova m6, [pw_32 GLOBAL]
- SPLATW m5, m5 ; weight_dst,src
+ movd m3, t6d
+ mova m4, [pw_32 GLOBAL]
+ SPLATW m3, m3 ; weight_dst,src
%endmacro
%macro BIWEIGHT_ROW 4
@@ -117,10 +117,10 @@ SECTION .text
packuswb m0, m0
movh [%1], m0
%else
- SWAP 0, 2
+ SWAP 0, 6
BIWEIGHT [%2+mmsize/2], [%3+mmsize/2]
- packuswb m2, m0
- mova [%1], m2
+ packuswb m6, m0
+ mova [%1], m6
%endif
%endmacro
@@ -129,16 +129,16 @@ SECTION .text
;-----------------------------------------------------------------------------
%macro AVG_WEIGHT 3
cglobal x264_pixel_avg_weight_w%2_%1
- AVG_START %3
BIWEIGHT_START
+ AVG_START %3
.height_loop:
%if %2==8 && mmsize==16
BIWEIGHT [t2], [t4]
- SWAP 0, 2
+ SWAP 0, 6
BIWEIGHT [t2+t3], [t4+t5]
- packuswb m2, m0
- movlps [t0], m2
- movhps [t0+t1], m2
+ packuswb m6, m0
+ movlps [t0], m6
+ movhps [t0+t1], m6
%else
%assign x 0
%rep 1+%2/(mmsize*2)
@@ -163,15 +163,15 @@ AVG_WEIGHT mmxext, 8, 0
AVG_WEIGHT mmxext, 16, 0
INIT_XMM
%define x264_pixel_avg_weight_w4_sse2 x264_pixel_avg_weight_w4_mmxext
-AVG_WEIGHT sse2, 8, 8
-AVG_WEIGHT sse2, 16, 8
+AVG_WEIGHT sse2, 8, 7
+AVG_WEIGHT sse2, 16, 7
%define BIWEIGHT BIWEIGHT_SSSE3
%define BIWEIGHT_START BIWEIGHT_START_SSSE3
INIT_MMX
AVG_WEIGHT ssse3, 4, 0
INIT_XMM
-AVG_WEIGHT ssse3, 8, 8
-AVG_WEIGHT ssse3, 16, 8
+AVG_WEIGHT ssse3, 8, 7
+AVG_WEIGHT ssse3, 16, 7
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index 4426e33..3c29c31 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -1635,13 +1635,10 @@ cglobal x264_pixel_ssim_4x4x2_core_sse2, 4,4,8
punpckldq m3, m4
punpckhdq m5, m4
-%ifdef WIN64
- %define t0 rax
- mov t0, r4mp
-%elifdef ARCH_X86_64
+%ifdef UNIX64
%define t0 r4
%else
- %define t0 eax
+ %define t0 rax
mov t0, r4mp
%endif
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index cececbe..7451a31 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -271,7 +271,7 @@ QUANT_DC x264_quant_2x2_dc_ssse3, 1
;-----------------------------------------------------------------------------
%macro DEQUANT 4
cglobal x264_dequant_%2x%2_%1, 0,3
-x264_dequant_%2x%2_%1.skip_prologue:
+.skip_prologue:
DEQUANT_START %3+2, %3
.lshift:
diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
index 6b850ff..dc8b985 100644
--- a/common/x86/sad-a.asm
+++ b/common/x86/sad-a.asm
@@ -538,12 +538,7 @@ INTRA_SAD16 ssse3, 8
%endmacro
%macro SAD_X3_END 0
-%ifdef WIN64
- mov r0, r5mp
- movd [r0+0], mm0
- movd [r0+4], mm1
- movd [r0+8], mm2
-%elifdef ARCH_X86_64
+%ifdef UNIX64
movd [r5+0], mm0
movd [r5+4], mm1
movd [r5+8], mm2
@@ -572,11 +567,8 @@ INTRA_SAD16 ssse3, 8
%macro SAD_X 3
cglobal x264_pixel_sad_x%1_%2x%3_mmxext, %1+2, %1+2
%ifdef WIN64
- %if %1 == 3
- movsxd r4, r4d
- %elif %1 == 4
- movsxd r5, r5d
- %endif
+ %assign i %1+1
+ movsxd r %+ i, r %+ i %+ d
%endif
SAD_X%1_2x%2P 1
%rep %3/2-1
@@ -815,12 +807,7 @@ SAD_X 4, 4, 4
paddw xmm0, xmm4
paddw xmm1, xmm5
paddw xmm2, xmm6
-%ifdef WIN64
- mov r0, r5mp
- movd [r0+0], xmm0
- movd [r0+4], xmm1
- movd [r0+8], xmm2
-%elifdef ARCH_X86_64
+%ifdef UNIX64
movd [r5+0], xmm0
movd [r5+4], xmm1
movd [r5+8], xmm2
@@ -929,11 +916,8 @@ SAD_X 4, 4, 4
%macro SAD_X_SSE2 4
cglobal x264_pixel_sad_x%1_%2x%3_%4, 2+%1,2+%1,9
%ifdef WIN64
- %if %1 == 3
- movsxd r4, r4d
- %elif %1 == 4
- movsxd r5, r5d
- %endif
+ %assign i %1+1
+ movsxd r %+ i, r %+ i %+ d
%endif
SAD_X%1_2x%2P_SSE2 1
%rep %3/2-1
@@ -945,11 +929,8 @@ cglobal x264_pixel_sad_x%1_%2x%3_%4, 2+%1,2+%1,9
%macro SAD_X_SSE2_MISALIGN 4
cglobal x264_pixel_sad_x%1_%2x%3_%4_misalign, 2+%1,2+%1,9
%ifdef WIN64
- %if %1 == 3
- movsxd r4, r4d
- %elif %1 == 4
- movsxd r5, r5d
- %endif
+ %assign i %1+1
+ movsxd r %+ i, r %+ i %+ d
%endif
SAD_X%1_2x%2P_SSE2_MISALIGN 1
%rep %3/2-1
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index bd4d154..e420532 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -19,8 +19,10 @@
;*****************************************************************************
%ifdef ARCH_X86_64
- %ifidn __OUTPUT_FORMAT__,win64
+ %ifidn __OUTPUT_FORMAT__,win32
%define WIN64
+ %else
+ %define UNIX64
%endif
%endif
diff --git a/configure b/configure
index 7f4daa1..897ad78 100755
--- a/configure
+++ b/configure
@@ -254,7 +254,7 @@ case $host_cpu in
CFLAGS="$CFLAGS -arch x86_64"
LDFLAGS="$LDFLAGS -arch x86_64"
elif [ "$SYS" = MINGW ]; then
- ASFLAGS="-f win64 -m amd64 -DPREFIX"
+ ASFLAGS="-f win32 -m amd64 -DPREFIX"
else
ASFLAGS="-f elf -m amd64"
fi
More information about the x264-devel mailing list