[x264-devel] Missing emms in x264 (+ workaround patch)
Andrew Church
achurch at achurch.org
Mon Oct 9 03:07:12 CEST 2006
After spending several hours trying to track down mysterious data
corruption while working on an x264 encoding module for transcode, I've
discovered that there's at least one case in which x264 (SVN as of ~7 hours
ago) does not execute an EMMS instruction after calling its MMX/SSE assembly
routines. I don't have the time to track down exactly which routine it is;
hopefully someone here who has more experience with the code can take care
of that. In the meantime, the patch below (which just tacks an EMMS onto
the end of every assembly routine) seems to have resolved the problem.
For the record, here are the symptoms (in all cases using the default
x264 parameter set, except in the "bframes=2" example):
### Works fine (image size = 1048576 pixels):
$ transcode -i source -c 0-30 -o test.avi -y x264,null,avi -Z 1024x1024
### Corruption in lower right corner of image for P-frames (image size =
### 1048580 pixels):
$ transcode -i source -c 0-30 -o test.avi -y x264,null,avi -Z 1090x962
### Corruption throughout lower half of image for P-frames:
$ transcode -i source -c 0-30 -o test.avi -y x264,null,avi -Z 1440x1080
### Corruption throughout lower half of image for P- and B-frames, and the
### progress counter's frames-per-second calculation (floating point) gives
### a NaN:
$ transcode -i source -c 0-30 -o test.avi -y x264=bframes=2,null,avi \
-Z 1440x1080
--Andrew Church
achurch at achurch.org
http://achurch.org/
---------------------------------------------------------------------------
diff -urN ../x264-svn-20061008.2019-orig/common/i386/dct-a.asm common/i386/dct-a.asm
--- ../x264-svn-20061008.2019-orig/common/i386/dct-a.asm 2006-10-08 20:19:42 +0900
+++ common/i386/dct-a.asm 2006-10-09 02:51:40 +0900
@@ -182,6 +182,7 @@
psraw mm4, 1
movq [eax+24], mm4
picpop ebx
+ emms
ret
cglobal x264_idct4x4dc_mmx
@@ -209,6 +210,7 @@
movq [eax+ 8], mm2
movq [eax+16], mm3
movq [eax+24], mm4
+ emms
ret
cglobal x264_sub4x4_dct_mmx
@@ -248,6 +250,7 @@
movq [eax+16], mm3
movq [eax+24], mm0
+ emms
ret
cglobal x264_add4x4_idct_mmx
@@ -291,6 +294,7 @@
MMX_STORE_DIFF_4P mm3, mm0, mm6, mm7, [eax+3*FDEC_STRIDE]
picpop ebx
+ emms
ret
@@ -343,6 +347,7 @@
%assign disp disp+16
%endrep
+ emms
ret
ALIGN 16
@@ -428,6 +433,7 @@
%assign disp disp+8
%endrep
+ emms
ret
ALIGN 16
@@ -522,6 +528,7 @@
%assign disp disp+8
%endrep
+ emms
ret
ALIGN 16
@@ -551,6 +558,7 @@
add eax, FDEC_STRIDE
%assign disp disp+16
%endrep
+ emms
ret
ALIGN 16
@@ -600,6 +608,7 @@
movq [eax+ 40], mm4
movq [eax+ 56], mm6
+ emms
ret
;-----------------------------------------------------------------------------
@@ -729,4 +738,5 @@
movq [ecx+56], mm7
movq [ecx+12], mm1
movd [ecx+ 8], mm2
+ emms
ret
diff -urN ../x264-svn-20061008.2019-orig/common/i386/deblock-a.asm common/i386/deblock-a.asm
--- ../x264-svn-20061008.2019-orig/common/i386/deblock-a.asm 2006-10-08 20:19:42 +0900
+++ common/i386/deblock-a.asm 2006-10-09 02:51:22 +0900
@@ -311,6 +311,7 @@
pop esi
pop edi
picpop ebx
+ emms
ret
@@ -371,6 +372,7 @@
add esp, 96
pop ebp
pop ebx
+ emms
ret
@@ -409,6 +411,7 @@
%macro CHROMA_END 0
pop esi
pop edi
+ emms
ret
%endmacro
diff -urN ../x264-svn-20061008.2019-orig/common/i386/mc-a.asm common/i386/mc-a.asm
--- ../x264-svn-20061008.2019-orig/common/i386/mc-a.asm 2006-10-08 20:19:42 +0900
+++ common/i386/mc-a.asm 2006-10-09 02:51:14 +0900
@@ -115,6 +115,7 @@
pop esi
pop ebx
pop ebp
+ emms
ret
@@ -154,6 +155,7 @@
pop esi
pop ebx
pop ebp
+ emms
ret
@@ -196,6 +198,7 @@
pop esi
pop ebx
pop ebp
+ emms
ret
ALIGN 16
@@ -234,6 +237,7 @@
pop esi
pop ebx
pop ebp
+ emms
ret
@@ -278,6 +282,7 @@
picpop ebx
pop esi
pop edi
+ emms
ret
%endmacro
@@ -373,6 +378,7 @@
pop edi
pop esi
pop ebx
+ emms
ret
ALIGN 16
@@ -411,6 +417,7 @@
pop edi
pop esi
pop ebx
+ emms
ret
ALIGN 16
@@ -457,6 +464,7 @@
pop edi
pop esi
pop ebx
+ emms
ret
@@ -490,6 +498,7 @@
pop edi
pop esi
pop ebx
+ emms
ret
@@ -594,4 +603,5 @@
.finish
pop edi
picpop ebx
+ emms
ret
diff -urN ../x264-svn-20061008.2019-orig/common/i386/mc-a2.asm common/i386/mc-a2.asm
--- ../x264-svn-20061008.2019-orig/common/i386/mc-a2.asm 2006-10-08 20:19:42 +0900
+++ common/i386/mc-a2.asm 2006-10-09 02:50:57 +0900
@@ -267,6 +267,7 @@
pop esi
pop ebx
pop ebp
+ emms
ret
@@ -328,5 +329,6 @@
pop ebx
pop esi
pop edi
+ emms
ret
diff -urN ../x264-svn-20061008.2019-orig/common/i386/pixel-a.asm common/i386/pixel-a.asm
--- ../x264-svn-20061008.2019-orig/common/i386/pixel-a.asm 2006-10-08 20:19:42 +0900
+++ common/i386/pixel-a.asm 2006-10-09 02:50:52 +0900
@@ -250,6 +250,7 @@
movd [eax+8], mm2
pop esi
pop edi
+ emms
ret
%endmacro
@@ -262,6 +263,7 @@
pop ebx
pop esi
pop edi
+ emms
ret
%endmacro
@@ -510,6 +512,7 @@
movd eax, mm0
pop ebx
+ emms
ret
%endmacro
@@ -580,6 +583,7 @@
jl .continue
pop ebx
mov eax, 0xffff
+ emms
ret
ALIGN 4
.continue:
@@ -616,6 +620,7 @@
movd eax, mm0
pop ebx
+ emms
ret
%endmacro
@@ -659,6 +664,7 @@
movd eax, mm0
and eax, 0xffff
pop ebx
+ emms
ret
%endmacro
@@ -826,6 +832,7 @@
paddd mm0, mm1
movd eax, mm0
pop ebx
+ emms
ret
@@ -947,6 +954,7 @@
shr eax, 1
add esp, 0x70
pop ebx
+ emms
ret
%undef args
%undef spill
@@ -988,6 +996,7 @@
pop ebp
pop edi
pop esi
+ emms
ret
@@ -1117,6 +1126,7 @@
pop esi
pop edi
pop ebx
+ emms
ret
ALIGN 16
@@ -1248,6 +1258,7 @@
pop edi
pop ebp
pop ebx
+ emms
ret
ALIGN 16
@@ -1393,6 +1404,7 @@
pop edi
pop ebp
pop ebx
+ emms
ret
%macro LOAD_4x8P 1 ; dx
@@ -1570,6 +1582,7 @@
mov [eax+8], ecx ; i8x8_dc satd
add esp, 0x70
+ emms
ret
%undef args
%undef spill
@@ -1683,6 +1696,7 @@
sub edx, 4
jg .loop
pop ebx
+ emms
ret
ALIGN 16
@@ -1711,6 +1725,7 @@
sub edx, 4
jg .loop
pop ebx
+ emms
ret
ALIGN 16
@@ -1734,4 +1749,5 @@
sub edx, 8
jg .loop
nop
+ emms
ret
diff -urN ../x264-svn-20061008.2019-orig/common/i386/pixel-sse2.asm common/i386/pixel-sse2.asm
--- ../x264-svn-20061008.2019-orig/common/i386/pixel-sse2.asm 2006-10-08 20:19:42 +0900
+++ common/i386/pixel-sse2.asm 2006-10-09 02:50:26 +0900
@@ -103,6 +103,7 @@
movd eax, xmm0
pop ebx
+ emms
ret
%endmacro
@@ -294,6 +295,7 @@
movd [eax+8], xmm2
pop esi
pop edi
+ emms
ret
%endmacro
@@ -314,6 +316,7 @@
pop ebx
pop esi
pop edi
+ emms
ret
%endmacro
@@ -397,6 +400,7 @@
movd eax, xmm0
pop ebx
+ emms
ret
%endmacro
@@ -540,6 +544,7 @@
HADDW xmm6, xmm7
movd eax, xmm6
pop ebx
+ emms
ret
%endmacro
@@ -682,6 +687,7 @@
movq [eax+16], xmm1
movq [eax+24], xmm5
pop ebx
+ emms
ret
;-----------------------------------------------------------------------------
@@ -746,5 +752,6 @@
movd [picesp+4], xmm0
fld dword [picesp+4]
picpop ebx
+ emms
ret
diff -urN ../x264-svn-20061008.2019-orig/common/i386/predict-a.asm common/i386/predict-a.asm
--- ../x264-svn-20061008.2019-orig/common/i386/predict-a.asm 2006-10-08 20:19:42 +0900
+++ common/i386/predict-a.asm 2006-10-09 02:50:13 +0900
@@ -108,6 +108,7 @@
mov edx, [esp+4]
movq mm0, [eax+16]
STORE8x8 mm0, mm0
+ emms
ret
;-----------------------------------------------------------------------------
@@ -131,6 +132,7 @@
packuswb mm0, mm0
STORE8x8 mm0, mm0
picpop ebx
+ emms
ret
;-----------------------------------------------------------------------------
@@ -151,6 +153,7 @@
packuswb mm0, mm0
STORE8x8 mm0, mm0
picpop ebx
+ emms
ret
%endmacro
@@ -192,6 +195,7 @@
movq [edx + Y*FDEC_STRIDE], mm1
picpop ebx
+ emms
ret
;-----------------------------------------------------------------------------
@@ -229,6 +233,7 @@
movq [edx + Y*FDEC_STRIDE], mm0
picpop ebx
+ emms
ret
;-----------------------------------------------------------------------------
@@ -271,6 +276,7 @@
movq [edx + (Y+1)*FDEC_STRIDE], mm0
picpop ebx
+ emms
ret
;-----------------------------------------------------------------------------
@@ -282,6 +288,7 @@
mov edx, [esp + 4]
movq mm0, [edx - FDEC_STRIDE]
STORE8x8 mm0, mm0
+ emms
ret
;-----------------------------------------------------------------------------
@@ -321,6 +328,7 @@
STORE8x8 mm0, mm2
picpop ebx
+ emms
ret
;-----------------------------------------------------------------------------
@@ -361,6 +369,7 @@
nop
picpop ebx
+ emms
ret
;-----------------------------------------------------------------------------
@@ -416,6 +425,7 @@
nop
picpop ebx
+ emms
ret
;-----------------------------------------------------------------------------
@@ -453,6 +463,7 @@
SAVE_0_1 (edx + 2 * ecx) ; 14
SAVE_0_1 (edx + eax) ; 15
+ emms
ret
;-----------------------------------------------------------------------------
@@ -493,6 +504,7 @@
ALIGN 16
predict_16x16_dc_core_mmxext:
PRED16x16_DC [esp+8], 5, esp
+ emms
ret
ALIGN 16
@@ -501,5 +513,6 @@
picgetgot ebx
PRED16x16_DC [pw_8 GOT_ebx], 4, picesp
picpop ebx
+ emms
ret
diff -urN ../x264-svn-20061008.2019-orig/common/i386/quant-a.asm common/i386/quant-a.asm
--- ../x264-svn-20061008.2019-orig/common/i386/quant-a.asm 2006-10-08 20:19:42 +0900
+++ common/i386/quant-a.asm 2006-10-09 02:49:49 +0900
@@ -115,6 +115,7 @@
x264_quant_2x2_dc_core15_mmx:
MMX_QUANT15_DC_START
MMX_QUANT15_1x4 [eax], mm5, mm6, mm7
+ emms
ret
ALIGN 16
@@ -130,6 +131,7 @@
add eax, byte 8
%endrep
+ emms
ret
ALIGN 16
@@ -148,6 +150,7 @@
add eax, byte 8
%endrep
+ emms
ret
ALIGN 16
@@ -166,6 +169,7 @@
add eax, byte 8
%endrep
+ emms
ret
; ============================================================================
@@ -218,6 +222,7 @@
x264_quant_2x2_dc_core16_mmxext:
MMXEXT_QUANT16_DC_START
MMXEXT_QUANT16_1x4 [eax], mm5, mm6, mm7
+ emms
ret
ALIGN 16
@@ -233,6 +238,7 @@
add eax, byte 8
%endrep
+ emms
ret
ALIGN 16
@@ -252,6 +258,7 @@
add eax, byte 8
%endrep
+ emms
ret
ALIGN 16
@@ -271,6 +278,7 @@
add eax, byte 8
%endrep
+ emms
ret
@@ -329,6 +337,7 @@
x264_quant_2x2_dc_core32_mmxext:
MMX_QUANT32_DC_START
MMXEXT_QUANT32_1x4 [eax], mm5, mm5, mm6, mm7
+ emms
ret
ALIGN 16
@@ -344,6 +353,7 @@
add eax, byte 8
%endrep
+ emms
ret
ALIGN 16
@@ -360,6 +370,7 @@
add ecx, byte 16
%endrep
+ emms
ret
ALIGN 16
@@ -376,6 +387,7 @@
add ecx, byte 16
%endrep
+ emms
ret
@@ -474,6 +486,7 @@
jge .loopl16
nop
+ emms
ret
.rshift32:
@@ -496,6 +509,7 @@
jge .loopr32
nop
+ emms
ret
%endmacro
--
This is the x264-devel mailing-list
To unsubscribe, go to: http://developers.videolan.org/lists.html
More information about the x264-devel
mailing list