[x265] [PATCH 1 of 2] fix missing emms in MMX functions

Min Chen chenm003 at 163.com
Wed Jul 30 03:56:58 CEST 2014


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1406685333 25200
# Node ID 9fbda07104f6e7c03c608320a6b88ed877074e25
# Parent  a9678988dda2aea1f8d8babf05de7717896946f3
fix missing emms in MMX functions

diff -r a9678988dda2 -r 9fbda07104f6 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Tue Jul 29 23:14:42 2014 +0530
+++ b/source/common/x86/mc-a.asm	Tue Jul 29 18:55:33 2014 -0700
@@ -1983,7 +1983,7 @@
     lea  t0, [t0+t1*2*SIZEOF_PIXEL]
     sub eax, 2
     jg .height_loop
- %ifidn movu,movq ; detect MMX
+ %if (mmsize == 8)
     EMMS
  %endif
     RET
@@ -2422,6 +2422,9 @@
     lea  r2, [r2+r3*2]
     sub r5d, 2
     jg .loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %if cpuflag(ssse3) && HIGH_BIT_DEPTH == 0
 .fast:
@@ -2432,6 +2435,9 @@
     lea  r2, [r2+r3*2]
     sub r5d, 2
     jg .fastloop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endif
 %endmacro
@@ -2517,6 +2523,9 @@
     lea  r2, [r2+r3*2]
     sub r5d, 2
     jg .loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -2851,6 +2860,9 @@
     lea     r0, [r0+r1*4]
     sub    r5d, 2
     jg .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -2886,6 +2898,9 @@
     lea     r0, [r0+r1*4]
     sub    r5d, 2
     jg .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -2926,6 +2941,9 @@
     lea     r0, [r0+r1*2*2]
     sub    r5d, 2
     jg .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 
 cglobal pixel_avg2_w16_mmx2, 6,7
@@ -2960,6 +2978,9 @@
     lea     r0, [r0+r1*2*2]
     sub    r5d, 2
     jg .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 
 cglobal pixel_avg2_w18_mmx2, 6,7
@@ -2984,6 +3005,9 @@
     lea     r0, [r0+r1*2]
     dec    r5d
     jg .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 
 %macro PIXEL_AVG_W18 0
@@ -3012,6 +3036,9 @@
     lea     r0, [r0+r1*2]
     dec    r5d
     jg .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -3043,6 +3070,9 @@
     lea    r0, [r0+r1*2]
     sub    r5d, 2
     jg     .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -3071,6 +3101,9 @@
     lea    r0, [r0+r1*2]
     sub    r5d, 2
     jg     .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -3103,6 +3136,7 @@
     lea    r0, [r0+r1*2]
     sub    r5d, 2
     jg     .height_loop
+    emms
     RET
 
 INIT_XMM
@@ -3146,6 +3180,7 @@
     lea    r0, [r0+r1*2]
     sub   r5d, 2
     jg .height_loop
+    emms
     RET
 
 INIT_YMM avx2
@@ -3222,6 +3257,7 @@
     add    r0, r1
     dec    r5d
     jg .height_loop
+    emms
     RET
 %endmacro
 
@@ -3411,6 +3447,7 @@
     lea     r0, [r0+r1*4]
 .end:
     COPY1   r4, r5
+    emms
     RET
 
 %macro MC_COPY 1
@@ -3426,6 +3463,9 @@
     lea     r0, [r0+r1*4]
     sub    r4d, 4
     jg .height_loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endif
 %endmacro
diff -r a9678988dda2 -r 9fbda07104f6 source/common/x86/mc-a2.asm
--- a/source/common/x86/mc-a2.asm	Tue Jul 29 23:14:42 2014 +0530
+++ b/source/common/x86/mc-a2.asm	Tue Jul 29 18:55:33 2014 -0700
@@ -338,6 +338,9 @@
     add    r4, r5
     dec dword r7m
     jg .loopy
+%if (mmsize == 8)
+    emms
+%endif
     RET
 
 ;-----------------------------------------------------------------------------
@@ -353,6 +356,9 @@
     lea    r1, [r1+r2*2]
     sub   r3d, 2
     jg .loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 
 ;-----------------------------------------------------------------------------
@@ -368,6 +374,9 @@
     lea    r1, [r1+r2*2]
     sub   r3d, 2
     jg .loop
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro ; PLANE_DEINTERLEAVE
 
@@ -433,6 +442,9 @@
     sub  r2d, 4*mmsize
     jg .copy4
 .ret:
+%if (mmsize == 8)
+    emms
+%endif
     REP_RET
 %endmacro
 
diff -r a9678988dda2 -r 9fbda07104f6 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Tue Jul 29 23:14:42 2014 +0530
+++ b/source/common/x86/pixel-a.asm	Tue Jul 29 18:55:33 2014 -0700
@@ -320,6 +320,9 @@
     movd       eax, m0
     and        eax, 0xffff
 %endif ; HIGH_BIT_DEPTH
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -373,6 +376,9 @@
     movd   r4, m2
 %endrep
     movifnidn eax, r6d
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -393,6 +399,9 @@
     call pixel_satd_16x4_internal_mmx2
     HADDUW m0, m1
     movd  eax, m0
+%if (mmsize == 8)
+    emms
+%endif
     RET
 
 cglobal pixel_satd_16x8, 4,6
diff -r a9678988dda2 -r 9fbda07104f6 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Tue Jul 29 23:14:42 2014 +0530
+++ b/source/common/x86/pixel-util8.asm	Tue Jul 29 18:55:33 2014 -0700
@@ -4177,6 +4177,9 @@
     movd   eax, m5
     movd   edx, m6
 %endif
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
diff -r a9678988dda2 -r 9fbda07104f6 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm	Tue Jul 29 23:14:42 2014 +0530
+++ b/source/common/x86/sad-a.asm	Tue Jul 29 18:55:33 2014 -0700
@@ -89,15 +89,19 @@
 ; int pixel_sad_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
 ;-----------------------------------------------------------------------------
 %macro SAD 2
-cglobal pixel_sad_%1x%2_mmx2, 4,4
+cglobal pixel_sad_%1x%2, 4,4
     pxor    mm0, mm0
 %rep %2/2
     SAD_INC_2x%1P
 %endrep
     movd    eax, mm0
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
+INIT_MMX mmx2
 SAD 16, 16
 SAD 16,  8
 SAD  8, 16
@@ -106,7 +110,7 @@
 SAD  4, 16
 SAD  4,  8
 SAD  4,  4
-
+%undef SAD
 
 
 ;=============================================================================
@@ -117,6 +121,9 @@
     movhlps m1, m0
     paddw   m0, m1
     movd   eax, m0
+%if (cpuflags <= cpuflags_mmx2)
+    %error SSE2 macro use by MMX function!
+%endif
     RET
 %endmacro
 
@@ -833,9 +840,9 @@
     paddw   m0, m2
 %endmacro
 
-INIT_XMM
+INIT_XMM sse2
 ;Even on Nehalem, no sizes other than 8x16 benefit from this method.
-cglobal pixel_sad_8x16_sse2, 4,4
+cglobal pixel_sad_8x16, 4,4
     SAD_INC_4x8P_SSE 0
     SAD_INC_4x8P_SSE 1
     SAD_INC_4x8P_SSE 1
@@ -1046,6 +1053,9 @@
     movd    [r0+4], mm1
     movd    [r0+8], mm2
 %endif
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -1055,6 +1065,9 @@
     movd    [r0+4], mm1
     movd    [r0+8], mm2
     movd    [r0+12], mm3
+%if (mmsize == 8)
+    emms
+%endif
     RET
 %endmacro
 
@@ -2384,7 +2397,7 @@
 ;                          uint8_t *pix2, intptr_t i_stride, int scores[3] )
 ;-----------------------------------------------------------------------------
 %macro SAD_X 3
-cglobal pixel_sad_x%1_%2x%3_mmx2, %1+2, %1+2
+cglobal pixel_sad_x%1_%2x%3, %1+2, %1+2
     SAD_X%1_2x%2P 1
 %rep %3/2-1
     SAD_X%1_2x%2P 0
@@ -2392,7 +2405,7 @@
     SAD_X%1_END
 %endmacro
 
-INIT_MMX
+INIT_MMX mmx2
 SAD_X 3, 16, 16
 SAD_X 3, 16,  8
 SAD_X 3,  8, 16
@@ -3472,6 +3485,7 @@
     dec    r4
     jg .loop
     movd   eax, mm0
+    emms
     RET
 %endmacro
 
@@ -3498,6 +3512,7 @@
     dec    r4
     jg .loop
     movd   eax, mm0
+    emms
     RET
 %endmacro
 



More information about the x265-devel mailing list