[x265] [PATCH] fix asm and c code output mismatch

praveen at multicorewareinc.com praveen at multicorewareinc.com
Thu Jan 8 07:16:03 CET 2015


# HG changeset patch
# User Praveen Tiwari
# Date 1420697603 -19800
# Node ID b3754a42dfa1793f99ca39acc828ac3ebd8906fd
# Parent  ff32d97fe59ce9d8dc04d785c605f44d18dcdcee
fix asm and c code output mismatch

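 MMX registers (the storage behind the __m64 data type) are aliased onto the x87 floating-point (FP)
 register set. Executing any MMX instruction marks every entry of the FP tag word as valid, so x87
 floating-point code that runs afterwards can produce wrong results; this is the cause of the mismatch
 between the asm and C code output. To make the FP register set usable again, the register state must be
 reset with the EMMS instruction. This patch adds EMMS before RET in pixel_satd_4x4.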

diff -r ff32d97fe59c -r b3754a42dfa1 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Wed Jan 07 14:27:33 2015 +0530
+++ b/source/common/x86/pixel-a.asm	Thu Jan 08 11:43:23 2015 +0530
@@ -449,7 +449,19 @@
 cglobal pixel_satd_4x4, 4,6
     SATD_START_MMX
     SATD_4x4_MMX m0, 0, 0
-    SATD_END_MMX
+%if HIGH_BIT_DEPTH
+    HADDUW      m0, m1
+    movd       eax, m0
+%else ; !HIGH_BIT_DEPTH
+    pshufw      m1, m0, q1032
+    paddw       m0, m1
+    pshufw      m1, m0, q2301
+    paddw       m0, m1
+    movd       eax, m0
+    and        eax, 0xffff
+%endif ; HIGH_BIT_DEPTH
+    EMMS
+    RET
 
 %macro SATD_START_SSE2 2-3 0
     FIX_STRIDES r1, r3


More information about the x265-devel mailing list