[x265] [PATCH] fix asm and c code output mismatch
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Thu Jan 8 07:16:03 CET 2015
# HG changeset patch
# User Praveen Tiwari
# Date 1420697603 -19800
# Node ID b3754a42dfa1793f99ca39acc828ac3ebd8906fd
# Parent ff32d97fe59ce9d8dc04d785c605f44d18dcdcee
fix asm and c code output mismatch
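MMX instructions operate on the MMX registers (the registers behind the __m64 data type), which alias the x87
floating-point (FP) register stack; executing them marks every entry of the x87 FP tag word as valid (in use).
While the tag word is in that state, later x87 FP code can produce wrong results, which shows up as a mismatch
between the asm and C outputs. To make the FP register set usable again, reset the tag word to empty with the
EMMS instruction after the MMX code; this patch makes pixel_satd_4x4 execute EMMS before RET.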
diff -r ff32d97fe59c -r b3754a42dfa1 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Wed Jan 07 14:27:33 2015 +0530
+++ b/source/common/x86/pixel-a.asm Thu Jan 08 11:43:23 2015 +0530
@@ -449,7 +449,19 @@
cglobal pixel_satd_4x4, 4,6
SATD_START_MMX
SATD_4x4_MMX m0, 0, 0
- SATD_END_MMX
+%if HIGH_BIT_DEPTH
+ HADDUW m0, m1
+ movd eax, m0
+%else ; !HIGH_BIT_DEPTH
+ pshufw m1, m0, q1032
+ paddw m0, m1
+ pshufw m1, m0, q2301
+ paddw m0, m1
+ movd eax, m0
+ and eax, 0xffff
+%endif ; HIGH_BIT_DEPTH
+ EMMS
+ RET
%macro SATD_START_SSE2 2-3 0
FIX_STRIDES r1, r3
More information about the x265-devel
mailing list