[x265] [PATCH] asm: fix sad_x4 stress case failure on AVX2

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Fri Feb 7 08:09:37 CET 2014


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1391756957 -19800
#      Fri Feb 07 12:39:17 2014 +0530
# Node ID db439a6019a2896e7ade9b7be7e654843e22b813
# Parent  d2d181f1881a2c71fc751242a58d18f42aae8890
asm: fix sad_x4 stress case failure on AVX2

diff -r d2d181f1881a -r db439a6019a2 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm	Fri Feb 07 00:32:32 2014 -0600
+++ b/source/common/x86/sad-a.asm	Fri Feb 07 12:39:17 2014 +0530
@@ -2759,10 +2759,10 @@
     psadbw  m3, m6
     psadbw  m4, m7
     psadbw  m5, m7
-    paddw   m0, m2
-    paddw   m1, m3
-    paddw   m0, m4
-    paddw   m1, m5
+    paddd   m0, m2
+    paddd   m1, m3
+    paddd   m0, m4
+    paddd   m1, m5
 %endmacro
 
 %macro SAD_X4_4x16P_AVX2 2
@@ -2797,9 +2797,13 @@
 
 %macro SAD_X4_END_AVX2 0
     mov       r0, r6mp
-    packssdw  m0, m1        ; 0 0 1 1 2 2 3 3
-    vextracti128 xm1, m0, 1
-    phaddd   xm0, xm1       ; 0 1 2 3
+    pshufd     m0, m0, 0x8      ; per 128-bit lane: dwords 0,2 (low half of each qword) -> dwords 0,1
+    pshufd     m1, m1, 0x8      ; same compaction for m1
+    vextracti128 xm2, m0, 1     ; xm2 = upper lane of m0 (read before xm0 is overwritten below)
+    vextracti128 xm3, m1, 1     ; xm3 = upper lane of m1
+    punpcklqdq   xm0, xm1       ; xm0 = low-lane dword pair of m0 | low-lane dword pair of m1
+    punpcklqdq   xm2, xm3       ; xm2 = upper-lane dword pair of m0 | upper-lane dword pair of m1
+    phaddd   xm0, xm2       ; add each dword pair: xm0 = 0 1 2 3 (one 32-bit total each)
     mova    [r0], xm0
     RET
 %endmacro


More information about the x265-devel mailing list