[x265] [PATCH] asm: fix sad_x4 stress case failure on AVX2
Yuvaraj Venkatesh
yuvaraj at multicorewareinc.com
Fri Feb 7 08:09:37 CET 2014
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1391756957 -19800
# Fri Feb 07 12:39:17 2014 +0530
# Node ID db439a6019a2896e7ade9b7be7e654843e22b813
# Parent d2d181f1881a2c71fc751242a58d18f42aae8890
asm: fix sad_x4 stress case failure on AVX2
diff -r d2d181f1881a -r db439a6019a2 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm Fri Feb 07 00:32:32 2014 -0600
+++ b/source/common/x86/sad-a.asm Fri Feb 07 12:39:17 2014 +0530
@@ -2759,10 +2759,10 @@
psadbw m3, m6
psadbw m4, m7
psadbw m5, m7
- paddw m0, m2
- paddw m1, m3
- paddw m0, m4
- paddw m1, m5
+ paddd m0, m2
+ paddd m1, m3
+ paddd m0, m4
+ paddd m1, m5
%endmacro
%macro SAD_X4_4x16P_AVX2 2
@@ -2797,9 +2797,13 @@
%macro SAD_X4_END_AVX2 0
mov r0, r6mp
- packssdw m0, m1 ; 0 0 1 1 2 2 3 3
- vextracti128 xm1, m0, 1
- phaddd xm0, xm1 ; 0 1 2 3
+ pshufd m0, m0, 0x8
+ pshufd m1, m1, 0x8
+ vextracti128 xm2, m0, 1
+ vextracti128 xm3, m1, 1
+ punpcklqdq xm0, xm1
+ punpcklqdq xm2, xm3
+ phaddd xm0, xm2 ; 0 1 2 3
mova [r0], xm0
RET
%endmacro
More information about the x265-devel mailing list