[x265] [PATCH] asm: fix output change bug in pixel_sa8d_32x32, the reason is intermediate result overflow
Min Chen
chenm003 at 163.com
Wed Jun 29 23:12:24 CEST 2016
# HG changeset patch
# User Min Chen <min.chen at multicorewareinc.com>
# Date 1467233520 18000
# Node ID a4f46c182d42080d6674b665cedfd8ec90a47e62
# Parent 626fcbac7ffba723dabd3a9f0507c4c80f3e7bc9
asm: fix output change bug in pixel_sa8d_32x32, the reason is intermediate result overflow
---
source/common/x86/pixel-a.asm | 48 +++++++++++++++++++++++++++-------------
1 files changed, 32 insertions(+), 16 deletions(-)
diff -r 626fcbac7ffb -r a4f46c182d42 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Thu Jun 16 12:57:38 2016 +0530
+++ b/source/common/x86/pixel-a.asm Wed Jun 29 15:52:00 2016 -0500
@@ -14041,10 +14041,12 @@
psubw m9, m6
HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
- paddw m0, m1
- paddw m2, m8
pmaddwd m0, m7
+ pmaddwd m1, m7
pmaddwd m2, m7
+ pmaddwd m8, m7
+ paddd m0, m1
+ paddd m2, m8
paddd m10, m0, m2
@@ -14083,10 +14085,12 @@
psubw m9, m6
HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
- paddw m0, m1
- paddw m2, m8
pmaddwd m0, m7
+ pmaddwd m1, m7
pmaddwd m2, m7
+ pmaddwd m8, m7
+ paddd m0, m1
+ paddd m2, m8
paddd m12, m0, m2
@@ -14125,10 +14129,12 @@
psubw m9, m6
HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
- paddw m0, m1
- paddw m2, m8
pmaddwd m0, m7
+ pmaddwd m1, m7
pmaddwd m2, m7
+ pmaddwd m8, m7
+ paddd m0, m1
+ paddd m2, m8
paddd m12, m0
paddd m12, m2
@@ -14171,10 +14177,12 @@
psubw m9, m6
HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
- paddw m0, m1
- paddw m2, m8
pmaddwd m0, m7
+ pmaddwd m1, m7
pmaddwd m2, m7
+ pmaddwd m8, m7
+ paddd m0, m1
+ paddd m2, m8
paddd m10, m0
paddd m10, m2
@@ -14218,10 +14226,12 @@
psubw m9, m6
HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
- paddw m0, m1
- paddw m2, m8
pmaddwd m0, m7
+ pmaddwd m1, m7
pmaddwd m2, m7
+ pmaddwd m8, m7
+ paddd m0, m1
+ paddd m2, m8
paddd m12, m0, m2
@@ -14260,10 +14270,12 @@
psubw m9, m6
HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
- paddw m0, m1
- paddw m2, m8
pmaddwd m0, m7
+ pmaddwd m1, m7
pmaddwd m2, m7
+ pmaddwd m8, m7
+ paddd m0, m1
+ paddd m2, m8
paddd m13, m0, m2
@@ -14302,10 +14314,12 @@
psubw m9, m6
HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
- paddw m0, m1
- paddw m2, m8
pmaddwd m0, m7
+ pmaddwd m1, m7
pmaddwd m2, m7
+ pmaddwd m8, m7
+ paddd m0, m1
+ paddd m2, m8
paddd m13, m0
paddd m13, m2
@@ -14348,10 +14362,12 @@
psubw m9, m6
HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
- paddw m0, m1
- paddw m2, m8
pmaddwd m0, m7
+ pmaddwd m1, m7
pmaddwd m2, m7
+ pmaddwd m8, m7
+ paddd m0, m1
+ paddd m2, m8
paddd m12, m0
paddd m12, m2
More information about the x265-devel
mailing list