[x265] [PATCH] asm: fix output change bug in pixel_sa8d_32x32, the reason is intermediate result overflow

Min Chen chenm003 at 163.com
Wed Jun 29 23:12:24 CEST 2016


# HG changeset patch
# User Min Chen <min.chen at multicorewareinc.com>
# Date 1467233520 18000
# Node ID a4f46c182d42080d6674b665cedfd8ec90a47e62
# Parent  626fcbac7ffba723dabd3a9f0507c4c80f3e7bc9
asm: fix output change bug in pixel_sa8d_32x32, the reason is intermediate result overflow
---
 source/common/x86/pixel-a.asm |   48 +++++++++++++++++++++++++++-------------
 1 files changed, 32 insertions(+), 16 deletions(-)

diff -r 626fcbac7ffb -r a4f46c182d42 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Thu Jun 16 12:57:38 2016 +0530
+++ b/source/common/x86/pixel-a.asm	Wed Jun 29 15:52:00 2016 -0500
@@ -14041,10 +14041,12 @@
     psubw m9, m6
 
     HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
-    paddw m0, m1
-    paddw m2, m8
     pmaddwd m0, m7
+    pmaddwd m1, m7
     pmaddwd m2, m7
+    pmaddwd m8, m7
+    paddd m0, m1
+    paddd m2, m8
     paddd m10, m0, m2
 
 
@@ -14083,10 +14085,12 @@
     psubw m9, m6
 
     HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
-    paddw m0, m1
-    paddw m2, m8
     pmaddwd m0, m7
+    pmaddwd m1, m7
     pmaddwd m2, m7
+    pmaddwd m8, m7
+    paddd m0, m1
+    paddd m2, m8
     paddd m12, m0, m2
 
 
@@ -14125,10 +14129,12 @@
     psubw m9, m6
 
     HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
-    paddw m0, m1
-    paddw m2, m8
     pmaddwd m0, m7
+    pmaddwd m1, m7
     pmaddwd m2, m7
+    pmaddwd m8, m7
+    paddd m0, m1
+    paddd m2, m8
     paddd m12, m0
     paddd m12, m2
 
@@ -14171,10 +14177,12 @@
     psubw m9, m6
 
     HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
-    paddw m0, m1
-    paddw m2, m8
     pmaddwd m0, m7
+    pmaddwd m1, m7
     pmaddwd m2, m7
+    pmaddwd m8, m7
+    paddd m0, m1
+    paddd m2, m8
     paddd m10, m0
     paddd m10, m2
 
@@ -14218,10 +14226,12 @@
     psubw m9, m6
 
     HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
-    paddw m0, m1
-    paddw m2, m8
     pmaddwd m0, m7
+    pmaddwd m1, m7
     pmaddwd m2, m7
+    pmaddwd m8, m7
+    paddd m0, m1
+    paddd m2, m8
     paddd m12, m0, m2
 
 
@@ -14260,10 +14270,12 @@
     psubw m9, m6
 
     HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
-    paddw m0, m1
-    paddw m2, m8
     pmaddwd m0, m7
+    pmaddwd m1, m7
     pmaddwd m2, m7
+    pmaddwd m8, m7
+    paddd m0, m1
+    paddd m2, m8
     paddd m13, m0, m2
 
 
@@ -14302,10 +14314,12 @@
     psubw m9, m6
 
     HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
-    paddw m0, m1
-    paddw m2, m8
     pmaddwd m0, m7
+    pmaddwd m1, m7
     pmaddwd m2, m7
+    pmaddwd m8, m7
+    paddd m0, m1
+    paddd m2, m8
     paddd m13, m0
     paddd m13, m2
 
@@ -14348,10 +14362,12 @@
     psubw m9, m6
 
     HADAMARD8_2D 0, 1, 2, 8, 4, 5, 3, 9, 6, amax
-    paddw m0, m1
-    paddw m2, m8
     pmaddwd m0, m7
+    pmaddwd m1, m7
     pmaddwd m2, m7
+    pmaddwd m8, m7
+    paddd m0, m1
+    paddd m2, m8
     paddd m12, m0
     paddd m12, m2
 



More information about the x265-devel mailing list