[x264-devel] x86: Fix integral_init4/8h_avx2

Henrik Gramner git at videolan.org
Sun Oct 11 19:01:01 CEST 2015


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Thu Aug 27 19:53:00 2015 +0200| [67076513267907b5601828ae6864cc063c8c7548] | committer: Henrik Gramner

x86: Fix integral_init4/8h_avx2

The AVX2 implementation was loading its input from the wrong offsets. The bug
went undetected because the checkasm test was itself incorrect.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=67076513267907b5601828ae6864cc063c8c7548
---

 common/x86/mc-a2.asm |   14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 7fa72fc..727e9c8 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -1511,11 +1511,12 @@ cglobal integral_init4h, 3,4
     neg     r2
     pxor    m4, m4
 .loop:
-    mova    m0, [r1+r2]
+    mova   xm0, [r1+r2]
+    mova   xm1, [r1+r2+16]
 %if mmsize==32
-    movu    m1, [r1+r2+8]
+    vinserti128 m0, m0, [r1+r2+ 8], 1
+    vinserti128 m1, m1, [r1+r2+24], 1
 %else
-    mova    m1, [r1+r2+16]
     palignr m1, m0, 8
 %endif
     mpsadbw m0, m4, 0
@@ -1541,13 +1542,14 @@ cglobal integral_init8h, 3,4
     neg     r2
     pxor    m4, m4
 .loop:
-    mova    m0, [r1+r2]
+    mova   xm0, [r1+r2]
+    mova   xm1, [r1+r2+16]
 %if mmsize==32
-    movu    m1, [r1+r2+8]
+    vinserti128 m0, m0, [r1+r2+ 8], 1
+    vinserti128 m1, m1, [r1+r2+24], 1
     mpsadbw m2, m0, m4, 100100b
     mpsadbw m3, m1, m4, 100100b
 %else
-    mova    m1, [r1+r2+16]
     palignr m1, m0, 8
     mpsadbw m2, m0, m4, 100b
     mpsadbw m3, m1, m4, 100b



More information about the x264-devel mailing list