[x264-devel] x86: Fix integral_init4/8h_avx2
Henrik Gramner
git at videolan.org
Sun Oct 11 19:01:01 CEST 2015
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Thu Aug 27 19:53:00 2015 +0200| [67076513267907b5601828ae6864cc063c8c7548] | committer: Henrik Gramner
x86: Fix integral_init4/8h_avx2
The AVX2 implementation was using the wrong offsets. It went undetected due to
the checkasm test being incorrect.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=67076513267907b5601828ae6864cc063c8c7548
---
common/x86/mc-a2.asm | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/common/x86/mc-a2.asm b/common/x86/mc-a2.asm
index 7fa72fc..727e9c8 100644
--- a/common/x86/mc-a2.asm
+++ b/common/x86/mc-a2.asm
@@ -1511,11 +1511,12 @@ cglobal integral_init4h, 3,4
neg r2
pxor m4, m4
.loop:
- mova m0, [r1+r2]
+ mova xm0, [r1+r2]
+ mova xm1, [r1+r2+16]
%if mmsize==32
- movu m1, [r1+r2+8]
+ vinserti128 m0, m0, [r1+r2+ 8], 1
+ vinserti128 m1, m1, [r1+r2+24], 1
%else
- mova m1, [r1+r2+16]
palignr m1, m0, 8
%endif
mpsadbw m0, m4, 0
@@ -1541,13 +1542,14 @@ cglobal integral_init8h, 3,4
neg r2
pxor m4, m4
.loop:
- mova m0, [r1+r2]
+ mova xm0, [r1+r2]
+ mova xm1, [r1+r2+16]
%if mmsize==32
- movu m1, [r1+r2+8]
+ vinserti128 m0, m0, [r1+r2+ 8], 1
+ vinserti128 m1, m1, [r1+r2+24], 1
mpsadbw m2, m0, m4, 100100b
mpsadbw m3, m1, m4, 100100b
%else
- mova m1, [r1+r2+16]
palignr m1, m0, 8
mpsadbw m2, m0, m4, 100b
mpsadbw m3, m1, m4, 100b
More information about the x264-devel mailing list