[x264-devel] x86: faster AVX2 quant_4x4x4
Jason Garrett-Glaser
git at videolan.org
Mon May 20 23:06:49 CEST 2013
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Mon Apr 29 16:16:54 2013 -0700| [c82db4ed07d4a69a84ac99d5e79e32f61141494f] | committer: Jason Garrett-Glaser
x86: faster AVX2 quant_4x4x4
10->9 cycles
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=c82db4ed07d4a69a84ac99d5e79e32f61141494f
---
common/x86/quant-a.asm | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index 6f4b5f9..fbe2d79 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -470,25 +470,23 @@ QUANT_AC quant_8x8, 4
QUANT_DC quant_4x4_dc, 1, 6
INIT_YMM avx2
-cglobal quant_4x4x4, 3,3,7
+cglobal quant_4x4x4, 3,3,6
mova m2, [r1]
mova m3, [r2]
QUANT_ONE [r0+ 0], m2, m3, 0, 4
QUANT_ONE [r0+32], m2, m3, 0, 5
packssdw m4, m5
QUANT_ONE [r0+64], m2, m3, 0, 5
- QUANT_ONE [r0+96], m2, m3, 0, 6
- packssdw m5, m6
+ QUANT_ONE [r0+96], m2, m3, 0, 1
+ packssdw m5, m1
packssdw m4, m5
- vextracti128 xm5, m4, 1
- por xm4, xm5
- packssdw xm4, xm4
- packsswb xm4, xm4
- pxor xm3, xm3
- pcmpeqb xm4, xm3
- pmovmskb eax, xm4
- not eax
- and eax, 0xf
+ pxor m3, m3
+ pcmpeqd m4, m3
+ movmskps eax, m4
+ mov edx, eax
+ shr eax, 4
+ and eax, edx
+ xor eax, 0xf
RET
%endif ; !HIGH_BIT_DEPTH
More information about the x264-devel mailing list