[x264-devel] x86: faster AVX2 quant_4x4x4

Jason Garrett-Glaser git at videolan.org
Mon May 20 23:06:49 CEST 2013


x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Mon Apr 29 16:16:54 2013 -0700| [c82db4ed07d4a69a84ac99d5e79e32f61141494f] | committer: Jason Garrett-Glaser

x86: faster AVX2 quant_4x4x4

10->9 cycles

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=c82db4ed07d4a69a84ac99d5e79e32f61141494f
---

 common/x86/quant-a.asm |   22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index 6f4b5f9..fbe2d79 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -470,25 +470,23 @@ QUANT_AC quant_8x8, 4
 QUANT_DC quant_4x4_dc, 1, 6
 
 INIT_YMM avx2
-cglobal quant_4x4x4, 3,3,7
+cglobal quant_4x4x4, 3,3,6
     mova      m2, [r1]
     mova      m3, [r2]
     QUANT_ONE [r0+ 0], m2, m3, 0, 4
     QUANT_ONE [r0+32], m2, m3, 0, 5
     packssdw  m4, m5
     QUANT_ONE [r0+64], m2, m3, 0, 5
-    QUANT_ONE [r0+96], m2, m3, 0, 6
-    packssdw  m5, m6
+    QUANT_ONE [r0+96], m2, m3, 0, 1
+    packssdw  m5, m1
     packssdw  m4, m5
-    vextracti128 xm5, m4, 1
-    por      xm4, xm5
-    packssdw xm4, xm4
-    packsswb xm4, xm4
-    pxor     xm3, xm3
-    pcmpeqb  xm4, xm3
-    pmovmskb eax, xm4
-    not      eax
-    and      eax, 0xf
+    pxor      m3, m3
+    pcmpeqd   m4, m3
+    movmskps eax, m4
+    mov      edx, eax
+    shr      eax, 4
+    and      eax, edx
+    xor      eax, 0xf
     RET
 %endif ; !HIGH_BIT_DEPTH
 



More information about the x264-devel mailing list