[x264-devel] x86: Faster quant_4x4x4

Henrik Gramner git at videolan.org
Tue Aug 26 18:23:14 CEST 2014


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Tue Aug  5 01:42:47 2014 +0200 | [98100b88b475227f375d9bcbaea0bac57008accc] | committer: Fiona Glaser

x86: Faster quant_4x4x4

Also drop the MMX version rather than adding a bunch of preprocessor conditionals to keep supporting it after this change.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=98100b88b475227f375d9bcbaea0bac57008accc
---

 common/quant.c         |    1 -
 common/x86/quant-a.asm |   25 ++++++++-----------------
 common/x86/quant.h     |    1 -
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/common/quant.c b/common/quant.c
index 3515b2e..d7b6911 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -559,7 +559,6 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
     {
 #if ARCH_X86
         pf->quant_4x4 = x264_quant_4x4_mmx;
-        pf->quant_4x4x4 = x264_quant_4x4x4_mmx;
         pf->quant_8x8 = x264_quant_8x8_mmx;
         pf->dequant_4x4 = x264_dequant_4x4_mmx;
         pf->dequant_4x4_dc = x264_dequant_4x4dc_mmx2;
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index ed01c37..fb588d3 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -292,14 +292,11 @@ cglobal quant_4x4x4, 3,3,8
     QUANT_4x4  0, 6
     QUANT_4x4 64, 7
     packssdw  m6, m7
-    packssdw  m5, m6
-    packssdw  m5, m5  ; AA BB CC DD
-    packsswb  m5, m5  ; A B C D
+    packssdw  m5, m6  ; AAAA BBBB CCCC DDDD
     pxor      m4, m4
-    pcmpeqb   m5, m4
-    pmovmskb eax, m5
-    not      eax
-    and      eax, 0xf
+    pcmpeqd   m5, m4
+    movmskps eax, m5
+    xor      eax, 0xf
     RET
 %endmacro
 
@@ -444,16 +441,11 @@ cglobal quant_4x4x4, 3,3,7
     QUANT_4x4 64, 5
     QUANT_4x4 96, 6
     packssdw  m5, m6
-    packssdw  m4, m5
-%if mmsize == 16
-    packssdw  m4, m4  ; AA BB CC DD
-%endif
-    packsswb  m4, m4  ; A B C D
+    packssdw  m4, m5  ; AAAA BBBB CCCC DDDD
     pxor      m3, m3
-    pcmpeqb   m4, m3
-    pmovmskb eax, m4
-    not      eax
-    and      eax, 0xf
+    pcmpeqd   m4, m3
+    movmskps eax, m4
+    xor      eax, 0xf
     RET
 %endmacro
 
@@ -464,7 +456,6 @@ QUANT_DC quant_4x4_dc, 4
 INIT_MMX mmx
 QUANT_AC quant_4x4, 4
 QUANT_AC quant_8x8, 16
-QUANT_4x4x4
 %endif
 
 INIT_XMM sse2
diff --git a/common/x86/quant.h b/common/x86/quant.h
index 5adc687..1fcb800 100644
--- a/common/x86/quant.h
+++ b/common/x86/quant.h
@@ -31,7 +31,6 @@
 int x264_quant_2x2_dc_mmx2( dctcoef dct[4], int mf, int bias );
 int x264_quant_4x4_dc_mmx2( dctcoef dct[16], int mf, int bias );
 int x264_quant_4x4_mmx( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
-int x264_quant_4x4x4_mmx( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] );
 int x264_quant_8x8_mmx( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
 int x264_quant_2x2_dc_sse2( dctcoef dct[16], int mf, int bias );
 int x264_quant_4x4_dc_sse2( dctcoef dct[16], int mf, int bias );



More information about the x264-devel mailing list