[x264-devel] x86: Faster quant_4x4x4
Henrik Gramner
git at videolan.org
Tue Aug 26 18:23:14 CEST 2014
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Tue Aug 5 01:42:47 2014 +0200| [98100b88b475227f375d9bcbaea0bac57008accc] | committer: Fiona Glaser
x86: Faster quant_4x4x4
Also drop the MMX version rather than adding a bunch of ifdeffery to keep supporting it after this change (the new mask extraction uses movmskps, which operates on XMM registers only).
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=98100b88b475227f375d9bcbaea0bac57008accc
---
common/quant.c | 1 -
common/x86/quant-a.asm | 25 ++++++++-----------------
common/x86/quant.h | 1 -
3 files changed, 8 insertions(+), 19 deletions(-)
diff --git a/common/quant.c b/common/quant.c
index 3515b2e..d7b6911 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -559,7 +559,6 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
{
#if ARCH_X86
pf->quant_4x4 = x264_quant_4x4_mmx;
- pf->quant_4x4x4 = x264_quant_4x4x4_mmx;
pf->quant_8x8 = x264_quant_8x8_mmx;
pf->dequant_4x4 = x264_dequant_4x4_mmx;
pf->dequant_4x4_dc = x264_dequant_4x4dc_mmx2;
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index ed01c37..fb588d3 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -292,14 +292,11 @@ cglobal quant_4x4x4, 3,3,8
QUANT_4x4 0, 6
QUANT_4x4 64, 7
packssdw m6, m7
- packssdw m5, m6
- packssdw m5, m5 ; AA BB CC DD
- packsswb m5, m5 ; A B C D
+ packssdw m5, m6 ; AAAA BBBB CCCC DDDD
pxor m4, m4
- pcmpeqb m5, m4
- pmovmskb eax, m5
- not eax
- and eax, 0xf
+ pcmpeqd m5, m4
+ movmskps eax, m5
+ xor eax, 0xf
RET
%endmacro
@@ -444,16 +441,11 @@ cglobal quant_4x4x4, 3,3,7
QUANT_4x4 64, 5
QUANT_4x4 96, 6
packssdw m5, m6
- packssdw m4, m5
-%if mmsize == 16
- packssdw m4, m4 ; AA BB CC DD
-%endif
- packsswb m4, m4 ; A B C D
+ packssdw m4, m5 ; AAAA BBBB CCCC DDDD
pxor m3, m3
- pcmpeqb m4, m3
- pmovmskb eax, m4
- not eax
- and eax, 0xf
+ pcmpeqd m4, m3
+ movmskps eax, m4
+ xor eax, 0xf
RET
%endmacro
@@ -464,7 +456,6 @@ QUANT_DC quant_4x4_dc, 4
INIT_MMX mmx
QUANT_AC quant_4x4, 4
QUANT_AC quant_8x8, 16
-QUANT_4x4x4
%endif
INIT_XMM sse2
diff --git a/common/x86/quant.h b/common/x86/quant.h
index 5adc687..1fcb800 100644
--- a/common/x86/quant.h
+++ b/common/x86/quant.h
@@ -31,7 +31,6 @@
int x264_quant_2x2_dc_mmx2( dctcoef dct[4], int mf, int bias );
int x264_quant_4x4_dc_mmx2( dctcoef dct[16], int mf, int bias );
int x264_quant_4x4_mmx( dctcoef dct[16], udctcoef mf[16], udctcoef bias[16] );
-int x264_quant_4x4x4_mmx( dctcoef dct[4][16], udctcoef mf[16], udctcoef bias[16] );
int x264_quant_8x8_mmx( dctcoef dct[64], udctcoef mf[64], udctcoef bias[64] );
int x264_quant_2x2_dc_sse2( dctcoef dct[16], int mf, int bias );
int x264_quant_4x4_dc_sse2( dctcoef dct[16], int mf, int bias );
More information about the x264-devel mailing list