[x264-devel] x86: AVX2 high bit-depth quant
Henrik Gramner
git at videolan.org
Mon May 20 23:06:49 CEST 2013
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sat May 4 18:48:58 2013 +0200| [db95d6af63bec7839b3d3e1f2eb67b8689dc8170] | committer: Jason Garrett-Glaser
x86: AVX2 high bit-depth quant
quant_4x4: 13->6 cycles
quant_4x4_dc: 14->8 cycles
quant_8x8: 47->24 cycles
quant_4x4x4: 48->25 cycles
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=db95d6af63bec7839b3d3e1f2eb67b8689dc8170
---
common/quant.c | 7 +++++++
common/x86/quant-a.asm | 34 +++++++++++++++++++++++++++++-----
2 files changed, 36 insertions(+), 5 deletions(-)
diff --git a/common/quant.c b/common/quant.c
index cffd8e8..d4fd405 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -537,6 +537,13 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->dequant_8x8 = x264_dequant_8x8_xop;
}
}
+ if( cpu&X264_CPU_AVX2 )
+ {
+ pf->quant_4x4 = x264_quant_4x4_avx2;
+ pf->quant_4x4_dc = x264_quant_4x4_dc_avx2;
+ pf->quant_8x8 = x264_quant_8x8_avx2;
+ pf->quant_4x4x4 = x264_quant_4x4x4_avx2;
+ }
#endif // HAVE_MMX
#else // !HIGH_BIT_DEPTH
#if HAVE_MMX
diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm
index fbe2d79..3f7e9b3 100644
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -7,7 +7,7 @@
;* Jason Garrett-Glaser <darkshikari at gmail.com>
;* Christian Heine <sennindemokrit at gmx.net>
;* Oskar Arvidsson <oskar at irock.se>
-;* Henrik Gramner <hengar-6 at student.ltu.se>
+;* Henrik Gramner <henrik at gramner.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
@@ -238,10 +238,10 @@ cextern popcnt_table
mova [%1 ], m2
mova [%1+mmsize], m3
ACCUM por, %5, 2, %4
- ACCUM por, %5, 3, %4+mmsize
+ por m%5, m3
%else ; !sse4
QUANT_ONE_AC_MMX %1, %2, %3, %4, %5
- QUANT_ONE_AC_MMX %1+mmsize, %2+mmsize, %3+mmsize, %4+mmsize, %5
+ QUANT_ONE_AC_MMX %1+mmsize, %2+mmsize, %3+mmsize, 1, %5
%endif ; cpuflag
%endmacro
@@ -279,8 +279,8 @@ cglobal quant_%1x%2, 3,3,8
%endmacro
%macro QUANT_4x4 2
- QUANT_TWO_AC r0+%1+mmsize*0, r1+mmsize*0, r2+mmsize*0, mmsize*0, %2
- QUANT_TWO_AC r0+%1+mmsize*2, r1+mmsize*2, r2+mmsize*2, mmsize*2, %2
+ QUANT_TWO_AC r0+%1+mmsize*0, r1+mmsize*0, r2+mmsize*0, 0, %2
+ QUANT_TWO_AC r0+%1+mmsize*2, r1+mmsize*2, r2+mmsize*2, 1, %2
%endmacro
%macro QUANT_4x4x4 0
@@ -324,6 +324,30 @@ QUANT_AC 4, 4
QUANT_AC 8, 8
QUANT_4x4x4
+INIT_YMM avx2
+QUANT_DC 4, 4
+QUANT_AC 4, 4
+QUANT_AC 8, 8
+
+INIT_YMM avx2 ; presumably maps m# to 256-bit ymm registers (x86inc convention) - the two-lane handling below relies on it; confirm
+cglobal quant_4x4x4, 3,3,6 ; leaves in eax a 4-bit mask: bit i is set when the accumulator for the 64-byte chunk at r0+64*i is nonzero
+ QUANT_TWO_AC r0, r1, r2, 0, 4 ; chunk 0; 5th arg picks m4 as accumulator (macro does por m%5, m3); 4th arg 0 goes to ACCUM - presumably "start fresh", confirm
+ QUANT_TWO_AC r0+64, r1, r2, 0, 5 ; chunk 1, accumulated in m5; r1/r2 are passed unchanged for every chunk
+ add r0, 128 ; step past chunks 0-1 so the next pair is again addressed as r0 and r0+64
+ packssdw m4, m5 ; narrow dwords to words with signed saturation (nonzero stays nonzero); m4 = chunks 0|1, m5 is free again
+ QUANT_TWO_AC r0, r1, r2, 0, 5 ; chunk 2 (original r0+128), accumulated in m5
+ QUANT_TWO_AC r0+64, r1, r2, 0, 1 ; chunk 3 (original r0+192), accumulated in m1
+ packssdw m5, m1 ; m5 = chunks 2|3 as words
+ packssdw m4, m5 ; second narrowing: each 128-bit lane now holds one dword per chunk, in order 0,1,2,3
+ pxor m3, m3 ; m3 = 0, the comparison operand
+ pcmpeqd m4, m3 ; dword becomes all-ones where that chunk's packed value is zero
+ movmskps eax, m4 ; collect dword sign bits: bits 0-3 from the low lane, bits 4-7 from the high lane
+ mov edx, eax ; keep the unshifted mask for the combine below
+ shr eax, 4 ; move the high-lane bits down to bits 0-3
+ and eax, edx ; bit i survives only if chunk i is zero in both lanes; bits 4-7 are cleared
+ xor eax, 0xf ; invert the low 4 bits: bit i set = chunk i has a nonzero accumulated value
+ RET
+
%endif ; HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
More information about the x264-devel
mailing list