[x264-devel] x86: AVX-512 zigzag_scan_4x4_field
Henrik Gramner
git at videolan.org
Mon May 22 00:03:07 CEST 2017
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sat Mar 25 22:13:22 2017 +0100| [77b9a818fc622d0cdaa96aeb37339fbd5b1ef857] | committer: Henrik Gramner
x86: AVX-512 zigzag_scan_4x4_field
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=77b9a818fc622d0cdaa96aeb37339fbd5b1ef857
---
common/dct.c | 2 ++
common/x86/dct-a.asm | 14 ++++++++++++++
common/x86/dct.h | 5 +++--
3 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/common/dct.c b/common/dct.c
index 5c1b8b5c..1be89350 100644
--- a/common/dct.c
+++ b/common/dct.c
@@ -988,6 +988,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
#endif // ARCH_X86_64
if( cpu&X264_CPU_AVX512 )
{
+ pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_avx512;
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
}
@@ -1033,6 +1034,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
}
if( cpu&X264_CPU_AVX512 )
{
+ pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_avx512;
pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
}
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index a9b853c4..5a4f316a 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -1906,6 +1906,13 @@ cglobal zigzag_scan_4x4_frame, 2,2
mova [r0], m0
RET
+cglobal zigzag_scan_4x4_field, 2,2 ; 4x4 field zigzag scan; stores go to r0 and loads come from r1 (presumably level and dct per the dct.h prototype)
+ mova m0, [r1] ; load one full vector from the start of the source (vector width is set by an INIT_* line outside this hunk)
+ pshufd xmm1, [r1+8], q3102 ; load the 16 bytes at byte offset 8 and permute their four dwords with the q3102 pattern (elements 2..5 if coefficients are 32-bit here - TODO confirm)
+ mova [r0], m0 ; store the unpermuted vector to the destination
+ movu [r0+8], xmm1 ; unaligned store of the permuted dwords at byte offset 8; overlaps the store above, so it must come after it
+ RET
+
cglobal zigzag_scan_8x8_frame, 2,2
psrld m0, [scan_frame_avx512], 4
mova m1, [r1+0*64]
@@ -1944,6 +1951,13 @@ cglobal zigzag_scan_4x4_frame, 2,2
mova [r0], m0
RET
+cglobal zigzag_scan_4x4_field, 2,2 ; 4x4 field zigzag scan; stores go to r0 and loads come from r1 (presumably level and dct per the dct.h prototype)
+ mova m0, [r1] ; load one full vector from the start of the source (vector width is set by an INIT_* line outside this hunk)
+ pshuflw xmm1, [r1+4], q3102 ; load 16 bytes at byte offset 4 and permute the low four words with the q3102 pattern (elements 2..5 if coefficients are 16-bit here - TODO confirm)
+ mova [r0], m0 ; store the unpermuted vector to the destination
+ movq [r0+4], xmm1 ; store only the low 8 bytes (the four permuted words) at byte offset 4; overlaps the store above, so it must come after it
+ RET
+
INIT_ZMM avx512
cglobal zigzag_scan_8x8_frame, 2,2
psrlw m0, [scan_frame_avx512], 4
diff --git a/common/x86/dct.h b/common/x86/dct.h
index 6254368b..1a5c75c4 100644
--- a/common/x86/dct.h
+++ b/common/x86/dct.h
@@ -113,8 +113,9 @@ void x264_zigzag_scan_4x4_frame_ssse3 ( int16_t level[16], int16_t dct[16] );
void x264_zigzag_scan_4x4_frame_avx ( dctcoef level[16], dctcoef dct[16] );
void x264_zigzag_scan_4x4_frame_xop ( dctcoef level[16], dctcoef dct[16] );
void x264_zigzag_scan_4x4_frame_avx512( dctcoef level[16], dctcoef dct[16] );
-void x264_zigzag_scan_4x4_field_sse2 ( int32_t level[16], int32_t dct[16] );
-void x264_zigzag_scan_4x4_field_sse ( int16_t level[16], int16_t dct[16] );
+void x264_zigzag_scan_4x4_field_sse ( int16_t level[16], int16_t dct[16] );
+void x264_zigzag_scan_4x4_field_sse2 ( int32_t level[16], int32_t dct[16] );
+void x264_zigzag_scan_4x4_field_avx512( dctcoef level[16], dctcoef dct[16] );
void x264_zigzag_scan_8x8_field_xop ( int16_t level[64], int16_t dct[64] );
void x264_zigzag_scan_8x8_field_avx ( int32_t level[64], int32_t dct[64] );
void x264_zigzag_scan_8x8_field_sse4 ( int32_t level[64], int32_t dct[64] );
More information about the x264-devel
mailing list