[x264-devel] x86: AVX-512 zigzag_scan_4x4_field

Henrik Gramner git at videolan.org
Mon May 22 00:03:07 CEST 2017


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sat Mar 25 22:13:22 2017 +0100| [77b9a818fc622d0cdaa96aeb37339fbd5b1ef857] | committer: Henrik Gramner

x86: AVX-512 zigzag_scan_4x4_field

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=77b9a818fc622d0cdaa96aeb37339fbd5b1ef857
---

 common/dct.c         |  2 ++
 common/x86/dct-a.asm | 14 ++++++++++++++
 common/x86/dct.h     |  5 +++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/common/dct.c b/common/dct.c
index 5c1b8b5c..1be89350 100644
--- a/common/dct.c
+++ b/common/dct.c
@@ -988,6 +988,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
 #endif // ARCH_X86_64
     if( cpu&X264_CPU_AVX512 )
     {
+        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
         pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
         pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
     }
@@ -1033,6 +1034,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig
     }
     if( cpu&X264_CPU_AVX512 )
     {
+        pf_interlaced->scan_4x4  = x264_zigzag_scan_4x4_field_avx512;
         pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_avx512;
         pf_progressive->scan_8x8 = x264_zigzag_scan_8x8_frame_avx512;
     }
diff --git a/common/x86/dct-a.asm b/common/x86/dct-a.asm
index a9b853c4..5a4f316a 100644
--- a/common/x86/dct-a.asm
+++ b/common/x86/dct-a.asm
@@ -1906,6 +1906,13 @@ cglobal zigzag_scan_4x4_frame, 2,2
     mova      [r0], m0
     RET
 
+cglobal zigzag_scan_4x4_field, 2,2 ; 4x4 field scan: reads dct via r1, writes level via r0 (argument order per the dct.h prototype)
+    mova        m0, [r1]            ; load the coefficients in source order (m0 width is set by an INIT_* outside this hunk -- presumably covers all 16; confirm)
+    pshufd    xmm1, [r1+8], q3102   ; permute the four dwords starting at byte offset 8; q3102 presumably picks source dwords 2,0,1,3 (low to high) -- confirm against the q-constant definition
+    mova      [r0], m0              ; store the unpermuted copy first
+    movu    [r0+8], xmm1            ; then overwrite the 16 bytes at offset 8 with the permuted dwords; must come after the full store above
+    RET
+
 cglobal zigzag_scan_8x8_frame, 2,2
     psrld       m0, [scan_frame_avx512], 4
     mova        m1, [r1+0*64]
@@ -1944,6 +1951,13 @@ cglobal zigzag_scan_4x4_frame, 2,2
     mova      [r0], m0
     RET
 
+cglobal zigzag_scan_4x4_field, 2,2 ; 4x4 field scan: reads dct via r1, writes level via r0 (argument order per the dct.h prototype)
+    mova        m0, [r1]            ; load the coefficients in source order (m0 width is set by an INIT_* outside this hunk -- presumably covers all 16; confirm)
+    pshuflw   xmm1, [r1+4], q3102   ; permute the four low words starting at byte offset 4; q3102 presumably picks source words 2,0,1,3 (low to high) -- confirm against the q-constant definition
+    mova      [r0], m0              ; store the unpermuted copy first
+    movq    [r0+4], xmm1            ; then overwrite the 8 bytes at offset 4 with the permuted words; must come after the full store above
+    RET
+
 INIT_ZMM avx512
 cglobal zigzag_scan_8x8_frame, 2,2
     psrlw       m0, [scan_frame_avx512], 4
diff --git a/common/x86/dct.h b/common/x86/dct.h
index 6254368b..1a5c75c4 100644
--- a/common/x86/dct.h
+++ b/common/x86/dct.h
@@ -113,8 +113,9 @@ void x264_zigzag_scan_4x4_frame_ssse3 ( int16_t level[16], int16_t dct[16] );
 void x264_zigzag_scan_4x4_frame_avx   ( dctcoef level[16], dctcoef dct[16] );
 void x264_zigzag_scan_4x4_frame_xop   ( dctcoef level[16], dctcoef dct[16] );
 void x264_zigzag_scan_4x4_frame_avx512( dctcoef level[16], dctcoef dct[16] );
-void x264_zigzag_scan_4x4_field_sse2 ( int32_t level[16], int32_t dct[16] );
-void x264_zigzag_scan_4x4_field_sse  ( int16_t level[16], int16_t dct[16] );
+void x264_zigzag_scan_4x4_field_sse   ( int16_t level[16], int16_t dct[16] );
+void x264_zigzag_scan_4x4_field_sse2  ( int32_t level[16], int32_t dct[16] );
+void x264_zigzag_scan_4x4_field_avx512( dctcoef level[16], dctcoef dct[16] );
 void x264_zigzag_scan_8x8_field_xop  ( int16_t level[64], int16_t dct[64] );
 void x264_zigzag_scan_8x8_field_avx  ( int32_t level[64], int32_t dct[64] );
 void x264_zigzag_scan_8x8_field_sse4 ( int32_t level[64], int32_t dct[64] );



More information about the x264-devel mailing list