[x265] [PATCH 301 of 307] x86: AVX512 'count_nonzero_32x32' avx-512 kernel

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:34:59 CEST 2018


# HG changeset patch
# User Jayashree
# Date 1517285149 28800
#      Mon Jan 29 20:05:49 2018 -0800
# Node ID 3a08a957d4cd2bf0eb57524651a824513378e0a3
# Parent  3c6e5ce07dbca7f967e4b5b62fe450979da3bf81
x86: AVX512 'count_nonzero_32x32' avx-512 kernel

diff -r 3c6e5ce07dbc -r 3a08a957d4cd source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Jan 29 19:38:59 2018 -0800
+++ b/source/common/x86/asm-primitives.cpp	Mon Jan 29 20:05:49 2018 -0800
@@ -5376,6 +5376,7 @@
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].satd = PFX(pixel_satd_32x16_avx512);
         p.planecopy_sp_shl = PFX(upShift_16_avx512);
         p.cu[BLOCK_16x16].count_nonzero = PFX(count_nonzero_16x16_avx512);
+        p.cu[BLOCK_32x32].count_nonzero = PFX(count_nonzero_32x32_avx512);
 
     }
 #endif
diff -r 3c6e5ce07dbc -r 3a08a957d4cd source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Mon Jan 29 19:38:59 2018 -0800
+++ b/source/common/x86/pixel-util.h	Mon Jan 29 20:05:49 2018 -0800
@@ -62,5 +62,6 @@
 uint32_t PFX(costCoeffNxN_avx2_bmi2(const uint16_t *scan, const coeff_t *coeff, intptr_t trSize, uint16_t *absCoeff, const uint8_t *tabSigCtx, uint32_t scanFlagMask, uint8_t *baseCtx, int offset, int scanPosSigOff, int subPosBase));
 
 int  PFX(count_nonzero_16x16_avx512(const int16_t* quantCoeff));
+int  PFX(count_nonzero_32x32_avx512(const int16_t* quantCoeff));
 
 #endif // ifndef X265_PIXEL_UTIL_H
diff -r 3c6e5ce07dbc -r 3a08a957d4cd source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Mon Jan 29 19:38:59 2018 -0800
+++ b/source/common/x86/pixel-util8.asm	Mon Jan 29 20:05:49 2018 -0800
@@ -1932,6 +1932,30 @@
     RET
 
 
+;-----------------------------------------------------------------------------
+; int x265_count_nonzero_32x32_avx512(const int16_t *quantCoeff);
+; Returns in eax how many of the 1024 int16 values at quantCoeff are nonzero.
+; Needs 64-bit mode: the compare mask is moved to a GPR with kmovq.
+;-----------------------------------------------------------------------------
+INIT_ZMM avx512
+cglobal count_nonzero_32x32, 1,2,2
+    xor             eax, eax                    ; eax = running total
+    pxor            m0, m0                      ; m0  = all-zero compare operand
+
+; 16 unrolled steps, each covering 128 bytes (64 coefficients) of the block
+%assign x 0
+%rep 16
+    movu            m1, [r0 + x]
+    vpacksswb       m1, [r0 + x + 64]           ; saturating pack: nonzero words stay nonzero bytes
+    vpcmpb          k1, m1, m0, 4               ; predicate 4 = NEQ: one mask bit per nonzero byte
+    kmovq           r1, k1
+    popcnt          r1, r1                      ; nonzero count of this step
+    add             eax, r1d
+%assign x x+128
+%endrep
+
+    RET
+
 ;-----------------------------------------------------------------------------------------------------------------------------------------------
 ;void weight_pp(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
 ;-----------------------------------------------------------------------------------------------------------------------------------------------


More information about the x265-devel mailing list