[x265] [PATCH 062 of 307] x86: AVX512 pixel_avg_weight_W32 for high bit depth

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:31:00 CEST 2018


# HG changeset patch
# User Vignesh Vijayakumar
# Date 1501592257 -19800
#      Tue Aug 01 18:27:37 2017 +0530
# Node ID ef7fd93923fa24a8f77a557817b03078356443e7
# Parent  465b4925d622ba66e2536c9f79eaaffcdd26d5fc
x86: AVX512 pixel_avg_weight_W32 for high bit depth

Size  |  AVX2 performance | AVX512 performance
----------------------------------------------
32x8  |     11.23x        |      15.70x
32x16 |     10.88x        |      19.51x
32x24 |     10.90x        |      20.04x
32x32 |     11.78x        |      20.37x
32x64 |     11.38x        |      20.30x

diff -r 465b4925d622 -r ef7fd93923fa source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Aug 01 17:21:50 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Aug 01 18:27:37 2017 +0530
@@ -2296,6 +2296,12 @@
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg = PFX(addAvg_32x48_avx512);
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = PFX(addAvg_32x64_avx512);
 
+        p.pu[LUMA_32x8].pixelavg_pp = PFX(pixel_avg_32x8_avx512);
+        p.pu[LUMA_32x16].pixelavg_pp = PFX(pixel_avg_32x16_avx512);
+        p.pu[LUMA_32x24].pixelavg_pp = PFX(pixel_avg_32x24_avx512);
+        p.pu[LUMA_32x32].pixelavg_pp = PFX(pixel_avg_32x32_avx512);
+        p.pu[LUMA_32x64].pixelavg_pp = PFX(pixel_avg_32x64_avx512);
+
     }
 }
 #else // if HIGH_BIT_DEPTH
diff -r 465b4925d622 -r ef7fd93923fa source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Tue Aug 01 17:21:50 2017 +0530
+++ b/source/common/x86/mc-a.asm	Tue Aug 01 18:27:37 2017 +0530
@@ -5631,6 +5631,79 @@
     RET
 %endif
 
+;-----------------------------------------------------------------------------
+;pixel_avg_pp avx512 high bit depth code start
+;-----------------------------------------------------------------------------
+%macro PROCESS_PIXELAVG_32x8_HBD_AVX512 0      ; average 8 rows of two sources into the destination, one m-register per row; caller must preset r6 = 3*r3, r7 = 3*r5, r8 = 3*r1
+    movu    m0, [r2]                           ; row 0, first source (r2)
+    movu    m1, [r4]                           ; row 0, second source (r4)
+    movu    m2, [r2 + r3]                      ; row 1, first source (r3 = its stride)
+    movu    m3, [r4 + r5]                      ; row 1, second source (r5 = its stride)
+    pavgw   m0, m1                             ; per-word unsigned average with rounding: (a + b + 1) >> 1
+    pavgw   m2, m3
+    movu    [r0], m0                           ; row 0 -> destination (r0)
+    movu    [r0 + r1], m2                      ; row 1 -> destination (r1 = destination stride)
+
+    movu    m0, [r2 + r3 * 2]                  ; row 2, first source
+    movu    m1, [r4 + r5 * 2]                  ; row 2, second source
+    movu    m2, [r2 + r6]                      ; row 3, first source (r6 = 3 * r3, set by the caller)
+    movu    m3, [r4 + r7]                      ; row 3, second source (r7 = 3 * r5, set by the caller)
+    pavgw   m0, m1
+    pavgw   m2, m3
+    movu    [r0 + r1 * 2], m0                  ; row 2 -> destination
+    movu    [r0 + r8], m2                      ; row 3 -> destination (r8 = 3 * r1, set by the caller)
+
+    lea     r0, [r0 + 4 * r1]                  ; step all three pointers down 4 rows; this is the macro's only
+    lea     r2, [r2 + 4 * r3]                  ; pointer update, so on exit r0/r2/r4 point at row 4 of the
+    lea     r4, [r4 + 4 * r5]                  ; 8-row group, not at the row following it
+
+    movu    m0, [r2]                           ; row 4, first source
+    movu    m1, [r4]                           ; row 4, second source
+    movu    m2, [r2 + r3]                      ; row 5, first source
+    movu    m3, [r4 + r5]                      ; row 5, second source
+    pavgw   m0, m1
+    pavgw   m2, m3
+    movu    [r0], m0                           ; row 4 -> destination
+    movu    [r0 + r1], m2                      ; row 5 -> destination
+
+    movu    m0, [r2 + r3 * 2]                  ; row 6, first source
+    movu    m1, [r4 + r5 * 2]                  ; row 6, second source
+    movu    m2, [r2 + r6]                      ; row 7, first source
+    movu    m3, [r4 + r7]                      ; row 7, second source
+    pavgw   m0, m1
+    pavgw   m2, m3
+    movu    [r0 + r1 * 2], m0                  ; row 6 -> destination
+    movu    [r0 + r8], m2                      ; row 7 -> destination
+%endmacro                                      ; clobbers m0-m3; r0/r2/r4 are left advanced by 4 rows
+
+%macro PIXEL_AVG_HBD_W32 1                     ; %1 = block height in rows; defines pixel_avg_32x%1, which averages two sources into a destination in 8-row groups
+INIT_ZMM avx512                                ; x86inc: use ZMM registers for m#, avx512 instruction set for what follows
+cglobal pixel_avg_32x%1, 6,9,4                 ; x86inc cglobal: 6 arguments loaded into r0-r5, 9 GPRs in use (r0-r8), 4 vector registers (m0-m3)
+    add     r1d, r1d                           ; double the destination stride (presumably elements -> bytes for 16-bit pixels)
+    add     r3d, r3d                           ; double the first-source stride; NOTE(review): 32-bit adds zero-extend into the
+    add     r5d, r5d                           ; 64-bit register, so this assumes non-negative strides that fit in 32 bits - confirm
+    lea     r6, [r3 * 3]                       ; r6 = 3 * first-source stride  (row 3 offset used by the row macro)
+    lea     r7, [r5 * 3]                       ; r7 = 3 * second-source stride (row 3 offset used by the row macro)
+    lea     r8, [r1 * 3]                       ; r8 = 3 * destination stride; NOTE(review): r7/r8 presumably require an x86-64 build - confirm the enclosing guard
+
+%rep %1/8 - 1
+    PROCESS_PIXELAVG_32x8_HBD_AVX512           ; one 8-row group; repeated (%1/8 - 1) times, fully unrolled at assembly time
+    lea     r0, [r0 + 4 * r1]                  ; the row macro leaves the pointers 4 rows into the group, so
+    lea     r2, [r2 + 4 * r3]                  ; skip the remaining 4 rows here to land on the first row of
+    lea     r4, [r4 + 4 * r5]                  ; the next 8-row group
+%endrep
+    PROCESS_PIXELAVG_32x8_HBD_AVX512           ; last 8-row group; no pointer advance is needed after it
+    RET
+%endmacro
+
+PIXEL_AVG_HBD_W32 8                            ; pixel_avg_32x8  (1 eight-row group), bound to LUMA_32x8 in asm-primitives.cpp
+PIXEL_AVG_HBD_W32 16                           ; pixel_avg_32x16 (2 groups), bound to LUMA_32x16
+PIXEL_AVG_HBD_W32 24                           ; pixel_avg_32x24 (3 groups), bound to LUMA_32x24
+PIXEL_AVG_HBD_W32 32                           ; pixel_avg_32x32 (4 groups), bound to LUMA_32x32
+PIXEL_AVG_HBD_W32 64                           ; pixel_avg_32x64 (8 groups), bound to LUMA_32x64
+;-----------------------------------------------------------------------------
+;pixel_avg_pp avx512 high bit depth code end
+;-----------------------------------------------------------------------------
 %endif ; HIGH_BIT_DEPTH
 
 %if HIGH_BIT_DEPTH == 0


More information about the x265-devel mailing list