[x265] [PATCH 006 of 307] x86: AVX-512 pixel_avg_weight_w8

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:30:04 CEST 2018


# HG changeset patch
# User Vignesh Vijayakumar
# Date 1498474862 -19800
#      Mon Jun 26 16:31:02 2017 +0530
# Node ID c7b36dac20317b3819fb30cf437a029a2ce7ca99
# Parent  5309fe76c442d720d2d3419eefab11f2a1f9731a
x86: AVX-512 pixel_avg_weight_w8

diff -r 5309fe76c442 -r c7b36dac2031 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Jun 26 16:21:18 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon Jun 26 16:31:02 2017 +0530
@@ -3756,6 +3756,9 @@
         p.cu[BLOCK_16x16].var = PFX(pixel_var_16x16_avx512);
         p.pu[LUMA_16x16].pixelavg_pp = PFX(pixel_avg_16x16_avx512);
         p.pu[LUMA_16x8].pixelavg_pp = PFX(pixel_avg_16x8_avx512);
+        p.pu[LUMA_8x16].pixelavg_pp = PFX(pixel_avg_8x16_avx512);
+        p.pu[LUMA_8x8].pixelavg_pp = PFX(pixel_avg_8x8_avx512);
+        p.pu[LUMA_8x4].pixelavg_pp = PFX(pixel_avg_8x4_avx512);
     }
 #endif
 }
diff -r 5309fe76c442 -r c7b36dac2031 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Mon Jun 26 16:21:18 2017 +0530
+++ b/source/common/x86/mc-a.asm	Mon Jun 26 16:31:02 2017 +0530
@@ -3574,6 +3574,38 @@
 AVG_WEIGHT 24, 7
 AVG_WEIGHT 48, 7
 
+INIT_YMM avx512  ; mN below are used as ymm registers (xmN = their low 128 bits); k-mask registers are available
+cglobal pixel_avg_weight_w8  ; 8-byte-wide rows, 4 rows per loop pass: combines source A (t2, stride t3) with source B (t4, stride t5) into t0 (stride t1)
+    BIWEIGHT_START  ; macro defined outside this hunk; presumably sets up m3 (byte-pair weights) and m4 (rounding scale) used below -- confirm
+    kxnorb         k1, k1, k1  ; k1 = 0xFF (x XNOR x is all ones)
+    kaddb          k1, k1, k1  ; k1 = 0xFF + 0xFF = 0xFE in 8 bits: qword 0 disabled, qwords 1-3 enabled
+    AVG_START 5  ; macro defined outside this hunk (prologue/argument setup, presumably) -- confirm what the argument means
+.height_loop:  ; one pass reads 4 rows from each source and writes 4 rows to t0
+    movq          xm0, [t2]  ; A row 0 -> qword 0 of m0
+    movq          xm2, [t4]  ; B row 0 -> qword 0 of m2
+    movq          xm1, [t2+t3]  ; A row 1 -> qword 0 of m1
+    movq          xm5, [t4+t5]  ; B row 1 -> qword 0 of m5
+    lea            t2, [t2+t3*2]  ; advance A by two rows
+    lea            t4, [t4+t5*2]  ; advance B by two rows
+    vpbroadcastq   m0 {k1}, [t2]  ; A row 2 -> qwords 1-3; merge-masking leaves qword 0 (row 0) intact
+    vpbroadcastq   m2 {k1}, [t4]  ; B row 2 -> qwords 1-3, row 0 kept in qword 0
+    vpbroadcastq   m1 {k1}, [t2+t3]  ; A row 3 -> qwords 1-3, row 1 kept in qword 0
+    vpbroadcastq   m5 {k1}, [t4+t5]  ; B row 3 -> qwords 1-3, row 1 kept in qword 0
+    punpcklbw      m0, m2  ; per 128-bit lane, interleave A/B bytes of the low qword: lane 0 = row 0, lane 1 = row 2
+    punpcklbw      m1, m5  ; same for lane 0 = row 1, lane 1 = row 3
+    pmaddubsw      m0, m3  ; each (A,B) byte pair times the byte pair in m3, summed into a 16-bit word
+    pmaddubsw      m1, m3
+    pmulhrsw       m0, m4  ; rounded high-half multiply by m4 scales the sums back down
+    pmulhrsw       m1, m4
+    packuswb       m0, m1  ; saturate words to unsigned bytes; lane 0 = [row 0 | row 1], lane 1 = [row 2 | row 3]
+    vextracti128 xmm1, m0, 1  ; upper lane (rows 2 and 3) -> xmm1
+    movq         [t0], xm0  ; store row 0 (low qword)
+    movhps    [t0+t1], xm0  ; store row 1 (high qword)
+    lea            t0, [t0+t1*2]  ; advance destination by two rows
+    movq         [t0], xmm1  ; store row 2
+    movhps    [t0+t1], xmm1  ; store row 3
+    AVG_END 4  ; macro defined outside this hunk; presumably steps the pointers past the second row pair and loops to .height_loop while rows remain (4 = rows per pass) -- confirm
+
 INIT_YMM avx2
 cglobal pixel_avg_weight_w16
     BIWEIGHT_START
@@ -4383,6 +4415,9 @@
 INIT_XMM avx512
 AVGH 16, 16
 AVGH 16,  8
+AVGH  8, 16  ; AVGH is defined outside this hunk; presumably emits pixel_avg_8x16 for the active avx512 suffix (the name registered in asm-primitives.cpp) -- confirm
+AVGH  8,  8  ; presumably emits pixel_avg_8x8 -- confirm
+AVGH  8,  4  ; presumably emits pixel_avg_8x4 -- confirm
 
 %endif ;HIGH_BIT_DEPTH
 


More information about the x265-devel mailing list