[x265] [PATCH 006 of 307] x86: AVX-512 pixel_avg_weight_w8
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:30:04 CEST 2018
# HG changeset patch
# User Vignesh Vijayakumar
# Date 1498474862 -19800
# Mon Jun 26 16:31:02 2017 +0530
# Node ID c7b36dac20317b3819fb30cf437a029a2ce7ca99
# Parent 5309fe76c442d720d2d3419eefab11f2a1f9731a
x86: AVX-512 pixel_avg_weight_w8
diff -r 5309fe76c442 -r c7b36dac2031 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Jun 26 16:21:18 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Jun 26 16:31:02 2017 +0530
@@ -3756,6 +3756,9 @@
p.cu[BLOCK_16x16].var = PFX(pixel_var_16x16_avx512);
p.pu[LUMA_16x16].pixelavg_pp = PFX(pixel_avg_16x16_avx512);
p.pu[LUMA_16x8].pixelavg_pp = PFX(pixel_avg_16x8_avx512);
+ p.pu[LUMA_8x16].pixelavg_pp = PFX(pixel_avg_8x16_avx512);
+ p.pu[LUMA_8x8].pixelavg_pp = PFX(pixel_avg_8x8_avx512);
+ p.pu[LUMA_8x4].pixelavg_pp = PFX(pixel_avg_8x4_avx512);
}
#endif
}
diff -r 5309fe76c442 -r c7b36dac2031 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm Mon Jun 26 16:21:18 2017 +0530
+++ b/source/common/x86/mc-a.asm Mon Jun 26 16:31:02 2017 +0530
@@ -3574,6 +3574,38 @@
AVG_WEIGHT 24, 7
AVG_WEIGHT 48, 7
+INIT_YMM avx512 ; pixel_avg_weight_w8: combines 8-byte rows from two sources (t2, t4) into t0, four rows per loop iteration
+cglobal pixel_avg_weight_w8
+ BIWEIGHT_START ; macro defined elsewhere; presumably loads the weight bytes into m3 and the rounding scale into m4 -- TODO confirm
+ kxnorb k1, k1, k1 ; k1[7:0] = 0xFF (XNOR of a register with itself is all ones)
+ kaddb k1, k1, k1 ; k1 = 0xFF + 0xFF = 0xFE after 8-bit wrap: qword 0 disabled, qwords 1..3 enabled
+ AVG_START 5 ; macro defined elsewhere; presumably sets up t0..t5 and the row counter -- TODO confirm
+.height_loop:
+ movq xm0, [t2] ; first source, row 0 -> low qword of m0
+ movq xm2, [t4] ; second source, row 0 -> low qword of m2
+ movq xm1, [t2+t3] ; first source, row 1 -> low qword of m1
+ movq xm5, [t4+t5] ; second source, row 1 -> low qword of m5
+ lea t2, [t2+t3*2] ; t2 += 2*t3: must happen before the masked broadcasts so they read rows 2 and 3
+ lea t4, [t4+t5*2] ; t4 += 2*t5, same step for the second source
+ vpbroadcastq m0 {k1}, [t2] ; merge-masked: row 2 of first source into qwords 1..3, qword 0 keeps row 0
+ vpbroadcastq m2 {k1}, [t4] ; row 2 of second source into qwords 1..3, qword 0 keeps row 0
+ vpbroadcastq m1 {k1}, [t2+t3] ; row 3 of first source into qwords 1..3, qword 0 keeps row 1
+ vpbroadcastq m5 {k1}, [t4+t5] ; row 3 of second source into qwords 1..3, qword 0 keeps row 1
+ punpcklbw m0, m2 ; per 128-bit lane, interleave first/second source bytes: lane 0 = row 0, lane 1 = row 2
+ punpcklbw m1, m5 ; same interleave: lane 0 = row 1, lane 1 = row 3
+ pmaddubsw m0, m3 ; unsigned bytes of m0 times signed bytes of m3, adjacent products summed into words
+ pmaddubsw m1, m3 ; same multiply-add for rows 1 and 3
+ pmulhrsw m0, m4 ; rounding high-half multiply of each word by m4
+ pmulhrsw m1, m4 ; same rounding multiply for rows 1 and 3
+ packuswb m0, m1 ; saturate words to unsigned bytes: lane 0 = row 0 | row 1, lane 1 = row 2 | row 3
+ vextracti128 xmm1, m0, 1 ; xmm1 = upper 128-bit lane (rows 2 and 3)
+ movq [t0], xm0 ; store row 0 (low qword)
+ movhps [t0+t1], xm0 ; store row 1 (high qword)
+ lea t0, [t0+t1*2] ; t0 += 2*t1 so the next two stores land on rows 2 and 3
+ movq [t0], xmm1 ; store row 2
+ movhps [t0+t1], xmm1 ; store row 3
+ AVG_END 4 ; macro defined elsewhere; presumably advances the pointers, counts off 4 rows and loops to .height_loop -- TODO confirm
+
INIT_YMM avx2
cglobal pixel_avg_weight_w16
BIWEIGHT_START
@@ -4383,6 +4415,9 @@
INIT_XMM avx512
AVGH 16, 16
AVGH 16, 8
+AVGH 8, 16
+AVGH 8, 8
+AVGH 8, 4
%endif ;HIGH_BIT_DEPTH
More information about the x265-devel
mailing list