[x264-devel] x86: AVX-512 pixel_avg_weight_w8

Henrik Gramner git at videolan.org
Mon Jun 26 21:59:14 CEST 2017


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sat Jun 24 15:12:57 2017 +0200| [ba24899b0bf23345921da022f7a51e0c57dbe73d] | committer: Henrik Gramner

x86: AVX-512 pixel_avg_weight_w8

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ba24899b0bf23345921da022f7a51e0c57dbe73d
---

 common/x86/mc-a.asm | 35 +++++++++++++++++++++++++++++++++++
 common/x86/mc-c.c   |  3 +++
 2 files changed, 38 insertions(+)

diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 2dbdee5d..3c1d2145 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -276,6 +276,38 @@ cglobal pixel_avg_weight_w16
     vextracti128 [t0+t1], m0, 1
     AVG_END
 
+INIT_YMM avx512
+cglobal pixel_avg_weight_w8 ; 8-byte-wide rows: combine two inputs (t2, t4) via m3/m4 and store to t0, 4 rows per loop pass
+    BIWEIGHT_START ; macro defined elsewhere; presumably sets up the weights in m3 and the scale factor in m4 - confirm
+    kxnorb         k1, k1, k1 ; k1 = 0xff (a value XNORed with itself is all ones)
+    kaddb          k1, k1, k1 ; k1 = 0xfe (0xff+0xff kept to 8 bits): selects every qword except qword 0
+    AVG_START 5 ; macro defined elsewhere; presumably the function prologue - confirm what the argument means
+.height_loop:
+    movq          xm0, [t2] ; qword 0 of xm0 = row 0 of the first input
+    movq          xm2, [t4] ; qword 0 of xm2 = row 0 of the second input
+    movq          xm1, [t2+t3] ; qword 0 of xm1 = row 1 of the first input
+    movq          xm5, [t4+t5] ; qword 0 of xm5 = row 1 of the second input
+    lea            t2, [t2+t3*2] ; step the first input down two rows
+    lea            t4, [t4+t5*2] ; step the second input down two rows
+    vpbroadcastq   m0 {k1}, [t2] ; qwords 1-3 = row 2 of the first input; merge-masking keeps row 0 in qword 0
+    vpbroadcastq   m2 {k1}, [t4] ; same for the second input: row 0 in qword 0, row 2 in qwords 1-3
+    vpbroadcastq   m1 {k1}, [t2+t3] ; qwords 1-3 = row 3 of the first input; row 1 stays in qword 0
+    vpbroadcastq   m5 {k1}, [t4+t5] ; same for the second input: row 1 in qword 0, row 3 in qwords 1-3
+    punpcklbw      m0, m2 ; per 128-bit lane, interleave first/second input bytes: lane 0 = row 0, lane 1 = row 2
+    punpcklbw      m1, m5 ; likewise: lane 0 = row 1, lane 1 = row 3
+    pmaddubsw      m0, m3 ; multiply each interleaved byte pair by the byte pair in m3 and add into one word
+    pmaddubsw      m1, m3
+    pmulhrsw       m0, m4 ; rounded high-half multiply of each word by m4
+    pmulhrsw       m1, m4
+    packuswb       m0, m1 ; saturate words to bytes: lane 0 = row 0 | row 1, lane 1 = row 2 | row 3
+    vextracti128 xmm1, m0, 1 ; xmm1 = upper 128-bit lane (rows 2 and 3)
+    movq         [t0], xm0 ; store row 0
+    movhps    [t0+t1], xm0 ; store row 1 (high qword of the low lane)
+    lea            t0, [t0+t1*2] ; step the output down two rows
+    movq         [t0], xmm1 ; store row 2
+    movhps    [t0+t1], xmm1 ; store row 3
+    AVG_END 4 ; macro defined elsewhere; presumably completes the 4-row step and loops to .height_loop - confirm
+
 INIT_ZMM avx512
 cglobal pixel_avg_weight_w16
     BIWEIGHT_START
@@ -776,6 +808,9 @@ AVGH 16,  8
 INIT_XMM avx512
 AVGH 16, 16
 AVGH 16,  8
+AVGH  8, 16 ; AVGH is defined elsewhere; presumably emits the avx512 pixel_avg_8x16 entry point - confirm
+AVGH  8,  8 ; presumably the 8x8 variant - confirm
+AVGH  8,  4 ; presumably the 8x4 variant - confirm
 
 %endif ;HIGH_BIT_DEPTH
 
diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
index 0a7e414c..c06691c9 100644
--- a/common/x86/mc-c.c
+++ b/common/x86/mc-c.c
@@ -871,6 +871,9 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
     {
         pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_avx512;
         pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_avx512;
+        pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_avx512;
+        pf->avg[PIXEL_8x8]   = x264_pixel_avg_8x8_avx512;
+        pf->avg[PIXEL_8x4]   = x264_pixel_avg_8x4_avx512;
     }
 #endif // HIGH_BIT_DEPTH
 



More information about the x264-devel mailing list