[x265] [PATCH 303 of 307] X86:AVX512 intra_pred_ang16 mode 7 and 29 high bit depth

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:35:01 CEST 2018


# HG changeset patch
# User Jayashree
# Date 1516212669 28800
#      Wed Jan 17 10:11:09 2018 -0800
# Node ID f56354b2b542aaafa389a226f0fb3b41e4d33803
# Parent  ae3f7bd65b45df716f1cd56b6b15d91643772621
X86:AVX512 intra_pred_ang16 mode 7 and 29 high bit depth

Mode | AVX2 performance | AVX512 performance
---------------------------------------------------
 7   |    9.2x          |     11.9x
 29  |    17.3x         |     24.30x

diff -r ae3f7bd65b45 -r f56354b2b542 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Jan 17 10:11:09 2018 -0800
+++ b/source/common/x86/asm-primitives.cpp	Wed Jan 17 10:11:09 2018 -0800
@@ -3127,6 +3127,8 @@
         p.cu[BLOCK_16x16].intra_pred[32] = PFX(intra_pred_ang16_32_avx512);
         p.cu[BLOCK_16x16].intra_pred[6] = PFX(intra_pred_ang16_6_avx512);
         p.cu[BLOCK_16x16].intra_pred[30] = PFX(intra_pred_ang16_30_avx512);
+        p.cu[BLOCK_16x16].intra_pred[7] = PFX(intra_pred_ang16_7_avx512);
+        p.cu[BLOCK_16x16].intra_pred[29] = PFX(intra_pred_ang16_29_avx512);
         p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
         p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
         p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r ae3f7bd65b45 -r f56354b2b542 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Wed Jan 17 10:11:09 2018 -0800
+++ b/source/common/x86/intrapred16.asm	Wed Jan 17 10:11:09 2018 -0800
@@ -20618,11 +20618,30 @@
     mov         r0,        r5
 
     call        ang16_mode_7_29
-
     add         r2,        8
-
     call        ang32_mode_7_29
     RET
+cglobal intra_pred_ang16_7, 3,7,17
+    add         r2,        64
+    xor         r6d,       r6d
+    vbroadcasti32x8  m15,  [pd_16]
+    lea         r3,        [ang_table_avx2 + 17 * 32]
+    add         r1d,       r1d
+    lea         r4,        [r1 * 3]
+
+    call        ang16_mode_7_29
+    RET
+
+cglobal intra_pred_ang16_29, 3,7,17
+    xor         r6d,       r6d
+    inc         r6d
+    vbroadcasti32x8  m15,  [pd_16]
+    lea         r3,        [ang_table_avx2 + 17 * 32]
+    add         r1d,       r1d
+    lea         r4,        [r1 * 3]
+
+    call        ang16_mode_7_29
+    RET
 ;-------------------------------------------------------------------------------------------------------
 ; avx512 code for intra_pred_ang32 mode 2 to 34 end
 ;-------------------------------------------------------------------------------------------------------


More information about the x265-devel mailing list