[x265] [PATCH 293 of 307] x86 : AVX512 intra_pred_ang16 mode 5 and 31 high bit depth

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:34:51 CEST 2018


# HG changeset patch
# User Jayashree
# Date 1515992814 -19800
#      Mon Jan 15 10:36:54 2018 +0530
# Node ID 3a310b157fdf345023ff4e96e7de316cee79b954
# Parent  c1daa99a8c14edbe5e9e5a59a74a6b0936c27a82
x86 : AVX512 intra_pred_ang16 mode 5 and 31 high bit depth
Mode | AVX2 performance | AVX512 performance
---------------------------------------------------
 5   |    10.5x   |      16.61x
 31  |    12.26x  |      20.3x

diff -r c1daa99a8c14 -r 3a310b157fdf source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Jan 15 09:53:46 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon Jan 15 10:36:54 2018 +0530
@@ -3111,7 +3111,8 @@
         p.cu[BLOCK_16x16].intra_pred[11]    = PFX(intra_pred_ang16_11_avx512);
         p.cu[BLOCK_16x16].intra_pred[25]    = PFX(intra_pred_ang16_25_avx512);
         p.cu[BLOCK_16x16].intra_pred[27]    = PFX(intra_pred_ang16_27_avx512);
-
+        p.cu[BLOCK_16x16].intra_pred[5] = PFX(intra_pred_ang16_5_avx512);
+        p.cu[BLOCK_16x16].intra_pred[31] = PFX(intra_pred_ang16_31_avx512);
         p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
         p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
         p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r c1daa99a8c14 -r 3a310b157fdf source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Mon Jan 15 09:53:46 2018 +0530
+++ b/source/common/x86/intrapred16.asm	Mon Jan 15 10:36:54 2018 +0530
@@ -19283,10 +19283,29 @@
     call        ang16_mode_5_31
 
     add         r2,        18
-
     call        ang32_mode_5_31
     RET
-
+cglobal intra_pred_ang16_5, 3,7,13          ; mode 5 entry point for 16x16 angular intra prediction: loads register state, then defers to ang16_mode_5_31
+    add         r2,        64               ; advance the third argument by 64 bytes (presumably skips to the left-neighbour samples - TODO confirm against the caller's buffer layout)
+    xor         r6d,       r6d              ; r6 = 0; the mode 31 entry point passes 1 instead (presumably the helper's mode selector - confirm in ang16_mode_5_31)
+    vbroadcasti32x8  m15,  [pd_16]          ; replicate the 32 bytes at pd_16 across m15 (presumably the rounding constant). NOTE(review): m15 is written although cglobal declares 13 vector registers - confirm the count
+    lea         r3,        [ang_table_avx2 + 16 * 32] ; r3 = ang_table_avx2 + 512 bytes (base the helper presumably indexes for coefficients)
+    add         r1d,       r1d              ; double the second argument; the 32-bit write also clears the upper half of r1 (presumably stride in 16-bit samples -> bytes)
+    lea         r4,        [r1 * 3]         ; r4 = 3 * doubled r1; must follow the doubling above
+
+    call        ang16_mode_5_31             ; shared body for modes 5 and 31 (defined outside this hunk)
+    RET
+
+cglobal intra_pred_ang16_31, 3,7,13         ; mode 31 entry point for 16x16 angular intra prediction: loads register state, then defers to ang16_mode_5_31
+    xor         r6d,       r6d              ; clear r6 ...
+    inc         r6d                         ; ... then r6 = 1; the mode 5 entry point passes 0 instead (presumably the helper's mode selector - confirm in ang16_mode_5_31)
+    vbroadcasti32x8  m15,  [pd_16]          ; replicate the 32 bytes at pd_16 across m15 (presumably the rounding constant). NOTE(review): m15 is written although cglobal declares 13 vector registers - confirm the count
+    lea         r3,        [ang_table_avx2 + 16 * 32] ; r3 = ang_table_avx2 + 512 bytes (base the helper presumably indexes for coefficients)
+    add         r1d,       r1d              ; double the second argument; the 32-bit write also clears the upper half of r1 (presumably stride in 16-bit samples -> bytes)
+    lea         r4,        [r1 * 3]         ; r4 = 3 * doubled r1; must follow the doubling above
+
+    call        ang16_mode_5_31             ; shared body for modes 5 and 31 (defined outside this hunk); unlike mode 5, r2 is passed through unmodified
+    RET
 ;-------------------------------------------------------------------------------------------------------
 ; avx512 code for intra_pred_ang32 mode 2 to 34 end
 ;-------------------------------------------------------------------------------------------------------


More information about the x265-devel mailing list