[x265] [PATCH 287 of 307] x86: AVX512 intra_pred_ang16 mode 9 and 27 for high bit depth

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:34:45 CEST 2018


# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1515581418 -19800
#      Wed Jan 10 16:20:18 2018 +0530
# Node ID b31c44c9bd690a67837f857081f1a27fa5512d59
# Parent  59e596ff83801d7c3e3e01f6d6f64d26b2e8010f
x86: AVX512 intra_pred_ang16 mode 9 and 27 for high bit depth

AVX2 performance   : 12.10x
AVX512 performance : 15.68x

diff -r 59e596ff8380 -r b31c44c9bd69 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Jan 10 14:37:02 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Jan 10 16:20:18 2018 +0530
@@ -3102,6 +3102,8 @@
         p.cu[BLOCK_32x32].intra_pred[18]    = PFX(intra_pred_ang32_18_avx512);
         p.cu[BLOCK_32x32].intra_pred[26]    = PFX(intra_pred_ang32_26_avx512);
         p.cu[BLOCK_32x32].intra_pred[27]    = PFX(intra_pred_ang32_27_avx512);
+        p.cu[BLOCK_16x16].intra_pred[9]     = PFX(intra_pred_ang16_9_avx512);
+        p.cu[BLOCK_16x16].intra_pred[27]    = PFX(intra_pred_ang16_27_avx512);
 
         p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
         p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
diff -r 59e596ff8380 -r b31c44c9bd69 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Wed Jan 10 14:37:02 2018 +0530
+++ b/source/common/x86/intrapred16.asm	Wed Jan 10 16:20:18 2018 +0530
@@ -18779,6 +18779,26 @@
     add         r2,        2
     call        ang16_mode_9_27
     RET
+
+cglobal intra_pred_ang16_9, 3,7,17                              ; mode 9 entry point; thin setup wrapper around ang16_mode_9_27 (x86inc cglobal: presumably 3 args, 7 GPRs, 17 vector regs - confirm)
+    add         r2,        64                                   ; advance r2 by 64 bytes (the mode 27 entry applies no such offset) - presumably selects the other neighbour array; confirm
+    xor         r6d,       r6d                                  ; r6d = 0 for mode 9 (mode 27 passes 1); interpreted by ang16_mode_9_27, meaning not visible here
+    lea         r3,        [ang_table_avx2 + 16 * 32]           ; r3 = address of ang_table_avx2 plus 16 * 32 bytes
+    shl         r1d,       1                                    ; r1d *= 2 - presumably converts a stride in 16-bit samples to bytes; confirm
+    lea         r4,        [r1 * 3]                             ; r4 = 3 * r1 (computed after the doubling above)
+    vbroadcasti32x8  m15,  [pd_16]                              ; replicate the 256-bit block at pd_16 across m15 - presumably a rounding constant; confirm
+    call        ang16_mode_9_27                                 ; shared body used by both the mode 9 and mode 27 entry points
+    RET
+
+cglobal intra_pred_ang16_27, 3,7,17                             ; mode 27 entry point; thin setup wrapper around ang16_mode_9_27 (x86inc cglobal: presumably 3 args, 7 GPRs, 17 vector regs - confirm)
+    xor         r6d,       r6d                                  ; clear r6d ...
+    inc         r6d                                             ; ... then r6d = 1 for mode 27 (mode 9 passes 0); interpreted by ang16_mode_9_27, meaning not visible here
+    lea         r3,        [ang_table_avx2 + 16 * 32]           ; r3 = address of ang_table_avx2 plus 16 * 32 bytes
+    shl         r1d,       1                                    ; r1d *= 2 - presumably converts a stride in 16-bit samples to bytes; confirm
+    lea         r4,        [r1 * 3]                             ; r4 = 3 * r1 (computed after the doubling above)
+    vbroadcasti32x8  m15,  [pd_16]                              ; replicate the 256-bit block at pd_16 across m15 - presumably a rounding constant; confirm
+    call        ang16_mode_9_27                                 ; shared body used by both the mode 9 and mode 27 entry points
+    RET
 ;-------------------------------------------------------------------------------------------------------
 ; avx512 code for intra_pred_ang32 mode 2 to 34 end
 ;-------------------------------------------------------------------------------------------------------


More information about the x265-devel mailing list