[x265] [PATCH 305 of 307] X86: AVX512 intra_pred_ang16 mode 8 and 28 high bit depth

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:35:03 CEST 2018


# HG changeset patch
# User Jayashri Murugan <jayashri at multicorewareinc.com>
# Date 1517294626 -19800
#      Tue Jan 30 12:13:46 2018 +0530
# Node ID b80e844209ecd0abc896df94306a5ef96b27b918
# Parent  e82bfd58acb99cd4c2e4767b1afdd3750881a68e
X86: AVX512 intra_pred_ang16 mode 8 and 28 high bit depth

Mode | AVX2 performance | AVX512 performance
---------------------------------------------------
 8   |    9.31x         |    10.78x
 28  |    12.80x        |    15.21x

diff -r e82bfd58acb9 -r b80e844209ec source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Jan 19 16:56:49 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Jan 30 12:13:46 2018 +0530
@@ -3113,14 +3113,14 @@
         p.cu[BLOCK_32x32].intra_pred[6] = PFX(intra_pred_ang32_6_avx512);
         p.cu[BLOCK_32x32].intra_pred[29] = PFX(intra_pred_ang32_29_avx512);
         p.cu[BLOCK_32x32].intra_pred[7] = PFX(intra_pred_ang32_7_avx512);
-
+        p.cu[BLOCK_32x32].intra_pred[8]    = PFX(intra_pred_ang32_8_avx512);
+        p.cu[BLOCK_32x32].intra_pred[28]    = PFX(intra_pred_ang32_28_avx512);
         p.cu[BLOCK_16x16].intra_pred[9]     = PFX(intra_pred_ang16_9_avx512);
         p.cu[BLOCK_16x16].intra_pred[11]    = PFX(intra_pred_ang16_11_avx512);
         p.cu[BLOCK_16x16].intra_pred[25]    = PFX(intra_pred_ang16_25_avx512);
         p.cu[BLOCK_16x16].intra_pred[27]    = PFX(intra_pred_ang16_27_avx512);
-        p.cu[BLOCK_32x32].intra_pred[8]    = PFX(intra_pred_ang32_8_avx512);
-        p.cu[BLOCK_32x32].intra_pred[28]    = PFX(intra_pred_ang32_28_avx512);
-
+        p.cu[BLOCK_16x16].intra_pred[8]     = PFX(intra_pred_ang16_8_avx512);
+        p.cu[BLOCK_16x16].intra_pred[28]    = PFX(intra_pred_ang16_28_avx512);
         p.cu[BLOCK_16x16].intra_pred[5] = PFX(intra_pred_ang16_5_avx512);
         p.cu[BLOCK_16x16].intra_pred[31] = PFX(intra_pred_ang16_31_avx512);
         p.cu[BLOCK_16x16].intra_pred[4] = PFX(intra_pred_ang16_4_avx512);
diff -r e82bfd58acb9 -r b80e844209ec source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Fri Jan 19 16:56:49 2018 +0530
+++ b/source/common/x86/intrapred16.asm	Tue Jan 30 12:13:46 2018 +0530
@@ -11843,6 +11843,27 @@
     packusdw        m11, m3
     TRANSPOSE_STORE_AVX2 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 16
     ret
+cglobal intra_pred_ang16_8, 3,7,16                        ; x86inc entry: 3 args, 7 GPRs, 16 vector regs; 16x16 angular intra mode 8 wrapper
+    add         r2,        64                             ; advance source pointer by 64 bytes (mode 28 does not) - presumably selects the left-neighbour samples; TODO confirm buffer layout
+    xor         r6d,       r6d                            ; r6 = 0 (mode 28 passes 1) - flag consumed by ang16_mode_8_28; presumably selects the store orientation, verify in helper
+    lea         r3,        [ang_table_avx2 + 15 * 32]     ; r3 = ang_table_avx2 + 480 bytes; assumes 32-byte table rows, i.e. row 15
+    add         r1d,       r1d                            ; r1 *= 2 - presumably stride in pixels -> bytes for 16-bit (high bit depth) samples
+    lea         r4,        [r1 * 3]                       ; r4 = 3 * r1 (three-row stride offset)
+    vbroadcasti32x8  m15,  [pd_16]                        ; replicate the 256 bits at pd_16 into m15 - presumably the dword rounding constant 16; confirm pd_16 definition

+    call        ang16_mode_8_28                           ; shared body for modes 8 and 28; uses the registers prepared above
+    RET                                                   ; x86inc epilogue + return
+
+cglobal intra_pred_ang16_28, 3,7,16                       ; x86inc entry: 3 args, 7 GPRs, 16 vector regs; 16x16 angular intra mode 28 wrapper
+    xor         r6d,       r6d                            ; clear r6 before setting the flag
+    inc         r6d                                       ; r6 = 1 (mode 8 passes 0) - flag consumed by ang16_mode_8_28; presumably selects the store orientation, verify in helper
+    lea         r3,        [ang_table_avx2 + 15 * 32]     ; r3 = ang_table_avx2 + 480 bytes; assumes 32-byte table rows, i.e. row 15
+    add         r1d,       r1d                            ; r1 *= 2 - presumably stride in pixels -> bytes for 16-bit (high bit depth) samples
+    lea         r4,        [r1 * 3]                       ; r4 = 3 * r1 (three-row stride offset)
+    vbroadcasti32x8  m15,  [pd_16]                        ; replicate the 256 bits at pd_16 into m15 - presumably the dword rounding constant 16; confirm pd_16 definition

+    call        ang16_mode_8_28                           ; shared body for modes 8 and 28; r2 is left unmodified here, unlike the mode 8 wrapper
+    RET                                                   ; x86inc epilogue + return
 
 ;; angle 16, modes 7 and 29
 cglobal ang16_mode_7_29


More information about the x265-devel mailing list