[x265] [PATCH 305 of 307] X86: AVX512 intra_pred_ang16 mode 8 and 28 high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:35:03 CEST 2018
# HG changeset patch
# User Jayashri Murugan <jayashri at multicorewareinc.com>
# Date 1517294626 -19800
# Tue Jan 30 12:13:46 2018 +0530
# Node ID b80e844209ecd0abc896df94306a5ef96b27b918
# Parent e82bfd58acb99cd4c2e4767b1afdd3750881a68e
X86: AVX512 intra_pred_ang16 mode 8 and 28 high bit depth
Mode | AVX2 performance | AVX512 performance
---------------------------------------------------
8 | 9.31x | 10.78x
28 | 12.80x | 15.21x
diff -r e82bfd58acb9 -r b80e844209ec source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Jan 19 16:56:49 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Jan 30 12:13:46 2018 +0530
@@ -3113,14 +3113,14 @@
p.cu[BLOCK_32x32].intra_pred[6] = PFX(intra_pred_ang32_6_avx512);
p.cu[BLOCK_32x32].intra_pred[29] = PFX(intra_pred_ang32_29_avx512);
p.cu[BLOCK_32x32].intra_pred[7] = PFX(intra_pred_ang32_7_avx512);
-
+ p.cu[BLOCK_32x32].intra_pred[8] = PFX(intra_pred_ang32_8_avx512);
+ p.cu[BLOCK_32x32].intra_pred[28] = PFX(intra_pred_ang32_28_avx512);
p.cu[BLOCK_16x16].intra_pred[9] = PFX(intra_pred_ang16_9_avx512);
p.cu[BLOCK_16x16].intra_pred[11] = PFX(intra_pred_ang16_11_avx512);
p.cu[BLOCK_16x16].intra_pred[25] = PFX(intra_pred_ang16_25_avx512);
p.cu[BLOCK_16x16].intra_pred[27] = PFX(intra_pred_ang16_27_avx512);
- p.cu[BLOCK_32x32].intra_pred[8] = PFX(intra_pred_ang32_8_avx512);
- p.cu[BLOCK_32x32].intra_pred[28] = PFX(intra_pred_ang32_28_avx512);
-
+ p.cu[BLOCK_16x16].intra_pred[8] = PFX(intra_pred_ang16_8_avx512);
+ p.cu[BLOCK_16x16].intra_pred[28] = PFX(intra_pred_ang16_28_avx512);
p.cu[BLOCK_16x16].intra_pred[5] = PFX(intra_pred_ang16_5_avx512);
p.cu[BLOCK_16x16].intra_pred[31] = PFX(intra_pred_ang16_31_avx512);
p.cu[BLOCK_16x16].intra_pred[4] = PFX(intra_pred_ang16_4_avx512);
diff -r e82bfd58acb9 -r b80e844209ec source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Fri Jan 19 16:56:49 2018 +0530
+++ b/source/common/x86/intrapred16.asm Tue Jan 30 12:13:46 2018 +0530
@@ -11843,6 +11843,27 @@
packusdw m11, m3
TRANSPOSE_STORE_AVX2 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 16
ret
+cglobal intra_pred_ang16_8, 3,7,16 ; 16x16 angular intra prediction, mode 8; x86inc cglobal: 3 args, 7 GPRs, 16 vector regs
+ add r2, 64 ; advance r2 by 64 bytes (mode 28 entry does not); presumably skips to the other neighbour row of the reference buffer - TODO confirm layout
+ xor r6d, r6d ; r6d = 0 here, 1 in the mode 28 entry; presumably a mode flag read by ang16_mode_8_28 - confirm in that routine
+ lea r3, [ang_table_avx2 + 15 * 32] ; r3 = ang_table_avx2 + 480 bytes; presumably the middle of the coefficient table so the helper can index both ways - TODO confirm
+ add r1d, r1d ; r1 *= 2; presumably converts a stride in 16-bit pixels to bytes (patch title says high bit depth)
+ lea r4, [r1 * 3] ; r4 = 3 * (doubled) r1
+ vbroadcasti32x8 m15, [pd_16] ; replicate the 32 bytes at pd_16 across m15; presumably a rounding constant used by the helper - confirm
+
+ call ang16_mode_8_28 ; shared routine, also called by the mode 28 entry point
+ RET ; x86inc epilogue macro (not the bare ret used by the helper above)
+
+cglobal intra_pred_ang16_28, 3,7,16 ; 16x16 angular intra prediction, mode 28; x86inc cglobal: 3 args, 7 GPRs, 16 vector regs
+ xor r6d, r6d ; clear r6d ...
+ inc r6d ; ... then r6d = 1 (mode 8 entry passes 0); presumably a mode flag read by ang16_mode_8_28 - confirm in that routine
+ lea r3, [ang_table_avx2 + 15 * 32] ; r3 = ang_table_avx2 + 480 bytes, same base as the mode 8 entry
+ add r1d, r1d ; r1 *= 2; presumably converts a stride in 16-bit pixels to bytes (patch title says high bit depth)
+ lea r4, [r1 * 3] ; r4 = 3 * (doubled) r1
+ vbroadcasti32x8 m15, [pd_16] ; replicate the 32 bytes at pd_16 across m15; presumably a rounding constant used by the helper - confirm
+
+ call ang16_mode_8_28 ; shared routine; unlike mode 8, r2 is passed through unmodified
+ RET ; x86inc epilogue macro
;; angle 16, modes 7 and 29
cglobal ang16_mode_7_29
More information about the x265-devel
mailing list