[x265] [PATCH 303 of 307] X86:AVX512 intra_pred_ang16 mode 7 and 29 high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:35:01 CEST 2018
# HG changeset patch
# User Jayashree
# Date 1516212669 28800
# Wed Jan 17 10:11:09 2018 -0800
# Node ID f56354b2b542aaafa389a226f0fb3b41e4d33803
# Parent ae3f7bd65b45df716f1cd56b6b15d91643772621
X86:AVX512 intra_pred_ang16 mode 7 and 29 high bit depth
Mode | AVX2 performance | AVX512 performance
---------------------------------------------------
7 | 9.2x | 11.9x
29 | 17.3x | 24.30x
diff -r ae3f7bd65b45 -r f56354b2b542 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Jan 17 10:11:09 2018 -0800
+++ b/source/common/x86/asm-primitives.cpp Wed Jan 17 10:11:09 2018 -0800
@@ -3127,6 +3127,8 @@
p.cu[BLOCK_16x16].intra_pred[32] = PFX(intra_pred_ang16_32_avx512);
p.cu[BLOCK_16x16].intra_pred[6] = PFX(intra_pred_ang16_6_avx512);
p.cu[BLOCK_16x16].intra_pred[30] = PFX(intra_pred_ang16_30_avx512);
+ p.cu[BLOCK_16x16].intra_pred[7] = PFX(intra_pred_ang16_7_avx512);
+ p.cu[BLOCK_16x16].intra_pred[29] = PFX(intra_pred_ang16_29_avx512);
p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r ae3f7bd65b45 -r f56354b2b542 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Wed Jan 17 10:11:09 2018 -0800
+++ b/source/common/x86/intrapred16.asm Wed Jan 17 10:11:09 2018 -0800
@@ -20618,11 +20618,30 @@
mov r0, r5
call ang16_mode_7_29
-
add r2, 8
-
call ang32_mode_7_29
RET
+cglobal intra_pred_ang16_7, 3,7,17 ; entry point for 16x16 angular mode 7; x86inc cglobal counts (presumably 3 args, 7 GPRs, 17 vector regs -- confirm against x86inc)
+ add r2, 64 ; advance r2 by 64 bytes (the mode 29 entry omits this); presumably skips to the other run of reference samples -- TODO confirm
+ xor r6d, r6d ; r6 = 0 (the mode 29 entry passes 1); the flag's meaning is defined inside ang16_mode_7_29, not visible here
+ vbroadcasti32x8 m15, [pd_16] ; broadcast the 8 dwords (256 bits) at pd_16 into m15
+ lea r3, [ang_table_avx2 + 17 * 32] ; r3 = address of ang_table_avx2 plus 17*32 bytes
+ add r1d, r1d ; r1 = 2 * r1; presumably converts the stride to bytes for 16-bit samples -- confirm
+ lea r4, [r1 * 3] ; r4 = 3 * (doubled r1)
+
+ call ang16_mode_7_29 ; helper shared with the mode 29 entry; defined outside this hunk
+ RET
+
+cglobal intra_pred_ang16_29, 3,7,17 ; entry point for 16x16 angular mode 29; x86inc cglobal counts (presumably 3 args, 7 GPRs, 17 vector regs -- confirm against x86inc)
+ xor r6d, r6d ; clear r6 before setting the flag value below
+ inc r6d ; r6 = 1 (the mode 7 entry passes 0); the flag's meaning is defined inside ang16_mode_7_29, not visible here
+ vbroadcasti32x8 m15, [pd_16] ; broadcast the 8 dwords (256 bits) at pd_16 into m15
+ lea r3, [ang_table_avx2 + 17 * 32] ; r3 = address of ang_table_avx2 plus 17*32 bytes
+ add r1d, r1d ; r1 = 2 * r1; presumably converts the stride to bytes for 16-bit samples -- confirm
+ lea r4, [r1 * 3] ; r4 = 3 * (doubled r1)
+
+ call ang16_mode_7_29 ; helper shared with the mode 7 entry; defined outside this hunk
+ RET
;-------------------------------------------------------------------------------------------------------
; avx512 code for intra_pred_ang32 mode 2 to 34 end
;-------------------------------------------------------------------------------------------------------
More information about the x265-devel
mailing list