[x265] [PATCH 287 of 307] x86: AVX512 intra_pred_ang16 mode 9 and 27 for high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:45 CEST 2018
# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1515581418 -19800
# Wed Jan 10 16:20:18 2018 +0530
# Node ID b31c44c9bd690a67837f857081f1a27fa5512d59
# Parent 59e596ff83801d7c3e3e01f6d6f64d26b2e8010f
x86: AVX512 intra_pred_ang16 mode 9 and 27 for high bit depth
AVX2 performance : 12.10x
AVX512 performance : 15.68x
diff -r 59e596ff8380 -r b31c44c9bd69 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Jan 10 14:37:02 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Jan 10 16:20:18 2018 +0530
@@ -3102,6 +3102,8 @@
p.cu[BLOCK_32x32].intra_pred[18] = PFX(intra_pred_ang32_18_avx512);
p.cu[BLOCK_32x32].intra_pred[26] = PFX(intra_pred_ang32_26_avx512);
p.cu[BLOCK_32x32].intra_pred[27] = PFX(intra_pred_ang32_27_avx512);
+ p.cu[BLOCK_16x16].intra_pred[9] = PFX(intra_pred_ang16_9_avx512);
+ p.cu[BLOCK_16x16].intra_pred[27] = PFX(intra_pred_ang16_27_avx512);
p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
diff -r 59e596ff8380 -r b31c44c9bd69 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Wed Jan 10 14:37:02 2018 +0530
+++ b/source/common/x86/intrapred16.asm Wed Jan 10 16:20:18 2018 +0530
@@ -18779,6 +18779,26 @@
add r2, 2
call ang16_mode_9_27
RET
+
+cglobal intra_pred_ang16_9, 3,7,17 ; 16x16 angular intra prediction, mode 9; operands presumably x86inc-style (args, GPRs, vector regs) -- confirm
+ add r2, 64 ; advance r2 by 64 bytes (the mode 27 entry point does not); presumably moves to the left-neighbour samples -- confirm
+ xor r6d, r6d ; r6 = 0 for mode 9 (mode 27 passes 1); presumably a mode selector read by the helper -- confirm
+ lea r3, [ang_table_avx2 + 16 * 32] ; r3 = ang_table_avx2 + 512 bytes
+ shl r1d, 1 ; double r1; presumably converts the stride from 16-bit samples to bytes -- confirm
+ lea r4, [r1 * 3] ; r4 = 3 * r1 (the doubled value)
+ vbroadcasti32x8 m15, [pd_16] ; replicate the 32 bytes at pd_16 across m15
+ call ang16_mode_9_27 ; helper shared with the mode 27 entry point; defined outside this hunk
+ RET
+
+cglobal intra_pred_ang16_27, 3,7,17 ; 16x16 angular intra prediction, mode 27; operands presumably x86inc-style (args, GPRs, vector regs) -- confirm
+ xor r6d, r6d ; clear r6 ...
+ inc r6d ; ... then r6 = 1 for mode 27 (mode 9 passes 0); presumably a mode selector read by the helper -- confirm
+ lea r3, [ang_table_avx2 + 16 * 32] ; r3 = ang_table_avx2 + 512 bytes
+ shl r1d, 1 ; double r1; presumably converts the stride from 16-bit samples to bytes -- confirm
+ lea r4, [r1 * 3] ; r4 = 3 * r1 (the doubled value)
+ vbroadcasti32x8 m15, [pd_16] ; replicate the 32 bytes at pd_16 across m15
+ call ang16_mode_9_27 ; helper shared with the mode 9 entry point; defined outside this hunk
+ RET
;-------------------------------------------------------------------------------------------------------
; avx512 code for intra_pred_ang32 mode 2 to 34 end
;-------------------------------------------------------------------------------------------------------
More information about the x265-devel
mailing list