[x265] [PATCH 296 of 307] x86 : AVX512 intra_pred_ang16 mode 4 and 32 high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:54 CEST 2018
# HG changeset patch
# User Jayashree
# Date 1516014532 -19800
# Mon Jan 15 16:38:52 2018 +0530
# Node ID ab1c3fd4fc64e0239e4e632f03cd616e857718ed
# Parent a2b347ed81f90ac82f59d891deba7fa876df7f62
x86 : AVX512 intra_pred_ang16 mode 4 and 32 high bit depth
Mode | AVX2 performance | AVX512 performance
---------------------------------------------------
4 | 9.5x | 14.2x
32 | 12.x | 19.7x
diff -r a2b347ed81f9 -r ab1c3fd4fc64 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Jan 15 17:52:26 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Jan 15 16:38:52 2018 +0530
@@ -3116,6 +3116,9 @@
p.cu[BLOCK_16x16].intra_pred[27] = PFX(intra_pred_ang16_27_avx512);
p.cu[BLOCK_16x16].intra_pred[5] = PFX(intra_pred_ang16_5_avx512);
p.cu[BLOCK_16x16].intra_pred[31] = PFX(intra_pred_ang16_31_avx512);
+ p.cu[BLOCK_16x16].intra_pred[4] = PFX(intra_pred_ang16_4_avx512);
+ p.cu[BLOCK_16x16].intra_pred[32] = PFX(intra_pred_ang16_32_avx512);
+
p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r a2b347ed81f9 -r ab1c3fd4fc64 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Mon Jan 15 17:52:26 2018 +0530
+++ b/source/common/x86/intrapred16.asm Mon Jan 15 16:38:52 2018 +0530
@@ -19659,6 +19659,27 @@
call ang32_mode_4_32
RET
+cglobal intra_pred_ang16_4, 3,7,13 ; AVX-512 high-bit-depth 16x16 angular intra prediction, mode 4 (per the patch title); thin wrapper that prepares registers and defers to ang16_mode_4_32
+ add r2, 64 ; advance r2 by 64 bytes; the mode 32 entry point omits this step (presumably selects the other neighbour array -- TODO confirm against the caller)
+ xor r6d, r6d ; r6 = 0; the mode 32 entry point passes 1 instead (presumably a mode selector read by ang16_mode_4_32 -- verify in the helper)
+ vbroadcasti32x8 m15, [pd_16] ; replicate the 256 bits at pd_16 into both 256-bit halves of m15; NOTE(review): m15 lies outside the 13 vector registers declared on the cglobal line -- confirm this is safe on Win64
+ lea r3, [ang_table_avx2 + 18 * 32] ; r3 = address 18 * 32 bytes past the start of ang_table_avx2
+ add r1d, r1d ; double r1 (presumably converts a stride counted in 16-bit samples to bytes -- TODO confirm)
+ lea r4, [r1 * 3] ; r4 = 3 * r1, i.e. three times the doubled value
+
+ call ang16_mode_4_32 ; shared body used by both the mode 4 and mode 32 entry points; defined outside this hunk
+ RET
+
+cglobal intra_pred_ang16_32, 3,7,13 ; AVX-512 high-bit-depth 16x16 angular intra prediction, mode 32 (per the patch title); thin wrapper that prepares registers and defers to ang16_mode_4_32
+ xor r6d, r6d ; clear r6 ...
+ inc r6d ; ... then r6 = 1; the mode 4 entry point passes 0 instead (presumably a mode selector read by ang16_mode_4_32 -- verify in the helper)
+ vbroadcasti32x8 m15, [pd_16] ; replicate the 256 bits at pd_16 into both 256-bit halves of m15; NOTE(review): m15 lies outside the 13 vector registers declared on the cglobal line -- confirm this is safe on Win64
+ lea r3, [ang_table_avx2 + 18 * 32] ; r3 = address 18 * 32 bytes past the start of ang_table_avx2
+ shl r1d, 1 ; double r1 (presumably converts a stride counted in 16-bit samples to bytes -- TODO confirm)
+ lea r4, [r1 * 3] ; r4 = 3 * r1, i.e. three times the doubled value
+
+ call ang16_mode_4_32 ; shared body used by both the mode 4 and mode 32 entry points; defined outside this hunk
+ RET
;-------------------------------------------------------------------------------------------------------
; avx512 code for intra_pred_ang32 mode 2 to 34 end
More information about the x265-devel
mailing list