[x265] [PATCH 298 of 307] X86:AVX512 intra_pred_ang16 mode 6 and 30 high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:56 CEST 2018
# HG changeset patch
# User Jayashree
# Date 1516097338 -19800
# Tue Jan 16 15:38:58 2018 +0530
# Node ID b0d00ca83af0cb2053d6eda82b6d4081236a0f5f
# Parent ce088a0f4c0ede8ff2e5f0ed6faa005c280acd84
X86:AVX512 intra_pred_ang16 mode 6 and 30 high bit depth
Mode | AVX2 performance | AVX512 performance
---------------------------------------------------
6 | 10.6x | 17.5x
30 | 12.62x | 21.0x
diff -r ce088a0f4c0e -r b0d00ca83af0 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Jan 16 14:55:31 2018 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Jan 16 15:38:58 2018 +0530
@@ -3119,7 +3119,8 @@
p.cu[BLOCK_16x16].intra_pred[31] = PFX(intra_pred_ang16_31_avx512);
p.cu[BLOCK_16x16].intra_pred[4] = PFX(intra_pred_ang16_4_avx512);
p.cu[BLOCK_16x16].intra_pred[32] = PFX(intra_pred_ang16_32_avx512);
-
+ p.cu[BLOCK_16x16].intra_pred[6] = PFX(intra_pred_ang16_6_avx512);
+ p.cu[BLOCK_16x16].intra_pred[30] = PFX(intra_pred_ang16_30_avx512);
p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r ce088a0f4c0e -r b0d00ca83af0 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Tue Jan 16 14:55:31 2018 +0530
+++ b/source/common/x86/intrapred16.asm Tue Jan 16 15:38:58 2018 +0530
@@ -19996,9 +19996,29 @@
call ang16_mode_6_30
add r2, 12
-
call ang32_mode_6_30
RET
+;-------------------------------------------------------------------------------------------------------
+; intra_pred_ang16_6: entry point that sets up registers and hands off to the shared
+; ang16_mode_6_30 routine.
+;   m15 : pd_16, broadcast with vbroadcasti32x8
+;   r3  : ang_table_avx2 + 15 * 32
+;   r2  : advanced by 64 bytes before the call
+;   r6d : cleared to 0 (presumably the mode selector read by ang16_mode_6_30 -- TODO confirm
+;         against the helper)
+;   r1  : doubled (presumably converts a stride in pixels to bytes for 16-bit samples --
+;         TODO confirm)
+;   r4  : 3 * r1, computed from the doubled value
+; NOTE(review): m15 is written although cglobal declares 14 vector registers; confirm this is
+; intended with this build's x86inc register handling.
+;-------------------------------------------------------------------------------------------------------
+cglobal intra_pred_ang16_6, 3,7,14
+    vbroadcasti32x8 m15, [pd_16]
+    lea             r3,  [ang_table_avx2 + 15 * 32]
+    add             r2,  64
+    xor             r6d, r6d
+    shl             r1d, 1                  ; stays the last flag-writing op and precedes the r4 lea
+    lea             r4,  [r1 * 3]
+
+    call            ang16_mode_6_30
+    RET
+
+;-------------------------------------------------------------------------------------------------------
+; intra_pred_ang16_30: entry point that sets up registers and hands off to the shared
+; ang16_mode_6_30 routine. r2 is passed through unmodified here.
+;   m15 : pd_16, broadcast with vbroadcasti32x8
+;   r3  : ang_table_avx2 + 15 * 32
+;   r6d : set to 1 (presumably the mode selector read by ang16_mode_6_30 -- TODO confirm
+;         against the helper)
+;   r1  : doubled (presumably converts a stride in pixels to bytes for 16-bit samples --
+;         TODO confirm)
+;   r4  : 3 * r1, computed from the doubled value
+; NOTE(review): m15 is written although cglobal declares 14 vector registers; confirm this is
+; intended with this build's x86inc register handling.
+;-------------------------------------------------------------------------------------------------------
+cglobal intra_pred_ang16_30, 3,7,14
+    vbroadcasti32x8 m15, [pd_16]
+    lea             r3,  [ang_table_avx2 + 15 * 32]
+    mov             r6d, 1                  ; same value the xor/inc pair produced
+    shl             r1d, 1                  ; stays the last flag-writing op and precedes the r4 lea
+    lea             r4,  [r1 * 3]
+
+    call            ang16_mode_6_30
+    RET
;-------------------------------------------------------------------------------------------------------
; avx512 code for intra_pred_ang32 mode 2 to 34 end
;-------------------------------------------------------------------------------------------------------
More information about the x265-devel
mailing list