[x265] [PATCH 275 of 307] x86:AVX512 intra_pred_ang32 mode 26 for high bit depth
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:34:33 CEST 2018
# HG changeset patch
# User Jayashree
# Date 1514549317 -19800
# Fri Dec 29 17:38:37 2017 +0530
# Node ID 47fd272d3c7002b5a84067a818ca4ae1c61276c1
# Parent 74965520283a92095a542ba1997798d6b3af7281
x86:AVX512 intra_pred_ang32 mode 26 for high bit depth
Primitive | AVX2 performance | AVX512 performance
-------------------------------------------------------------
intra_ang_32x32[26] | 2.31x | 4.38x
diff -r 74965520283a -r 47fd272d3c70 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Dec 27 14:51:40 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Dec 29 17:38:37 2017 +0530
@@ -3093,6 +3093,8 @@
p.cu[BLOCK_32x32].intra_pred[34] = PFX(intra_pred_ang32_2_avx512);
p.cu[BLOCK_32x32].intra_pred[10] = PFX(intra_pred_ang32_10_avx512);
p.cu[BLOCK_32x32].intra_pred[18] = PFX(intra_pred_ang32_18_avx512);
+ p.cu[BLOCK_32x32].intra_pred[26] = PFX(intra_pred_ang32_26_avx512);
+
p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r 74965520283a -r 47fd272d3c70 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Wed Dec 27 14:51:40 2017 +0530
+++ b/source/common/x86/intrapred16.asm Fri Dec 29 17:38:37 2017 +0530
@@ -18594,9 +18594,52 @@
palignr m4, m2, m0, 2
movu [r0 + r3], m4
mov rsp, [rsp+4*(mmsize/2)]
-
- RET
-
+ RET
+INIT_ZMM avx512 ; x86inc: vector names (m0, ...) below refer to 512-bit ZMM registers, mmsize = 64
+cglobal intra_pred_ang32_26, 3,3,2 ; angular mode 26, 32x32, high bit depth: writes one identical 64-byte vector to 32 consecutive rows; presumably r0 = dst, r1 = stride, r2 = reference samples -- confirm against the C prototype
+ movu m0, [r2 + 2] ; m0 = 64 bytes read 2 bytes into the r2 buffer (presumably skips one 16-bit sample -- confirm); NOTE(review): m0 is the only vector register used although cglobal declares 2
+ add r1d, r1d ; r1 *= 2 (presumably converts a stride in 16-bit samples to a stride in bytes)
+ lea r2, [r1 * 3] ; r2 = 3 * stride; the source pointer in r2 is no longer needed after the load
+ movu [r0], m0 ; rows 0-3
+ movu [r0 + r1], m0
+ movu [r0 + r1 * 2], m0
+ movu [r0 + r2], m0
+ lea r0, [r0 + r1 *4] ; advance the destination pointer by 4 rows
+ movu [r0], m0 ; rows 4-7
+ movu [r0 + r1], m0
+ movu [r0 + r1 * 2], m0
+ movu [r0 + r2], m0
+ lea r0, [r0 + r1 *4]
+ movu [r0], m0 ; rows 8-11
+ movu [r0 + r1], m0
+ movu [r0 + r1 * 2], m0
+ movu [r0 + r2], m0
+ lea r0, [r0 + r1 *4]
+ movu [r0], m0 ; rows 12-15
+ movu [r0 + r1], m0
+ movu [r0 + r1 * 2], m0
+ movu [r0 + r2], m0
+ lea r0, [r0 + r1 *4]
+ movu [r0], m0 ; rows 16-19
+ movu [r0 + r1], m0
+ movu [r0 + r1 * 2], m0
+ movu [r0 + r2], m0
+ lea r0, [r0 + r1 *4]
+ movu [r0], m0 ; rows 20-23
+ movu [r0 + r1], m0
+ movu [r0 + r1 * 2], m0
+ movu [r0 + r2], m0
+ lea r0, [r0 + r1 *4]
+ movu [r0], m0 ; rows 24-27
+ movu [r0 + r1], m0
+ movu [r0 + r1 * 2], m0
+ movu [r0 + r2], m0
+ lea r0, [r0 + r1 *4]
+ movu [r0], m0 ; rows 28-31 (last group, so r0 is not advanced afterwards)
+ movu [r0 + r1], m0
+ movu [r0 + r1 * 2], m0
+ movu [r0 + r2], m0
+ RET ; x86inc epilogue macro
;-------------------------------------------------------------------------------------------------------
; avx512 code for intra_pred_ang32 mode 2 to 34 end
;-------------------------------------------------------------------------------------------------------
More information about the x265-devel
mailing list