[x265] [PATCH 043 of 307] x86: AVX512 blockcopy_ss_64x64, blockcopy_pp_64xN, blockcopy_ps_64x64 and blockcopy_sp_64x64 for HIGH_BIT_DEPTH
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:30:41 CEST 2018
# HG changeset patch
# User Jayashri Murugan <jayashri at multicorewareinc.com>
# Date 1500964333 -19800
# Tue Jul 25 12:02:13 2017 +0530
# Node ID 2ad06d32a8465ce20e673c819b917a7524ecf8e9
# Parent 6b3b8ef0f37e0f7860f4f43c99e581674b19f9e3
x86: AVX512 blockcopy_ss_64x64, blockcopy_pp_64xN, blockcopy_ps_64x64 and
blockcopy_sp_64x64 for HIGH_BIT_DEPTH
HIGH_BIT_DEPTH:
Primitive      | AVX2 performance | AVX512 performance
---------------|------------------|-------------------
copy_ss[64x64] | 1.38x            | 2.85x
copy_pp[64x64] | 1.91x            | 3.03x
copy_pp[64x48] | 1.90x            | 3.21x
copy_pp[64x32] | 1.99x            | 3.26x
copy_pp[64x16] | 2.01x            | 3.56x
copy_ps[64x64] | 1.78x            | 3.46x
copy_sp[64x64] | 1.80x            | 3.25x
diff -r 6b3b8ef0f37e -r 2ad06d32a846 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Jul 21 14:55:49 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Jul 25 12:02:13 2017 +0530
@@ -2191,10 +2191,20 @@
if (cpuMask & X265_CPU_AVX512)
{
p.cu[BLOCK_16x16].var = PFX(pixel_var_16x16_avx512);
+ p.cu[BLOCK_32x32].calcresidual = PFX(getResidual32_avx512);
p.cu[BLOCK_64x64].sub_ps = PFX(pixel_sub_ps_64x64_avx512);
p.cu[BLOCK_32x32].sub_ps = PFX(pixel_sub_ps_32x32_avx512);
p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sub_ps = PFX(pixel_sub_ps_32x32_avx512);
p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sub_ps = PFX(pixel_sub_ps_32x64_avx512);
+
+ // 64 X N
+ p.cu[BLOCK_64x64].copy_ss = PFX(blockcopy_ss_64x64_avx512);
+ p.pu[LUMA_64x64].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x64_avx512);
+ p.pu[LUMA_64x48].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x48_avx512);
+ p.pu[LUMA_64x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x32_avx512);
+ p.pu[LUMA_64x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x16_avx512);
+ p.cu[BLOCK_64x64].copy_ps = (copy_ps_t)PFX(blockcopy_ss_64x64_avx512);
+ p.cu[BLOCK_64x64].copy_sp = (copy_sp_t)PFX(blockcopy_ss_64x64_avx512);
}
}
#else // if HIGH_BIT_DEPTH
@@ -3727,7 +3737,6 @@
p.integral_inith[INTEGRAL_24] = PFX(integral24h_avx2);
p.integral_inith[INTEGRAL_32] = PFX(integral32h_avx2);
- p.cu[BLOCK_32x32].calcresidual = PFX(getResidual32_avx512);
}
if (cpuMask & X265_CPU_AVX512)
{
More information about the x265-devel mailing list