[x265] [PATCH 230 of 307] x86: AVX512 - dct16 and dct32 for main10 profile

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:33:48 CEST 2018


# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1512104438 -19800
#      Fri Dec 01 10:30:38 2017 +0530
# Node ID 4f690222337dbc1757665729ea15f2380a11c329
# Parent  f86b11b8c629b0e4bf8342d42a0e9c475d7c3a7d
x86: AVX512 - dct16 and dct32 for main10 profile

diff -r f86b11b8c629 -r 4f690222337d source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Nov 30 15:44:01 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Dec 01 10:30:38 2017 +0530
@@ -2883,6 +2883,12 @@
         p.pu[LUMA_48x64].luma_vsp = PFX(interp_8tap_vert_sp_48x64_avx512);
 
         p.cu[BLOCK_8x8].dct = PFX(dct8_avx512);
+        /* TODO: Currently the performance of these kernels is similar to the AVX2 versions; we need to improve them further to enable
+        * them. A VTune analysis will probably help here.
+
+        * p.cu[BLOCK_16x16].dct  = PFX(dct16_avx512);
+        * p.cu[BLOCK_32x32].dct  = PFX(dct32_avx512); */
+
         p.cu[BLOCK_8x8].idct = PFX(idct8_avx512);
         p.cu[BLOCK_16x16].idct = PFX(idct16_avx512);
         p.cu[BLOCK_32x32].idct = PFX(idct32_avx512);


More information about the x265-devel mailing list