[x265] [PATCH] asm: avx2 interp_8tap_hv_pp for 16bpp
aasaipriya at multicorewareinc.com
aasaipriya at multicorewareinc.com
Mon Jun 15 09:11:21 CEST 2015
# HG changeset patch
# User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
# Date 1434352269 -19800
# Mon Jun 15 12:41:09 2015 +0530
# Node ID 41b1baa5885f53e0a9aa4bf1c3c52c49c7b2bcdb
# Parent 32590b25678b5b87bf6beed4c3074ec3837d35da
asm: avx2 interp_8tap_hv_pp for 16bpp
Include ALL_LUMA_PU_T for luma_hvpp, which calls the interp_8tap_hv_pp_cpu C function (which in turn calls the luma_hps and luma_vsp asm functions individually).
ALL_LUMA_PU_T declares all sizes except 4x4, so luma_hvpp[4x4] is included separately.
diff -r 32590b25678b -r 41b1baa5885f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Jun 12 16:48:06 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Jun 15 12:41:09 2015 +0530
@@ -1443,6 +1443,7 @@
ALL_LUMA_PU(luma_vps, interp_8tap_vert_ps, avx2);
ALL_LUMA_PU(luma_vsp, interp_8tap_vert_sp, avx2);
ALL_LUMA_PU(luma_vss, interp_8tap_vert_ss, avx2);
+ p.pu[LUMA_4x4].luma_vsp = x265_interp_8tap_vert_sp_4x4_avx2; // since ALL_LUMA_PU didn't declare 4x4 size, calling separately luma_vsp function to use
p.cu[BLOCK_16x16].add_ps = x265_pixel_add_ps_16x16_avx2;
p.cu[BLOCK_32x32].add_ps = x265_pixel_add_ps_32x32_avx2;
@@ -2000,6 +2001,9 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_vsp = x265_interp_4tap_vert_sp_64x48_avx2;
p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = x265_interp_4tap_vert_sp_64x64_avx2;
+ ALL_LUMA_PU_T(luma_hvpp, interp_8tap_hv_pp_cpu); // calling luma_hvpp for all sizes
+ p.pu[LUMA_4x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x4>; // ALL_LUMA_PU_T has declared all sizes except 4x4, hence calling luma_hvpp[4x4]
+
if (cpuMask & X265_CPU_BMI2)
p.scanPosLast = x265_scanPosLast_avx2_bmi2;
}
More information about the x265-devel mailing list