[x265] [PATCH] asm: avx2 interp_8tap_hv_pp for 8bpp
aasaipriya at multicorewareinc.com
aasaipriya at multicorewareinc.com
Fri Jun 12 13:38:38 CEST 2015
# HG changeset patch
# User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
# Date 1434109112 -19800
# Fri Jun 12 17:08:32 2015 +0530
# Node ID 5fdd9366d5f2e923509400850771e96a160df809
# Parent 2cd9183df03edff0b148bab6e133dfe1ae4f69a1
asm: avx2 interp_8tap_hv_pp for 8bpp
Removing the separate x265_interp_8tap_hv_pp_16x16_avx2 asm code, since it gives the same performance as calling the interp_8tap_hv_pp_cpu C function (which calls the luma_hps and luma_vsp asm functions individually).
Including ALL_LUMA_PU_T for luma_hvpp, which calls the interp_8tap_hv_pp_cpu C function.
ALL_LUMA_PU_T declares all sizes except 4x4, hence luma_hvpp[4x4] is included separately.
diff -r 2cd9183df03e -r 5fdd9366d5f2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Jun 11 17:06:46 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Jun 12 17:08:32 2015 +0530
@@ -2835,6 +2835,7 @@
ALL_LUMA_PU(luma_vps, interp_8tap_vert_ps, avx2);
ALL_LUMA_PU(luma_vsp, interp_8tap_vert_sp, avx2);
ALL_LUMA_PU(luma_vss, interp_8tap_vert_ss, avx2);
+ p.pu[LUMA_4x4].luma_vsp = x265_interp_8tap_vert_sp_4x4_avx2; // since ALL_LUMA_PU didn't declare 4x4 size, calling separately luma_vsp function to use in interp_8tap_hv_pp_cpu C function
// missing 4x8, 4x16, 24x32, 12x16 for the fill set of luma PU
p.pu[LUMA_4x4].luma_hpp = x265_interp_8tap_horiz_pp_4x4_avx2;
@@ -3106,7 +3107,9 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vss = x265_interp_4tap_vert_ss_64x16_avx2;
p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_vss = x265_interp_4tap_vert_ss_16x64_avx2;
- p.pu[LUMA_16x16].luma_hvpp = x265_interp_8tap_hv_pp_16x16_avx2;
+ //p.pu[LUMA_16x16].luma_hvpp = x265_interp_8tap_hv_pp_16x16_avx2; // Removing x265_interp_8tap_hv_pp_16x16_avx2 seperate asm code, since its giving same performnace as calling interp_8tap_hv_pp_cpu C function(which calls luma_hps and luma_vsp asm functions individually)
+ ALL_LUMA_PU_T(luma_hvpp, interp_8tap_hv_pp_cpu); // calling luma_hvpp for all sizes
+ p.pu[LUMA_4x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x4>; // ALL_LUMA_PU_T has declared all sizes except 4x4, hence calling luma_hvpp[4x4] separately.
p.pu[LUMA_32x8].convert_p2s = x265_filterPixelToShort_32x8_avx2;
p.pu[LUMA_32x16].convert_p2s = x265_filterPixelToShort_32x16_avx2;
More information about the x265-devel
mailing list