[x265] [PATCH] asm: avx2 interp_8tap_hv_pp for 8bpp

aasaipriya at multicorewareinc.com aasaipriya at multicorewareinc.com
Fri Jun 12 13:19:15 CEST 2015


# HG changeset patch
# User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
# Date 1434107886 -19800
#      Fri Jun 12 16:48:06 2015 +0530
# Node ID 5fc5e0d20a595a7e666f181e9f7593fbc2fbe2df
# Parent  2cd9183df03edff0b148bab6e133dfe1ae4f69a1
asm: avx2 interp_8tap_hv_pp for 8bpp

Removing the separate x265_interp_8tap_hv_pp_16x16_avx2 asm code, since it gives the same performance as calling the interp_8tap_hv_pp_cpu C function (which calls the luma_hps and luma_vsp asm functions individually).

Including ALL_LUMA_PU_T for luma_hvpp, which calls the interp_8tap_hv_pp_cpu C function.
ALL_LUMA_PU_T declares all sizes except 4x4, hence luma_hvpp[4x4] is included separately.

diff -r 2cd9183df03e -r 5fc5e0d20a59 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Jun 11 17:06:46 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Jun 12 16:48:06 2015 +0530
@@ -2835,6 +2835,7 @@
         ALL_LUMA_PU(luma_vps, interp_8tap_vert_ps, avx2);
         ALL_LUMA_PU(luma_vsp, interp_8tap_vert_sp, avx2);
         ALL_LUMA_PU(luma_vss, interp_8tap_vert_ss, avx2);
+        p.pu[LUMA_4x4].luma_vsp = x265_interp_8tap_vert_sp_4x4_avx2;
 
         // missing 4x8, 4x16, 24x32, 12x16 for the fill set of luma PU
         p.pu[LUMA_4x4].luma_hpp = x265_interp_8tap_horiz_pp_4x4_avx2;
@@ -3106,7 +3107,9 @@
         p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vss = x265_interp_4tap_vert_ss_64x16_avx2;
         p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_vss = x265_interp_4tap_vert_ss_16x64_avx2;
 
-        p.pu[LUMA_16x16].luma_hvpp = x265_interp_8tap_hv_pp_16x16_avx2;
+        //p.pu[LUMA_16x16].luma_hvpp = x265_interp_8tap_hv_pp_16x16_avx2;
+        ALL_LUMA_PU_T(luma_hvpp, interp_8tap_hv_pp_cpu);
+        p.pu[LUMA_4x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x4>;
 
         p.pu[LUMA_32x8].convert_p2s = x265_filterPixelToShort_32x8_avx2;
         p.pu[LUMA_32x16].convert_p2s = x265_filterPixelToShort_32x16_avx2;


More information about the x265-devel mailing list