[x265] [PATCH 12 of 12] Call macros to reduce code size of primitive setup

dtyx265 at gmail.com dtyx265 at gmail.com
Tue May 19 02:24:39 CEST 2015


# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1431994972 25200
# Node ID b7da9b5e994f52e5beace23d99bb35e0c62e7973
# Parent  a1ae3a91f5e011753017db8579296b5702439579
Call macros to reduce code size of primitive setup

diff -r a1ae3a91f5e0 -r b7da9b5e994f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon May 18 17:11:18 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp	Mon May 18 17:22:52 2015 -0700
@@ -1434,6 +1434,11 @@
         CHROMA_420_VSP_FILTERS(_sse2);
         CHROMA_422_VSP_FILTERS(_sse2);
         CHROMA_444_VSP_FILTERS(_sse2);
+#if X86_64
+        ALL_CHROMA_420_PU(filter_vps, interp_4tap_vert_ps, sse2);
+        ALL_CHROMA_422_PU(filter_vps, interp_4tap_vert_ps, sse2);
+        ALL_CHROMA_444_PU(filter_vps, interp_4tap_vert_ps, sse2);
+#else
         p.chroma[X265_CSP_I420].pu[CHROMA_420_2x4].filter_vpp = x265_interp_4tap_vert_pp_2x4_sse2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_2x8].filter_vpp = x265_interp_4tap_vert_pp_2x8_sse2;
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_vpp = x265_interp_4tap_vert_pp_4x2_sse2;
@@ -1460,123 +1465,6 @@
         p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_vps = x265_interp_4tap_vert_ps_4x4_sse2;
         p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_vps = x265_interp_4tap_vert_ps_4x8_sse2;
         p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_vps = x265_interp_4tap_vert_ps_4x16_sse2;
-#if X86_64
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].filter_vpp = x265_interp_4tap_vert_pp_6x8_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].filter_vpp = x265_interp_4tap_vert_pp_8x2_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vpp = x265_interp_4tap_vert_pp_8x4_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_vpp = x265_interp_4tap_vert_pp_8x6_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_vpp = x265_interp_4tap_vert_pp_8x8_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_vpp = x265_interp_4tap_vert_pp_8x16_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_vpp = x265_interp_4tap_vert_pp_8x32_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].filter_vpp = x265_interp_4tap_vert_pp_12x16_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_vpp = x265_interp_4tap_vert_pp_16x4_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vpp = x265_interp_4tap_vert_pp_16x8_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].filter_vpp = x265_interp_4tap_vert_pp_16x12_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_vpp = x265_interp_4tap_vert_pp_16x16_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].filter_vpp = x265_interp_4tap_vert_pp_16x32_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].filter_vpp = x265_interp_4tap_vert_pp_24x32_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].filter_vpp = x265_interp_4tap_vert_pp_32x8_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_vpp = x265_interp_4tap_vert_pp_32x16_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_vpp = x265_interp_4tap_vert_pp_32x24_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_vpp = x265_interp_4tap_vert_pp_32x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vpp = x265_interp_4tap_vert_pp_6x16_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_vpp = x265_interp_4tap_vert_pp_8x4_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_vpp = x265_interp_4tap_vert_pp_8x4_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].filter_vpp = x265_interp_4tap_vert_pp_8x8_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_vpp = x265_interp_4tap_vert_pp_8x12_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].filter_vpp = x265_interp_4tap_vert_pp_8x16_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].filter_vpp = x265_interp_4tap_vert_pp_8x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_vpp = x265_interp_4tap_vert_pp_8x64_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vpp = x265_interp_4tap_vert_pp_12x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vpp = x265_interp_4tap_vert_pp_16x8_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vpp = x265_interp_4tap_vert_pp_16x16_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vpp = x265_interp_4tap_vert_pp_16x24_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vpp = x265_interp_4tap_vert_pp_16x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vpp = x265_interp_4tap_vert_pp_16x64_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vpp = x265_interp_4tap_vert_pp_24x64_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vpp = x265_interp_4tap_vert_pp_32x16_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vpp = x265_interp_4tap_vert_pp_32x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vpp = x265_interp_4tap_vert_pp_32x48_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vpp = x265_interp_4tap_vert_pp_32x64_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_8x4].filter_vpp = x265_interp_4tap_vert_pp_8x4_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_8x8].filter_vpp = x265_interp_4tap_vert_pp_8x8_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_8x16].filter_vpp = x265_interp_4tap_vert_pp_8x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_8x32].filter_vpp = x265_interp_4tap_vert_pp_8x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_vpp = x265_interp_4tap_vert_pp_12x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x4].filter_vpp = x265_interp_4tap_vert_pp_16x4_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x8].filter_vpp = x265_interp_4tap_vert_pp_16x8_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x12].filter_vpp = x265_interp_4tap_vert_pp_16x12_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x16].filter_vpp = x265_interp_4tap_vert_pp_16x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x32].filter_vpp = x265_interp_4tap_vert_pp_16x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_vpp = x265_interp_4tap_vert_pp_16x64_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_24x32].filter_vpp = x265_interp_4tap_vert_pp_24x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_vpp = x265_interp_4tap_vert_pp_32x8_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_vpp = x265_interp_4tap_vert_pp_32x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x24].filter_vpp = x265_interp_4tap_vert_pp_32x24_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_vpp = x265_interp_4tap_vert_pp_32x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_vpp = x265_interp_4tap_vert_pp_32x64_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_vpp = x265_interp_4tap_vert_pp_48x64_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = x265_interp_4tap_vert_pp_64x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_vpp = x265_interp_4tap_vert_pp_64x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_vpp = x265_interp_4tap_vert_pp_64x48_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vpp = x265_interp_4tap_vert_pp_64x64_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].filter_vps = x265_interp_4tap_vert_ps_6x8_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].filter_vps = x265_interp_4tap_vert_ps_8x2_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vps = x265_interp_4tap_vert_ps_8x4_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_vps = x265_interp_4tap_vert_ps_8x6_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_vps = x265_interp_4tap_vert_ps_8x8_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_vps = x265_interp_4tap_vert_ps_8x16_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_vps = x265_interp_4tap_vert_ps_8x32_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].filter_vps = x265_interp_4tap_vert_ps_12x16_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_vps = x265_interp_4tap_vert_ps_16x4_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vps = x265_interp_4tap_vert_ps_16x8_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].filter_vps = x265_interp_4tap_vert_ps_16x12_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_vps = x265_interp_4tap_vert_ps_16x16_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].filter_vps = x265_interp_4tap_vert_ps_16x32_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].filter_vps = x265_interp_4tap_vert_ps_24x32_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].filter_vps = x265_interp_4tap_vert_ps_32x8_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_vps = x265_interp_4tap_vert_ps_32x16_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_vps = x265_interp_4tap_vert_ps_32x24_sse2;
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_vps = x265_interp_4tap_vert_ps_32x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].filter_vps = x265_interp_4tap_vert_ps_6x16_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].filter_vps = x265_interp_4tap_vert_ps_8x8_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_vps = x265_interp_4tap_vert_ps_8x12_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].filter_vps = x265_interp_4tap_vert_ps_8x16_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].filter_vps = x265_interp_4tap_vert_ps_8x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_vps = x265_interp_4tap_vert_ps_8x64_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_vps = x265_interp_4tap_vert_ps_12x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vps = x265_interp_4tap_vert_ps_16x8_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vps = x265_interp_4tap_vert_ps_16x16_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vps = x265_interp_4tap_vert_ps_16x24_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vps = x265_interp_4tap_vert_ps_16x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vps = x265_interp_4tap_vert_ps_16x64_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vps = x265_interp_4tap_vert_ps_24x64_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vps = x265_interp_4tap_vert_ps_32x16_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vps = x265_interp_4tap_vert_ps_32x32_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vps = x265_interp_4tap_vert_ps_32x48_sse2;
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vps = x265_interp_4tap_vert_ps_32x64_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_8x4].filter_vps = x265_interp_4tap_vert_ps_8x4_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_8x8].filter_vps = x265_interp_4tap_vert_ps_8x8_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_8x16].filter_vps = x265_interp_4tap_vert_ps_8x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_8x32].filter_vps = x265_interp_4tap_vert_ps_8x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_vps = x265_interp_4tap_vert_ps_12x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x4].filter_vps = x265_interp_4tap_vert_ps_16x4_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x8].filter_vps = x265_interp_4tap_vert_ps_16x8_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x12].filter_vps = x265_interp_4tap_vert_ps_16x12_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x16].filter_vps = x265_interp_4tap_vert_ps_16x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x32].filter_vps = x265_interp_4tap_vert_ps_16x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_vps = x265_interp_4tap_vert_ps_16x64_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_24x32].filter_vps = x265_interp_4tap_vert_ps_24x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_vps = x265_interp_4tap_vert_ps_32x8_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_vps = x265_interp_4tap_vert_ps_32x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x24].filter_vps = x265_interp_4tap_vert_ps_32x24_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_vps = x265_interp_4tap_vert_ps_32x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_vps = x265_interp_4tap_vert_ps_32x64_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_vps = x265_interp_4tap_vert_ps_48x64_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vps = x265_interp_4tap_vert_ps_64x16_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_vps = x265_interp_4tap_vert_ps_64x32_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_vps = x265_interp_4tap_vert_ps_64x48_sse2;
-        p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vps = x265_interp_4tap_vert_ps_64x64_sse2;
 #endif
 
         ALL_LUMA_PU(luma_hpp, interp_8tap_horiz_pp, sse2);


More information about the x265-devel mailing list