[x265] [PATCH] asm: cleanups for 16bpp pixel_sub code in asm-primitives.cpp
Murugan Vairavel <murugan at multicorewareinc.com>
Fri Dec 6 11:05:31 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1386324321 -19800
# Fri Dec 06 15:35:21 2013 +0530
# Node ID ab1c07bf376b4bb068e3a1490716b1152aedf937
# Parent a87aa775087d5fdd3a75d5f3f599178034cf2db1
asm: cleanups for 16bpp pixel_sub code in asm-primitives.cpp
diff -r a87aa775087d -r ab1c07bf376b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Dec 06 13:59:24 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Dec 06 15:35:21 2013 +0530
@@ -300,9 +300,11 @@
p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
p.luma_copy_ps[LUMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x ## H ## cpu; \
- p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
p.luma_add_ps[LUMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
+#define SETUP_LUMA_SUB_FUNC_DEF(W, H, cpu) \
+ p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu;
+
#define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
@@ -398,6 +400,33 @@
SETUP_LUMA_FUNC_DEF(64, 16, cpu); \
SETUP_LUMA_FUNC_DEF(16, 64, cpu);
+#define LUMA_PIXELSUB(cpu) \
+ SETUP_LUMA_SUB_FUNC_DEF(4, 4, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(8, 8, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(8, 4, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(4, 8, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(16, 16, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(16, 8, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(8, 16, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(16, 12, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(12, 16, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(16, 4, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(4, 16, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(32, 32, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(32, 16, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(16, 32, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(32, 24, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(24, 32, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(32, 8, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(8, 32, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(64, 64, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(64, 32, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(32, 64, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(64, 48, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(48, 64, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(64, 16, cpu); \
+ SETUP_LUMA_SUB_FUNC_DEF(16, 64, cpu);
+
#define LUMA_SP_FILTERS(cpu) \
SETUP_LUMA_SP_FUNC_DEF(4, 4, cpu); \
SETUP_LUMA_SP_FUNC_DEF(8, 8, cpu); \
@@ -632,37 +661,8 @@
p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
p.cvt16to32_shl = x265_cvt16to32_shl_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_4x8] = x265_pixel_sub_ps_2x4_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_4x16] = x265_pixel_sub_ps_2x8_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_8x4] = x265_pixel_sub_ps_4x2_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_8x8] = x265_pixel_sub_ps_4x4_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_8x16] = x265_pixel_sub_ps_4x8_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_8x32] = x265_pixel_sub_ps_4x16_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_12x16] = x265_pixel_sub_ps_6x8_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_16x4] = x265_pixel_sub_ps_8x2_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_16x8] = x265_pixel_sub_ps_8x4_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_16x12] = x265_pixel_sub_ps_8x6_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_16x16] = x265_pixel_sub_ps_8x8_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_16x32] = x265_pixel_sub_ps_8x16_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_16x64] = x265_pixel_sub_ps_8x32_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_24x32] = x265_pixel_sub_ps_12x16_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_32x8] = x265_pixel_sub_ps_16x4_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_32x16] = x265_pixel_sub_ps_16x8_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_32x24] = x265_pixel_sub_ps_16x12_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_32x32] = x265_pixel_sub_ps_16x16_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_32x64] = x265_pixel_sub_ps_16x32_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_48x64] = x265_pixel_sub_ps_24x32_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_64x16] = x265_pixel_sub_ps_32x8_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_64x32] = x265_pixel_sub_ps_32x16_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_64x48] = x265_pixel_sub_ps_32x24_sse2;
- p.chroma[X265_CSP_I420].sub_ps[LUMA_64x64] = x265_pixel_sub_ps_32x32_sse2;
- p.luma_sub_ps[LUMA_16x64] = x265_pixel_sub_ps_16x64_sse2;
- p.luma_sub_ps[LUMA_32x64] = x265_pixel_sub_ps_32x64_sse2;
- p.luma_sub_ps[LUMA_48x64] = x265_pixel_sub_ps_48x64_sse2;
- p.luma_sub_ps[LUMA_64x16] = x265_pixel_sub_ps_64x16_sse2;
- p.luma_sub_ps[LUMA_64x32] = x265_pixel_sub_ps_64x32_sse2;
- p.luma_sub_ps[LUMA_64x48] = x265_pixel_sub_ps_64x48_sse2;
- p.luma_sub_ps[LUMA_64x64] = x265_pixel_sub_ps_64x64_sse2;
+ CHROMA_PIXELSUB_PS(_sse2);
+ LUMA_PIXELSUB(_sse2);
}
if (cpuMask & X265_CPU_SSSE3)
{
@@ -860,6 +860,7 @@
LUMA_SSE_SP(_sse4);
CHROMA_PIXELSUB_PS(_sse4);
+ LUMA_PIXELSUB(_sse4);
CHROMA_FILTERS(_sse4);
LUMA_FILTERS(_sse4);
More information about the x265-devel mailing list