[x265] [PATCH] asm: cleanups for 16bpp pixel_sub code in asm-primitives.cpp

Murugan Vairavel murugan at multicorewareinc.com
Fri Dec 6 11:05:31 CET 2013


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1386324321 -19800
#      Fri Dec 06 15:35:21 2013 +0530
# Node ID ab1c07bf376b4bb068e3a1490716b1152aedf937
# Parent  a87aa775087d5fdd3a75d5f3f599178034cf2db1
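asm: cleanups for 16bpp pixel_sub code in asm-primitives.cpp

Move the luma_sub_ps assignment into a dedicated SETUP_LUMA_SUB_FUNC_DEF
macro and add a LUMA_PIXELSUB(cpu) macro that expands it for every luma
block size. Replace the hand-written sse2 chroma/luma sub_ps assignments
with CHROMA_PIXELSUB_PS(_sse2) and LUMA_PIXELSUB(_sse2), and add
LUMA_PIXELSUB(_sse4) to the sse4 setup.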

diff -r a87aa775087d -r ab1c07bf376b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Dec 06 13:59:24 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Dec 06 15:35:21 2013 +0530
@@ -300,9 +300,11 @@
     p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
     p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu; \
     p.luma_copy_ps[LUMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x ## H ## cpu; \
-    p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu; \
     p.luma_add_ps[LUMA_ ## W ## x ## H] = x265_pixel_add_ps_ ## W ## x ## H ## cpu;
 
+#define SETUP_LUMA_SUB_FUNC_DEF(W, H, cpu) \
+    p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu;
+
 #define SETUP_LUMA_SP_FUNC_DEF(W, H, cpu) \
     p.luma_vsp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_sp_ ## W ## x ## H ## cpu;
 
@@ -398,6 +400,33 @@
     SETUP_LUMA_FUNC_DEF(64, 16, cpu); \
     SETUP_LUMA_FUNC_DEF(16, 64, cpu);
 
+#define LUMA_PIXELSUB(cpu) \
+    SETUP_LUMA_SUB_FUNC_DEF(4,   4, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(8,   8, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(8,   4, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(4,   8, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(16, 16, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(16,  8, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(8,  16, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(16, 12, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(12, 16, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(16,  4, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(4,  16, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(32, 32, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(32, 16, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(16, 32, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(32, 24, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(24, 32, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(32,  8, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(8,  32, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(64, 64, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(64, 32, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(32, 64, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(64, 48, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(48, 64, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(64, 16, cpu); \
+    SETUP_LUMA_SUB_FUNC_DEF(16, 64, cpu);
+
 #define LUMA_SP_FILTERS(cpu) \
     SETUP_LUMA_SP_FUNC_DEF(4,   4, cpu); \
     SETUP_LUMA_SP_FUNC_DEF(8,   8, cpu); \
@@ -632,37 +661,8 @@
         p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
         p.cvt16to32_shl = x265_cvt16to32_shl_sse2;
 
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_4x8] = x265_pixel_sub_ps_2x4_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_4x16] = x265_pixel_sub_ps_2x8_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_8x4] = x265_pixel_sub_ps_4x2_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_8x8] = x265_pixel_sub_ps_4x4_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_8x16] = x265_pixel_sub_ps_4x8_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_8x32] = x265_pixel_sub_ps_4x16_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_12x16] = x265_pixel_sub_ps_6x8_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_16x4] = x265_pixel_sub_ps_8x2_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_16x8] = x265_pixel_sub_ps_8x4_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_16x12] = x265_pixel_sub_ps_8x6_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_16x16] = x265_pixel_sub_ps_8x8_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_16x32] = x265_pixel_sub_ps_8x16_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_16x64] = x265_pixel_sub_ps_8x32_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_24x32] = x265_pixel_sub_ps_12x16_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_32x8] = x265_pixel_sub_ps_16x4_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_32x16] = x265_pixel_sub_ps_16x8_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_32x24] = x265_pixel_sub_ps_16x12_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_32x32] = x265_pixel_sub_ps_16x16_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_32x64] = x265_pixel_sub_ps_16x32_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_48x64] = x265_pixel_sub_ps_24x32_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_64x16] = x265_pixel_sub_ps_32x8_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_64x32] = x265_pixel_sub_ps_32x16_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_64x48] = x265_pixel_sub_ps_32x24_sse2;
-        p.chroma[X265_CSP_I420].sub_ps[LUMA_64x64] = x265_pixel_sub_ps_32x32_sse2;
-        p.luma_sub_ps[LUMA_16x64] = x265_pixel_sub_ps_16x64_sse2;
-        p.luma_sub_ps[LUMA_32x64] = x265_pixel_sub_ps_32x64_sse2;
-        p.luma_sub_ps[LUMA_48x64] = x265_pixel_sub_ps_48x64_sse2;
-        p.luma_sub_ps[LUMA_64x16] = x265_pixel_sub_ps_64x16_sse2;
-        p.luma_sub_ps[LUMA_64x32] = x265_pixel_sub_ps_64x32_sse2;
-        p.luma_sub_ps[LUMA_64x48] = x265_pixel_sub_ps_64x48_sse2;
-        p.luma_sub_ps[LUMA_64x64] = x265_pixel_sub_ps_64x64_sse2;
+        CHROMA_PIXELSUB_PS(_sse2);
+        LUMA_PIXELSUB(_sse2);
     }
     if (cpuMask & X265_CPU_SSSE3)
     {
@@ -860,6 +860,7 @@
         LUMA_SSE_SP(_sse4);
 
         CHROMA_PIXELSUB_PS(_sse4);
+        LUMA_PIXELSUB(_sse4);
 
         CHROMA_FILTERS(_sse4);
         LUMA_FILTERS(_sse4);


More information about the x265-devel mailing list