[x265] [PATCH] asm: cleanups for pixel_sse_sp

murugan at multicorewareinc.com murugan at multicorewareinc.com
Thu Nov 28 10:48:27 CET 2013


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1385632076 -19800
#      Thu Nov 28 15:17:56 2013 +0530
# Node ID f0d2ef33a0bdb41b9b3d7edb9e0b7358b0783271
# Parent  7a0fe2f9074330bb3126e95194e7c4ed956c6e4d
asm: cleanups for pixel_sse_sp

diff -r 7a0fe2f90743 -r f0d2ef33a0bd source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Nov 28 14:58:39 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Nov 28 15:17:56 2013 +0530
@@ -450,6 +450,36 @@
     SETUP_PIXEL_VAR_DEF(32, 32, cpu); \
     SETUP_PIXEL_VAR_DEF(64, 64, cpu);
 
+#define SETUP_PIXEL_SSE_SP_DEF(W, H, cpu) \
+    p.sse_sp[LUMA_ ## W ## x ## H] = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu;
+
+#define LUMA_SSE_SP(cpu) \
+    SETUP_PIXEL_SSE_SP_DEF(4,   4, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(8,   8, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(8,   4, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(4,   8, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16,  8, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(8,  16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 12, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(12, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16,  4, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(4,  16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 24, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(24, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32,  8, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(8,  32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(64, 64, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(64, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 64, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(64, 48, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(48, 64, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(64, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
+
 namespace x265 {
 // private x265 namespace
 
@@ -648,31 +678,7 @@
         p.sse_pp[LUMA_64x48] = x265_pixel_ssd_64x48_sse4;
         p.sse_pp[LUMA_64x64] = x265_pixel_ssd_64x64_sse4;
 
-        p.sse_sp[LUMA_4x4] = x265_pixel_ssd_sp_4x4_sse4;
-        p.sse_sp[LUMA_4x8] = x265_pixel_ssd_sp_4x8_sse4;
-        p.sse_sp[LUMA_4x16] = x265_pixel_ssd_sp_4x16_sse4;
-        p.sse_sp[LUMA_8x4] = x265_pixel_ssd_sp_8x4_sse4;
-        p.sse_sp[LUMA_8x8] = x265_pixel_ssd_sp_8x8_sse4;
-        p.sse_sp[LUMA_8x16] = x265_pixel_ssd_sp_8x16_sse4;
-        p.sse_sp[LUMA_8x32] = x265_pixel_ssd_sp_8x32_sse4;
-        p.sse_sp[LUMA_12x16] = x265_pixel_ssd_sp_12x16_sse4;
-        p.sse_sp[LUMA_16x4] = x265_pixel_ssd_sp_16x4_sse4;
-        p.sse_sp[LUMA_16x8] = x265_pixel_ssd_sp_16x8_sse4;
-        p.sse_sp[LUMA_16x12] = x265_pixel_ssd_sp_16x12_sse4;
-        p.sse_sp[LUMA_16x16] = x265_pixel_ssd_sp_16x16_sse4;
-        p.sse_sp[LUMA_16x32] = x265_pixel_ssd_sp_16x32_sse4;
-        p.sse_sp[LUMA_16x64] = x265_pixel_ssd_sp_16x64_sse4;
-        p.sse_sp[LUMA_24x32] = x265_pixel_ssd_sp_24x32_sse4;
-        p.sse_sp[LUMA_32x8] = x265_pixel_ssd_sp_32x8_sse4;
-        p.sse_sp[LUMA_32x16] = x265_pixel_ssd_sp_32x16_sse4;
-        p.sse_sp[LUMA_32x24] = x265_pixel_ssd_sp_32x24_sse4;
-        p.sse_sp[LUMA_32x32] = x265_pixel_ssd_sp_32x32_sse4;
-        p.sse_sp[LUMA_32x64] = x265_pixel_ssd_sp_32x64_sse4;
-        p.sse_sp[LUMA_48x64] = x265_pixel_ssd_sp_48x64_sse4;
-        p.sse_sp[LUMA_64x16] = x265_pixel_ssd_sp_64x16_sse4;
-        p.sse_sp[LUMA_64x32] = x265_pixel_ssd_sp_64x32_sse4;
-        p.sse_sp[LUMA_64x48] = x265_pixel_ssd_sp_64x48_sse4;
-        p.sse_sp[LUMA_64x64] = x265_pixel_ssd_sp_64x64_sse4;
+        LUMA_SSE_SP(_sse4);
 
         CHROMA_PIXELSUB_PS(_sse4);
 
diff -r 7a0fe2f90743 -r f0d2ef33a0bd source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Thu Nov 28 14:58:39 2013 +0530
+++ b/source/common/x86/pixel.h	Thu Nov 28 15:17:56 2013 +0530
@@ -62,6 +62,9 @@
 #define DECL_X1_SS(name, suffix) \
     DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, int16_t *, intptr_t))
 
+#define DECL_X1_SP(name, suffix) \
+    DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, pixel *, intptr_t))
+
 #define DECL_X4(name, suffix) \
     DECL_PIXELS(void, name ## _x3, suffix, (pixel *, pixel *, pixel *, pixel *, intptr_t, int *)) \
     DECL_PIXELS(void, name ## _x4, suffix, (pixel *, pixel *, pixel *, pixel *, pixel *, intptr_t, int *))
@@ -98,6 +101,7 @@
 DECL_X1_SS(ssd_ss, avx)
 DECL_X1_SS(ssd_ss, xop)
 DECL_X1_SS(ssd_ss, avx2)
+DECL_X1_SP(ssd_sp, sse4)
 DECL_X1(satd, mmx2)
 DECL_X1(satd, sse2)
 DECL_X1(satd, ssse3)
@@ -401,30 +405,4 @@
 void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
-
-int x265_pixel_ssd_sp_4x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_4x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_4x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_12x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x12_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_24x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x24_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_48x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x48_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
 #endif // ifndef X265_I386_PIXEL_H


More information about the x265-devel mailing list