[x265] [PATCH] asm: cleanups for pixel_sse_sp
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Thu Nov 28 10:48:27 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1385632076 -19800
# Thu Nov 28 15:17:56 2013 +0530
# Node ID f0d2ef33a0bdb41b9b3d7edb9e0b7358b0783271
# Parent 7a0fe2f9074330bb3126e95194e7c4ed956c6e4d
asm: cleanups for pixel_sse_sp
diff -r 7a0fe2f90743 -r f0d2ef33a0bd source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Nov 28 14:58:39 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Nov 28 15:17:56 2013 +0530
@@ -450,6 +450,36 @@
SETUP_PIXEL_VAR_DEF(32, 32, cpu); \
SETUP_PIXEL_VAR_DEF(64, 64, cpu);
+#define SETUP_PIXEL_SSE_SP_DEF(W, H, cpu) /* registers one sse_sp table entry for block size W x H; cpu is a suffix token such as _sse4 */ \
+ p.sse_sp[LUMA_ ## W ## x ## H] = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu; /* token-pastes to p.sse_sp[LUMA_WxH] = x265_pixel_ssd_sp_WxH<cpu>; `p` must be in scope at the expansion site, and the expansion already ends in ';' */
+
+#define LUMA_SSE_SP(cpu) /* expands SETUP_PIXEL_SSE_SP_DEF once for each of the 25 block sizes listed below, passing the same cpu suffix to every entry */ \
+ SETUP_PIXEL_SSE_SP_DEF(4, 4, cpu); /* the ';' here follows the ';' already inside SETUP_PIXEL_SSE_SP_DEF, leaving a harmless empty statement */ \
+ SETUP_PIXEL_SSE_SP_DEF(8, 8, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(8, 4, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(4, 8, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(16, 16, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(16, 8, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(8, 16, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(16, 12, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(12, 16, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(16, 4, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(4, 16, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(32, 32, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(32, 16, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(16, 32, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(32, 24, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(24, 32, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(32, 8, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(8, 32, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(64, 64, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(64, 32, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(32, 64, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(64, 48, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(48, 64, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(64, 16, cpu); \
+ SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
+
namespace x265 {
// private x265 namespace
@@ -648,31 +678,7 @@
p.sse_pp[LUMA_64x48] = x265_pixel_ssd_64x48_sse4;
p.sse_pp[LUMA_64x64] = x265_pixel_ssd_64x64_sse4;
- p.sse_sp[LUMA_4x4] = x265_pixel_ssd_sp_4x4_sse4;
- p.sse_sp[LUMA_4x8] = x265_pixel_ssd_sp_4x8_sse4;
- p.sse_sp[LUMA_4x16] = x265_pixel_ssd_sp_4x16_sse4;
- p.sse_sp[LUMA_8x4] = x265_pixel_ssd_sp_8x4_sse4;
- p.sse_sp[LUMA_8x8] = x265_pixel_ssd_sp_8x8_sse4;
- p.sse_sp[LUMA_8x16] = x265_pixel_ssd_sp_8x16_sse4;
- p.sse_sp[LUMA_8x32] = x265_pixel_ssd_sp_8x32_sse4;
- p.sse_sp[LUMA_12x16] = x265_pixel_ssd_sp_12x16_sse4;
- p.sse_sp[LUMA_16x4] = x265_pixel_ssd_sp_16x4_sse4;
- p.sse_sp[LUMA_16x8] = x265_pixel_ssd_sp_16x8_sse4;
- p.sse_sp[LUMA_16x12] = x265_pixel_ssd_sp_16x12_sse4;
- p.sse_sp[LUMA_16x16] = x265_pixel_ssd_sp_16x16_sse4;
- p.sse_sp[LUMA_16x32] = x265_pixel_ssd_sp_16x32_sse4;
- p.sse_sp[LUMA_16x64] = x265_pixel_ssd_sp_16x64_sse4;
- p.sse_sp[LUMA_24x32] = x265_pixel_ssd_sp_24x32_sse4;
- p.sse_sp[LUMA_32x8] = x265_pixel_ssd_sp_32x8_sse4;
- p.sse_sp[LUMA_32x16] = x265_pixel_ssd_sp_32x16_sse4;
- p.sse_sp[LUMA_32x24] = x265_pixel_ssd_sp_32x24_sse4;
- p.sse_sp[LUMA_32x32] = x265_pixel_ssd_sp_32x32_sse4;
- p.sse_sp[LUMA_32x64] = x265_pixel_ssd_sp_32x64_sse4;
- p.sse_sp[LUMA_48x64] = x265_pixel_ssd_sp_48x64_sse4;
- p.sse_sp[LUMA_64x16] = x265_pixel_ssd_sp_64x16_sse4;
- p.sse_sp[LUMA_64x32] = x265_pixel_ssd_sp_64x32_sse4;
- p.sse_sp[LUMA_64x48] = x265_pixel_ssd_sp_64x48_sse4;
- p.sse_sp[LUMA_64x64] = x265_pixel_ssd_sp_64x64_sse4;
+ LUMA_SSE_SP(_sse4); /* stands in for the 25 explicit p.sse_sp[LUMA_*] = x265_pixel_ssd_sp_*_sse4 assignments removed just above */
CHROMA_PIXELSUB_PS(_sse4);
diff -r 7a0fe2f90743 -r f0d2ef33a0bd source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Thu Nov 28 14:58:39 2013 +0530
+++ b/source/common/x86/pixel.h Thu Nov 28 15:17:56 2013 +0530
@@ -62,6 +62,9 @@
#define DECL_X1_SS(name, suffix) \
DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, int16_t *, intptr_t))
+#define DECL_X1_SP(name, suffix) /* counterpart of DECL_X1_SS whose second pointer argument is pixel* rather than int16_t* */ \
+ DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, pixel *, intptr_t)) /* forwards to DECL_PIXELS with an int return type; presumably yields one prototype per block size - confirm against DECL_PIXELS */
+
#define DECL_X4(name, suffix) \
DECL_PIXELS(void, name ## _x3, suffix, (pixel *, pixel *, pixel *, pixel *, intptr_t, int *)) \
DECL_PIXELS(void, name ## _x4, suffix, (pixel *, pixel *, pixel *, pixel *, pixel *, intptr_t, int *))
@@ -98,6 +101,7 @@
DECL_X1_SS(ssd_ss, avx)
DECL_X1_SS(ssd_ss, xop)
DECL_X1_SS(ssd_ss, avx2)
+DECL_X1_SP(ssd_sp, sse4) /* intended to replace the hand-written x265_pixel_ssd_sp_*_sse4 prototypes removed from the end of this header; verify DECL_PIXELS covers all 25 sizes */
DECL_X1(satd, mmx2)
DECL_X1(satd, sse2)
DECL_X1(satd, ssse3)
@@ -401,30 +405,4 @@
void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
-
-int x265_pixel_ssd_sp_4x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_4x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_4x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_12x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x12_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_24x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x24_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_48x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x48_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
#endif // ifndef X265_I386_PIXEL_H
More information about the x265-devel mailing list