[x265] [PATCH] asm: Unit test code for pixelsub_ps function
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Tue Nov 12 14:41:10 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384263623 -19800
# Tue Nov 12 19:10:23 2013 +0530
# Node ID b1e0fe97bbfa7bf367d7318f057690c64f1f1f19
# Parent 7a8118d07276312b2971b292d689805074abd28a
asm: Unit test code for pixelsub_ps function
diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/pixel.cpp
--- a/source/common/pixel.cpp Tue Nov 12 17:06:34 2013 +0530
+++ b/source/common/pixel.cpp Tue Nov 12 19:10:23 2013 +0530
@@ -778,6 +778,22 @@
b += strideb;
}
}
+
+template<int bx, int by> // bx = block width, by = block height; both fixed at compile time
+void pixel_sub_ps_c(int16_t *a, intptr_t dstride, pixel *b0, pixel *b1, intptr_t sstride0, intptr_t sstride1) // C version: stores b0 - b1 into a for a bx-wide, by-tall block
+{
+ for (int y = 0; y < by; y++) // one pass per row
+ {
+ for (int x = 0; x < bx; x++)
+ {
+ a[x] = (int16_t)(b0[x] - b1[x]); // per-sample difference, narrowed to int16_t
+ }
+
+ b0 += sstride0; // each pointer moves to its next row by its own stride, counted in elements
+ b1 += sstride1;
+ a += dstride;
+ }
+}
} // end anonymous namespace
namespace x265 {
@@ -821,12 +837,14 @@
#define CHROMA(W, H) \
p.chroma_copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
- p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;
+ p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
+ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;
#define LUMA(W, H) \
p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
- p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;
+ p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
+ p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;
LUMA(4, 4);
LUMA(8, 8);
diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/primitives.h
--- a/source/common/primitives.h Tue Nov 12 17:06:34 2013 +0530
+++ b/source/common/primitives.h Tue Nov 12 19:10:23 2013 +0530
@@ -207,6 +207,8 @@
typedef void (*copy_sp_t)(pixel *dst, intptr_t dstStride, int16_t *src, intptr_t srcStride);
typedef void (*copy_ps_t)(int16_t *dst, intptr_t dstStride, pixel *src, intptr_t srcStride);
+typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1); // block subtract primitive: two pixel sources in, int16_t destination out; each buffer has its own stride
+
/* Define a structure containing function pointers to optimized encoder
* primitives. Each pointer can reference either an assembly routine,
* a vectorized primitive, or a C function. */
@@ -237,6 +239,9 @@
copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS];
copy_ps_t chroma_copy_ps[NUM_CHROMA_PARTITIONS];
+ pixel_sub_ps_t luma_sub_ps[NUM_LUMA_PARTITIONS];
+ pixel_sub_ps_t chroma_sub_ps[NUM_CHROMA_PARTITIONS];
+
ipfilter_ps_t ipfilter_ps[NUM_IPFILTER_P_S];
ipfilter_sp_t ipfilter_sp[NUM_IPFILTER_S_P];
ipfilter_ss_t ipfilter_ss[NUM_IPFILTER_S_S];
diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Nov 12 17:06:34 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 12 19:10:23 2013 +0530
@@ -133,7 +133,8 @@
#define SETUP_CHROMA_FUNC_DEF(W, H, cpu) \
p.chroma_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
- p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu;
+ p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu;\
+ p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu;
#define SETUP_CHROMA_BLOCKCOPY_FUNC_DEF(W, H, cpu) \
p.chroma_copy_pp[CHROMA_ ## W ## x ## H] = x265_blockcopy_pp_ ## W ## x ## H ## cpu;
@@ -194,7 +195,8 @@
p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
- p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu;
+ p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu;\
+ p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu;
#define SETUP_LUMA_BLOCKCOPY_FUNC_DEF(W, H, cpu) \
p.luma_copy_pp[LUMA_ ## W ## x ## H] = x265_blockcopy_pp_ ## W ## x ## H ## cpu;
diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Tue Nov 12 17:06:34 2013 +0530
+++ b/source/common/x86/pixel.h Tue Nov 12 19:10:23 2013 +0530
@@ -266,11 +266,77 @@
DECL_ADS(2, avx2)
DECL_ADS(1, avx2)
+#define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) /* declares one prototype named x265_pixel_sub_ps_WxH followed by the cpu suffix */ \
+ void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
+
+#define CHROMA_PIXELSUB_DEF(cpu) /* declares the sub_ps prototype for every chroma block size listed here; NOTE(review): the helper already ends in ';', so each entry expands with an extra empty ';' */ \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 4, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 2, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(2, 4, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 4, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 8, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 6, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(6, 8, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 2, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(2, 8, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 8, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 16, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 12, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(12, 16, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 4, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 16, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 32, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 16, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 32, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 24, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(24, 32, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 8, cpu); \
+ SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 32, cpu);
+
+#define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) /* same expansion as the chroma helper: one x265_pixel_sub_ps_WxH prototype with the cpu suffix */ \
+ void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
+
+#define LUMA_PIXELSUB_DEF(cpu) /* declares the sub_ps prototype for every luma block size listed here, 4x4 through 64x64 */ \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(4, 4, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(8, 4, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(4, 8, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(16, 8, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(8, 16, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(16, 12, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(12, 16, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(16, 4, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(4, 16, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(32, 32, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(32, 16, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(16, 32, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(32, 24, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(24, 32, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(32, 8, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(8, 32, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(64, 64, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(64, 32, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(32, 64, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(64, 48, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(48, 64, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(64, 16, cpu); \
+ SETUP_LUMA_PIXELSUB_PS_FUNC(16, 64, cpu);
+
+CHROMA_PIXELSUB_DEF(_sse4); // emits the _sse4 prototypes for every size in the chroma list
+LUMA_PIXELSUB_DEF(_sse4); // emits the _sse4 prototypes for the luma list; sizes that also appear in the chroma list are declared a second time with the identical signature
+
#undef DECL_PIXELS
#undef DECL_SUF
#undef DECL_HEVC_SSD
#undef DECL_X1
#undef DECL_X4
#undef DECL_ADS
+#undef SETUP_CHROMA_PIXELSUB_PS_FUNC
+#undef SETUP_LUMA_PIXELSUB_PS_FUNC
+#undef CHROMA_PIXELSUB_DEF
+#undef LUMA_PIXELSUB_DEF
#endif // ifndef X265_I386_PIXEL_H
diff -r 7a8118d07276 -r b1e0fe97bbfa source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Tue Nov 12 17:06:34 2013 +0530
+++ b/source/test/pixelharness.cpp Tue Nov 12 19:10:23 2013 +0530
@@ -586,6 +586,29 @@
return true;
}
+bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt) // returns true when opt leaves its output buffer byte-identical to ref's
+{
+ ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
+ ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
+
+ memset(ref_dest, 0xCD, sizeof(ref_dest)); // same fill pattern in both buffers, so samples neither primitive writes still compare equal
+ memset(opt_dest, 0xCD, sizeof(opt_dest));
+
+ int j = 0; // element offset applied to pbuf1 and pbuf2 for the current iteration
+ for (int i = 0; i < 1; i++) // NOTE(review): the bound is 1, so only offset 0 is exercised and the j += INCR below never reaches a call; confirm whether more iterations were intended
+ {
+ opt(opt_dest, 64, pbuf2 + j, pbuf1 + j, STRIDE, STRIDE); // destination stride is 64 int16_t elements
+ ref(ref_dest, 64, pbuf2 + j, pbuf1 + j, STRIDE, STRIDE);
+
+ if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) // compares the whole 64x64 buffer, so a stray write outside the block by only one side is also reported
+ return false;
+
+ j += INCR;
+ }
+
+ return true;
+}
+
bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
{
if (opt.satd[part])
@@ -722,6 +745,24 @@
return false;
}
}
+
+ if (opt.luma_sub_ps[part])
+ {
+ if (!check_pixel_sub_ps(ref.luma_sub_ps[part], opt.luma_sub_ps[part]))
+ {
+ printf("luma_sub_ps[%s] failed\n", lumaPartStr[part]);
+ return false;
+ }
+ }
+
+ if (opt.chroma_sub_ps[part])
+ {
+ if (!check_pixel_sub_ps(ref.chroma_sub_ps[part], opt.chroma_sub_ps[part]))
+ {
+ printf("chroma_sub_ps[%s] failed\n", chromaPartStr[part]);
+ return false;
+ }
+ }
return true;
}
@@ -968,6 +1009,18 @@
printf("ccpy_ps[%s]", chromaPartStr[part]);
REPORT_SPEEDUP(opt.chroma_copy_ps[part], ref.chroma_copy_ps[part], sbuf1, 64, pbuf1, 128);
}
+
+ if (opt.luma_sub_ps[part])
+ {
+ printf("luma_sub_ps[%s]", lumaPartStr[part]);
+ REPORT_SPEEDUP(opt.luma_sub_ps[part], ref.luma_sub_ps[part], (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE);
+ }
+
+ if (opt.chroma_sub_ps[part])
+ {
+ printf("chroma_sub_ps[%s]", chromaPartStr[part]);
+ REPORT_SPEEDUP(opt.chroma_sub_ps[part], ref.chroma_sub_ps[part], (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE);
+ }
}
void PixelHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
diff -r 7a8118d07276 -r b1e0fe97bbfa source/test/pixelharness.h
--- a/source/test/pixelharness.h Tue Nov 12 17:06:34 2013 +0530
+++ b/source/test/pixelharness.h Tue Nov 12 19:10:23 2013 +0530
@@ -60,6 +60,8 @@
bool check_block_copy_ps(copy_ps_t ref, copy_ps_t opt);
bool check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt);
+
+ bool check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt);
public:
PixelHarness();
More information about the x265-devel
mailing list