[x265] [PATCH] asm: Unit test code for pixelsub_ps function

murugan at multicorewareinc.com murugan at multicorewareinc.com
Tue Nov 12 14:41:10 CET 2013


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384263623 -19800
#      Tue Nov 12 19:10:23 2013 +0530
# Node ID b1e0fe97bbfa7bf367d7318f057690c64f1f1f19
# Parent  7a8118d07276312b2971b292d689805074abd28a
asm: Unit test code for pixelsub_ps function

diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/pixel.cpp
--- a/source/common/pixel.cpp	Tue Nov 12 17:06:34 2013 +0530
+++ b/source/common/pixel.cpp	Tue Nov 12 19:10:23 2013 +0530
@@ -778,6 +778,22 @@
         b += strideb;
     }
 }
+
+template<int bx, int by>
+void pixel_sub_ps_c(int16_t *a, intptr_t dstride, pixel *b0, pixel *b1, intptr_t sstride0, intptr_t sstride1)
+{
+    // Residual of a bx-wide, by-tall block: a = b0 - b1, sample by sample.
+    // The three buffers each step by their own stride once a row is done.
+    int16_t *dstRow = a;
+    pixel *srcRow0 = b0;
+    pixel *srcRow1 = b1;
+
+    for (int row = 0; row < by; row++, dstRow += dstride, srcRow0 += sstride0, srcRow1 += sstride1)
+    {
+        for (int col = 0; col < bx; col++)
+            dstRow[col] = (int16_t)(srcRow0[col] - srcRow1[col]);
+    }
+}
 }  // end anonymous namespace
 
 namespace x265 {
@@ -821,12 +837,14 @@
 #define CHROMA(W, H) \
     p.chroma_copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
     p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
-    p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;
+    p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
+    p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;
 
 #define LUMA(W, H) \
     p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
     p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
-    p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;
+    p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
+    p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;
 
     LUMA(4, 4);
     LUMA(8, 8);
diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/primitives.h
--- a/source/common/primitives.h	Tue Nov 12 17:06:34 2013 +0530
+++ b/source/common/primitives.h	Tue Nov 12 19:10:23 2013 +0530
@@ -207,6 +207,8 @@
 typedef void (*copy_sp_t)(pixel *dst, intptr_t dstStride, int16_t *src, intptr_t srcStride);
 typedef void (*copy_ps_t)(int16_t *dst, intptr_t dstStride, pixel *src, intptr_t srcStride);
 
+typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1); // C reference pixel_sub_ps_c stores dst = src0 - src1 per sample
+
 /* Define a structure containing function pointers to optimized encoder
  * primitives.  Each pointer can reference either an assembly routine,
  * a vectorized primitive, or a C function. */
@@ -237,6 +239,9 @@
     copy_ps_t       luma_copy_ps[NUM_LUMA_PARTITIONS];
     copy_ps_t       chroma_copy_ps[NUM_CHROMA_PARTITIONS];
 
+    pixel_sub_ps_t  luma_sub_ps[NUM_LUMA_PARTITIONS];
+    pixel_sub_ps_t  chroma_sub_ps[NUM_CHROMA_PARTITIONS];
+
     ipfilter_ps_t   ipfilter_ps[NUM_IPFILTER_P_S];
     ipfilter_sp_t   ipfilter_sp[NUM_IPFILTER_S_P];
     ipfilter_ss_t   ipfilter_ss[NUM_IPFILTER_S_S];
diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Nov 12 17:06:34 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Nov 12 19:10:23 2013 +0530
@@ -133,7 +133,8 @@
 
 #define SETUP_CHROMA_FUNC_DEF(W, H, cpu) \
     p.chroma_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
-    p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu;
+    p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## H ## cpu;\
+    p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu;
 
 #define SETUP_CHROMA_BLOCKCOPY_FUNC_DEF(W, H, cpu) \
     p.chroma_copy_pp[CHROMA_ ## W ## x ## H] = x265_blockcopy_pp_ ## W ## x ## H ## cpu;
@@ -194,7 +195,8 @@
     p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## x ## H ## cpu; \
     p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## x ## H ## cpu; \
     p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## x ## H ## cpu; \
-    p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu;
+    p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## x ## H ## cpu;\
+    p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H ## cpu;
 
 #define SETUP_LUMA_BLOCKCOPY_FUNC_DEF(W, H, cpu) \
     p.luma_copy_pp[LUMA_ ## W ## x ## H] = x265_blockcopy_pp_ ## W ## x ## H ## cpu;
diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Tue Nov 12 17:06:34 2013 +0530
+++ b/source/common/x86/pixel.h	Tue Nov 12 19:10:23 2013 +0530
@@ -266,11 +266,77 @@
 DECL_ADS(2, avx2)
 DECL_ADS(1, avx2)
 
+#define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) /* one prototype; the user supplies the ';' */ \
+    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1)
+
+#define CHROMA_PIXELSUB_DEF(cpu) /* prototypes for the chroma block sizes below; no trailing ';' */ \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 4, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 2, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(2, 4, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 8, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 4, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 8, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 6, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(6, 8, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 2, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(2, 8, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 8, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 16, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 12, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(12, 16, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 4, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 16, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 32, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 16, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 32, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 24, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(24, 32, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 8, cpu); \
+    SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 32, cpu)
+
+#define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) /* one prototype; the user supplies the ';' */ \
+    void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1)
+
+#define LUMA_PIXELSUB_DEF(cpu) /* prototypes for the luma block sizes below; no trailing ';' */ \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(4,   4, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(8,   8, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(8,   4, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(4,   8, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(16, 16, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(16,  8, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(8,  16, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(16, 12, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(12, 16, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(16,  4, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(4,  16, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(32, 32, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(32, 16, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(16, 32, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(32, 24, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(24, 32, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(32,  8, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(8,  32, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(64, 64, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(64, 32, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(32, 64, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(64, 48, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(48, 64, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(64, 16, cpu); \
+    SETUP_LUMA_PIXELSUB_PS_FUNC(16, 64, cpu)
+
+CHROMA_PIXELSUB_DEF(_sse4); // the ';' here ends the last prototype, so no empty declaration is left behind
+LUMA_PIXELSUB_DEF(_sse4);   // repeats the sizes shared with the chroma list; redeclaring a function is harmless
+
 #undef DECL_PIXELS
 #undef DECL_SUF
 #undef DECL_HEVC_SSD
 #undef DECL_X1
 #undef DECL_X4
 #undef DECL_ADS
+#undef SETUP_CHROMA_PIXELSUB_PS_FUNC
+#undef SETUP_LUMA_PIXELSUB_PS_FUNC
+#undef CHROMA_PIXELSUB_DEF
+#undef LUMA_PIXELSUB_DEF
 
 #endif // ifndef X265_I386_PIXEL_H
diff -r 7a8118d07276 -r b1e0fe97bbfa source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Tue Nov 12 17:06:34 2013 +0530
+++ b/source/test/pixelharness.cpp	Tue Nov 12 19:10:23 2013 +0530
@@ -586,6 +586,29 @@
     return true;
 }
 
+bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt)
+{
+    // Run both kernels on identical inputs and require bit-identical output.
+    // The 0xCD pre-fill lets the full-buffer compare also flag writes only one kernel makes.
+    ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
+    ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
+    memset(ref_dest, 0xCD, sizeof(ref_dest));
+    memset(opt_dest, 0xCD, sizeof(opt_dest));
+
+    // NOTE(review): with a bound of 1 only source offset 0 is ever exercised - confirm this is intended.
+    for (int pass = 0, offset = 0; pass < 1; pass++, offset += INCR)
+    {
+        pixel *src0 = pbuf2 + offset;
+        pixel *src1 = pbuf1 + offset;
+
+        opt(opt_dest, 64, src0, src1, STRIDE, STRIDE);
+        ref(ref_dest, 64, src0, src1, STRIDE, STRIDE);
+        if (memcmp(ref_dest, opt_dest, sizeof(ref_dest)) != 0)
+            return false;
+    }
+    return true;
+}
+
 bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
 {
     if (opt.satd[part])
@@ -722,6 +745,24 @@
             return false;
         }
     }
+
+    if (opt.luma_sub_ps[part])
+    {
+        if (!check_pixel_sub_ps(ref.luma_sub_ps[part], opt.luma_sub_ps[part]))
+        {
+            printf("luma_sub_ps[%s] failed\n", lumaPartStr[part]);
+            return false;
+        }
+    }
+
+    if (opt.chroma_sub_ps[part])
+    {
+        if (!check_pixel_sub_ps(ref.chroma_sub_ps[part], opt.chroma_sub_ps[part]))
+        {
+            printf("chroma_sub_ps[%s] failed\n", chromaPartStr[part]);
+            return false;
+        }
+    }
     return true;
 }
 
@@ -968,6 +1009,18 @@
         printf("ccpy_ps[%s]", chromaPartStr[part]);
         REPORT_SPEEDUP(opt.chroma_copy_ps[part], ref.chroma_copy_ps[part], sbuf1, 64, pbuf1, 128);
     }
+
+    if (opt.luma_sub_ps[part]) // output goes to sbuf1 (as in the copy_ps timing) so input pbuf1 is not overwritten; assumes sbuf1 holds >= 64x64 samples - confirm
+    {
+        printf("luma_sub_ps[%s]", lumaPartStr[part]);
+        REPORT_SPEEDUP(opt.luma_sub_ps[part], ref.luma_sub_ps[part], sbuf1, 64, pbuf2, pbuf1, STRIDE, STRIDE);
+    }
+
+    if (opt.chroma_sub_ps[part]) // same sbuf1 destination, keeping both pixel sources read-only
+    {
+        printf("chroma_sub_ps[%s]", chromaPartStr[part]);
+        REPORT_SPEEDUP(opt.chroma_sub_ps[part], ref.chroma_sub_ps[part], sbuf1, 64, pbuf2, pbuf1, STRIDE, STRIDE);
+    }
 }
 
 void PixelHarness::measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt)
diff -r 7a8118d07276 -r b1e0fe97bbfa source/test/pixelharness.h
--- a/source/test/pixelharness.h	Tue Nov 12 17:06:34 2013 +0530
+++ b/source/test/pixelharness.h	Tue Nov 12 19:10:23 2013 +0530
@@ -60,6 +60,8 @@
     bool check_block_copy_ps(copy_ps_t ref, copy_ps_t opt);
 
     bool check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt);
+
+    bool check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt);
 public:
 
     PixelHarness();


More information about the x265-devel mailing list