[x265] [PATCH] added cvt32to16_shr_sse2 function to testbench

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Fri Oct 18 10:49:12 CEST 2013


# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1382086085 -19800
#      Fri Oct 18 14:18:05 2013 +0530
# Node ID 6d9bd6b6209e45cb49da804b23ad78424914b323
# Parent  d6d7187c5f4ea0978ebbddc1a559cea3712bf345
Added cvt32to16_shr_sse2 function to testbench.
Measured speedup is almost 14x.

diff -r d6d7187c5f4e -r 6d9bd6b6209e source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Fri Oct 18 00:42:36 2013 -0500
+++ b/source/test/pixelharness.cpp	Fri Oct 18 14:18:05 2013 +0530
@@ -45,10 +45,12 @@
     pbuf3 = (pixel*)X265_MALLOC(pixel, bufsize);
     pbuf4 = (pixel*)X265_MALLOC(pixel, bufsize);
 
+    ibuf1 = (int*)X265_MALLOC(int, bufsize);
+
     sbuf1 = (short*)X265_MALLOC(short, bufsize);
     sbuf2 = (short*)X265_MALLOC(short, bufsize);
 
-    if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2)
+    if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2 || !ibuf1)
     {
         fprintf(stderr, "malloc failed, unable to initiate tests!\n");
         exit(1);
@@ -63,6 +65,8 @@
 
         sbuf1[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1; //max(SHORT_MIN, min(rand(), SHORT_MAX));
         sbuf2[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1; //max(SHORT_MIN, min(rand(), SHORT_MAX));
+
+        ibuf1[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;
     }
 }
 
@@ -481,6 +485,22 @@
     return true;
 }
 
+bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt)
+{
+    // Run opt and ref on the shared ibuf1 input with one shift; pass only if outputs match.
+    ALIGN_VAR_16(short, opt_dest[64 * 64]);
+    ALIGN_VAR_16(short, ref_dest[64 * 64]);
+
+    const int shift = rand() % 7 + 1; // shift amount in [1, 7], identical for both calls
+
+    opt(opt_dest, ibuf1, STRIDE, shift, STRIDE);
+    ref(ref_dest, ibuf1, STRIDE, shift, STRIDE);
+
+    // memcmp yields zero only when all 64 * 64 shorts of both destinations are identical.
+    const bool same = memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(short)) == 0;
+    return same;
+}
+
 bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
 {
     if (opt.satd[part])
@@ -615,6 +635,15 @@
         }
     }
 
+    if (opt.cvt32to16_shr)
+    {
+        if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr))
+        {
+            printf("cvt32to16 failed!\n");
+            return false;
+        }
+    }
+
     if (opt.blockcpy_pp)
     {
         if (!check_block_copy(ref.blockcpy_pp, opt.blockcpy_pp))
@@ -810,6 +839,12 @@
         }
     }
 
+    if (opt.cvt32to16_shr)
+    {
+        printf("cvt32to16 conversion");
+        REPORT_SPEEDUP(opt.cvt32to16_shr, ref.cvt32to16_shr, sbuf1, ibuf1, 64, 5, 64);
+    }
+
     if (opt.blockcpy_pp)
     {
         printf("block cpy");
diff -r d6d7187c5f4e -r 6d9bd6b6209e source/test/pixelharness.h
--- a/source/test/pixelharness.h	Fri Oct 18 00:42:36 2013 -0500
+++ b/source/test/pixelharness.h	Fri Oct 18 14:18:05 2013 +0530
@@ -33,6 +33,8 @@
 
     pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
 
+    int *ibuf1;
+
     short *sbuf1, *sbuf2;
 
     bool check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt);
@@ -52,6 +54,7 @@
     bool check_pixeladd_ss(pixeladd_ss_t ref, pixeladd_ss_t opt);
     bool check_pixeladd_pp(pixeladd_pp_t ref, pixeladd_pp_t opt);
     bool check_downscale_t(downscale_t ref, downscale_t opt);
+    bool check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt);
 
 public:
 


More information about the x265-devel mailing list