[x265] [PATCH Review Only] added cvt32to16_shr function to testbench

Thu Oct 17 17:32:00 CEST 2013

# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1382023822 -19800
#      Thu Oct 17 21:00:22 2013 +0530
# Node ID 4dbd17ef69db91b5604f9c5cc6a4a62f15b91ab0
# Parent  f6d04c660b9bb1b0cf6274faf514be77148aa312
cvt32to16_shr: convert a whole strided block per call, enable the SSE3 version, and add it to the testbench.

diff -r f6d04c660b9b -r 4dbd17ef69db source/Lib/TLibCommon/TComTrQuant.cpp

--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Thu Oct 17 20:34:48 2013 +0530
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Thu Oct 17 21:00:22 2013 +0530
@@ -493,7 +493,5 @@
         transformSkipShift = shift;
-        for (j = 0; j < height; j++)
-        {
-            primitives.cvt32to16_shr(&residual[j * stride], &coef[j * width], shift, width);
-        }
+        // cvt32to16_shr now walks every row itself (dst advances by stride), so one call covers the block.
+        primitives.cvt32to16_shr(residual, coef, stride, shift, width);
     }
     else
diff -r f6d04c660b9b -r 4dbd17ef69db source/common/pixel.cpp
--- a/source/common/pixel.cpp	Thu Oct 17 20:34:48 2013 +0530
+++ b/source/common/pixel.cpp	Thu Oct 17 21:00:22 2013 +0530
@@ -439,13 +439,18 @@
     }
 }
 
-void convert32to16_shr(short *dst, int *src, int shift, int num)
+void convert32to16_shr(short *dst, int *src, intptr_t stride, int shift, int size)
 {
     int round = 1 << (shift - 1);
 
-    for (int i = 0; i < num; i++)
+    for (int i = 0; i < size; i++)
     {
-        dst[i] = (short)((src[i] + round) >> shift);
+        for (int j = 0; j < size; j++)
+        {
+            dst[j] = (short)((src[j] + round) >> shift);
+        }
+        src += size;
+        dst += stride;
     }
 }
 
diff -r f6d04c660b9b -r 4dbd17ef69db source/common/primitives.h
--- a/source/common/primitives.h	Thu Oct 17 20:34:48 2013 +0530
+++ b/source/common/primitives.h	Thu Oct 17 21:00:22 2013 +0530
@@ -179,7 +179,7 @@
 
 typedef void (*cvt16to32_shl_t)(int *dst, short *src, intptr_t, int, int);
 typedef void (*cvt16to16_shl_t)(short *dst, short *src, int, int, intptr_t, int);
-typedef void (*cvt32to16_shr_t)(short *dst, int *src, int, int);
+typedef void (*cvt32to16_shr_t)(short *dst, int *src, intptr_t, int, int);
 
 typedef void (*dct_t)(short *src, int *dst, intptr_t stride);
 typedef void (*idct_t)(int *src, short *dst, intptr_t stride);
diff -r f6d04c660b9b -r 4dbd17ef69db source/common/vec/pixel-sse3.cpp
--- a/source/common/vec/pixel-sse3.cpp	Thu Oct 17 20:34:48 2013 +0530
+++ b/source/common/vec/pixel-sse3.cpp	Thu Oct 17 21:00:22 2013 +0530
@@ -31,23 +31,25 @@
 using namespace x265;
 
 namespace {
-void convert32to16_shr(short *dst, int *org, int shift, int num)
+void convert32to16_shr(short *dst, int *org, intptr_t stride, int shift, int size)
 {
-    int i;
+    int i, j;
     __m128i round = _mm_set1_epi32(1 << (shift - 1));
 
-    for (i = 0; i < num; i += 4)
+    for (i = 0; i < size; i++)
     {
-        __m128i im32;
-        __m128i im16;
-
-        im32 = _mm_loadu_si128((__m128i const*)org);
-        im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), _mm_cvtsi32_si128(shift));
-        im16 = _mm_packs_epi32(im32, im32);
-        _mm_storeu_si128((__m128i*)dst, im16);
-
-        org += 4;
-        dst += 4;
+        for (j = 0; j < size; j += 4)
+        {
+            __m128i im32;
+            __m128i im16;
+ 
+            im32 = _mm_loadu_si128((__m128i const*)(org + j));
+            im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), _mm_cvtsi32_si128(shift));
+            im16 = _mm_packs_epi32(im32, im32);
+            _mm_storel_epi64((__m128i*)(dst + j), im16);
+        }
+        org += size;
+        dst += stride;
     }
 }
 
@@ -639,6 +641,6 @@
-    //p.cvt32to16_shr = convert32to16_shr;
+    p.cvt32to16_shr = convert32to16_shr;
     p.cvt16to32_shl = convert16to32_shl;
     p.cvt16to16_shl = convert16to16_shl;
 
 #if !HIGH_BIT_DEPTH
     p.transpose[0] = transpose4;
diff -r f6d04c660b9b -r 4dbd17ef69db source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Thu Oct 17 20:34:48 2013 +0530
+++ b/source/test/pixelharness.cpp	Thu Oct 17 21:00:22 2013 +0530
@@ -494,6 +494,39 @@
     return true;
 }
 
+// Compares opt against ref on a STRIDE x STRIDE block of random inputs; src is freed before returning.
+bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt)
+{
+    int bufsize = STRIDE * STRIDE;
+    int* src = (int*)X265_MALLOC(int, bufsize);
+
+    if (!src)
+    {
+        fprintf(stderr, "malloc failed, unable to initiate tests!\n");
+        exit(1);
+    }
+
+    int shift = (rand() % 7 + 1); // [1, 7]: the primitives compute 1 << (shift - 1), so shift must be >= 1
+    for (int i = 0; i < bufsize; i++)
+    {
+        src[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;
+    }
+
+    ALIGN_VAR_16(short, ref_dest[64 * 64]);
+    ALIGN_VAR_16(short, opt_dest[64 * 64]);
+
+    memset(ref_dest, 0, 64 * 64 * sizeof(short));
+    memset(opt_dest, 0, 64 * 64 * sizeof(short));
+
+    opt(opt_dest, src, STRIDE, shift, STRIDE);
+    ref(ref_dest, src, STRIDE, shift, STRIDE);
+
+    bool match = memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(short)) == 0;
+
+    X265_FREE(src); // src is scratch input only; previously it was never released on either return path
+    return match;
+}
+
 bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt)
 {
     ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
@@ -665,7 +698,14 @@
             }
         }
     }
-
+    if(opt.cvt32to16_shr)
+    {
+        if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr))
+        {
+            printf("cvt32to16 failed!\n");
+            return false;
+        }
+    }
     if (opt.blockcpy_pp)
     {
         if (!check_block_copy(ref.blockcpy_pp, opt.blockcpy_pp))
diff -r f6d04c660b9b -r 4dbd17ef69db source/test/pixelharness.h
--- a/source/test/pixelharness.h	Thu Oct 17 20:34:48 2013 +0530
+++ b/source/test/pixelharness.h	Thu Oct 17 21:00:22 2013 +0530
@@ -53,6 +53,7 @@
     bool check_pixeladd_pp(pixeladd_pp_t ref, pixeladd_pp_t opt);
     bool check_downscale_t(downscale_t ref, downscale_t opt);
     bool check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt);
+    bool check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt);
 
 public: