[x265] [PATCH Review Only] added cvt32to16_shr function to testbench
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Thu Oct 17 17:32:00 CEST 2013
# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1382023822 -19800
# Thu Oct 17 21:00:22 2013 +0530
# Node ID 4dbd17ef69db91b5604f9c5cc6a4a62f15b91ab0
# Parent f6d04c660b9bb1b0cf6274faf514be77148aa312
added cvt32to16_shr function to testbench.
diff -r f6d04c660b9b -r 4dbd17ef69db source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Oct 17 20:34:48 2013 +0530
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Thu Oct 17 21:00:22 2013 +0530
@@ -493,7 +493,7 @@
transformSkipShift = shift;
for (j = 0; j < height; j++)
{
- primitives.cvt32to16_shr(&residual[j * stride], &coef[j * width], shift, width);
+ primitives.cvt32to16_shr(residual, coef, stride, shift, width);
}
}
else
diff -r f6d04c660b9b -r 4dbd17ef69db source/common/pixel.cpp
--- a/source/common/pixel.cpp Thu Oct 17 20:34:48 2013 +0530
+++ b/source/common/pixel.cpp Thu Oct 17 21:00:22 2013 +0530
@@ -439,13 +439,18 @@
}
}
-void convert32to16_shr(short *dst, int *src, int shift, int num)
+void convert32to16_shr(short *dst, int *src, intptr_t stride, int shift, int size)
{
int round = 1 << (shift - 1);
- for (int i = 0; i < num; i++)
+ for (int i = 0; i < size; i++)
{
- dst[i] = (short)((src[i] + round) >> shift);
+ for (int j = 0; j < size; j++)
+ {
+ dst[j] = (short)((src[j] + round) >> shift);
+ }
+ src += size;
+ dst += stride;
}
}
diff -r f6d04c660b9b -r 4dbd17ef69db source/common/primitives.h
--- a/source/common/primitives.h Thu Oct 17 20:34:48 2013 +0530
+++ b/source/common/primitives.h Thu Oct 17 21:00:22 2013 +0530
@@ -179,7 +179,7 @@
typedef void (*cvt16to32_shl_t)(int *dst, short *src, intptr_t, int, int);
typedef void (*cvt16to16_shl_t)(short *dst, short *src, int, int, intptr_t, int);
-typedef void (*cvt32to16_shr_t)(short *dst, int *src, int, int);
+typedef void (*cvt32to16_shr_t)(short *dst, int *src, intptr_t, int, int);
typedef void (*dct_t)(short *src, int *dst, intptr_t stride);
typedef void (*idct_t)(int *src, short *dst, intptr_t stride);
diff -r f6d04c660b9b -r 4dbd17ef69db source/common/vec/pixel-sse3.cpp
--- a/source/common/vec/pixel-sse3.cpp Thu Oct 17 20:34:48 2013 +0530
+++ b/source/common/vec/pixel-sse3.cpp Thu Oct 17 21:00:22 2013 +0530
@@ -31,23 +31,25 @@
using namespace x265;
namespace {
-void convert32to16_shr(short *dst, int *org, int shift, int num)
+void convert32to16_shr(short *dst, int *org, intptr_t stride, int shift, int size)
{
- int i;
+ int i, j;
__m128i round = _mm_set1_epi32(1 << (shift - 1));
- for (i = 0; i < num; i += 4)
+ for (i = 0; i < size; i++)
{
- __m128i im32;
- __m128i im16;
-
- im32 = _mm_loadu_si128((__m128i const*)org);
- im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), _mm_cvtsi32_si128(shift));
- im16 = _mm_packs_epi32(im32, im32);
- _mm_storeu_si128((__m128i*)dst, im16);
-
- org += 4;
- dst += 4;
+ for (j = 0; j < size; j += 4)
+ {
+ __m128i im32;
+ __m128i im16;
+
+ im32 = _mm_loadu_si128((__m128i const*)(org + j));
+ im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), _mm_cvtsi32_si128(shift));
+ im16 = _mm_packs_epi32(im32, im32);
+ _mm_storel_epi64((__m128i*)(dst + j), im16);
+ }
+ org += size;
+ dst += stride;
}
}
@@ -639,6 +641,7 @@
//p.cvt32to16_shr = convert32to16_shr;
p.cvt16to32_shl = convert16to32_shl;
p.cvt16to16_shl = convert16to16_shl;
+ p.cvt32to16_shr = convert32to16_shr;
#if !HIGH_BIT_DEPTH
p.transpose[0] = transpose4;
diff -r f6d04c660b9b -r 4dbd17ef69db source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Thu Oct 17 20:34:48 2013 +0530
+++ b/source/test/pixelharness.cpp Thu Oct 17 21:00:22 2013 +0530
@@ -494,6 +494,39 @@
return true;
}
+bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt)
+{
+ int bufsize = STRIDE * STRIDE;
+ int* src = (int*)X265_MALLOC(int, bufsize);
+
+ int shift = (rand() % 7 + 1);
+
+ if (!src)
+ {
+ fprintf(stderr, "malloc failed, unable to initiate tests!\n");
+ exit(1);
+ }
+
+ for (int i = 0; i < bufsize; i++)
+ {
+ src[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;
+ }
+
+ ALIGN_VAR_16(short, ref_dest[64 * 64]);
+ ALIGN_VAR_16(short, opt_dest[64 * 64]);
+
+ memset(ref_dest, 0, 64 * 64 * sizeof(short));
+ memset(opt_dest, 0, 64 * 64 * sizeof(short));
+
+ opt(opt_dest, src, STRIDE, shift, STRIDE);
+ ref(ref_dest, src, STRIDE, shift, STRIDE);
+
+ if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(short)))
+ return false;
+
+ return true;
+}
+
bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt)
{
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
@@ -665,7 +698,14 @@
}
}
}
-
+ if(opt.cvt32to16_shr)
+ {
+ if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr))
+ {
+ printf("cvt32to16 failed!\n");
+ return false;
+ }
+ }
if (opt.blockcpy_pp)
{
if (!check_block_copy(ref.blockcpy_pp, opt.blockcpy_pp))
diff -r f6d04c660b9b -r 4dbd17ef69db source/test/pixelharness.h
--- a/source/test/pixelharness.h Thu Oct 17 20:34:48 2013 +0530
+++ b/source/test/pixelharness.h Thu Oct 17 21:00:22 2013 +0530
@@ -53,6 +53,7 @@
bool check_pixeladd_pp(pixeladd_pp_t ref, pixeladd_pp_t opt);
bool check_downscale_t(downscale_t ref, downscale_t opt);
bool check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt);
+ bool check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt);
public:
More information about the x265-devel
mailing list