[x265] [PATCH Review Only] added cvt32to16_shr function to testbench
Steve Borho
steve at borho.org
Thu Oct 17 20:46:58 CEST 2013
On Thu, Oct 17, 2013 at 10:32 AM, <dnyaneshwar at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
> # Date 1382023822 -19800
> # Thu Oct 17 21:00:22 2013 +0530
> # Node ID 4dbd17ef69db91b5604f9c5cc6a4a62f15b91ab0
> # Parent f6d04c660b9bb1b0cf6274faf514be77148aa312
> added cvt32to16_shr function to testbench.
>
> diff -r f6d04c660b9b -r 4dbd17ef69db source/Lib/TLibCommon/TComTrQuant.cpp
> --- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Oct 17 20:34:48 2013
> +0530
> +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Thu Oct 17 21:00:22 2013
> +0530
> @@ -493,7 +493,7 @@
> transformSkipShift = shift;
> for (j = 0; j < height; j++)
> {
> - primitives.cvt32to16_shr(&residual[j * stride], &coef[j *
> width], shift, width);
> + primitives.cvt32to16_shr(residual, coef, stride, shift,
> width);
>
you should explain in the commit message why a stride argument is now
required
> }
> }
> else
> diff -r f6d04c660b9b -r 4dbd17ef69db source/common/pixel.cpp
> --- a/source/common/pixel.cpp Thu Oct 17 20:34:48 2013 +0530
> +++ b/source/common/pixel.cpp Thu Oct 17 21:00:22 2013 +0530
> @@ -439,13 +439,18 @@
> }
> }
>
> -void convert32to16_shr(short *dst, int *src, int shift, int num)
> +void convert32to16_shr(short *dst, int *src, intptr_t stride, int shift,
> int size)
> {
> int round = 1 << (shift - 1);
>
> - for (int i = 0; i < num; i++)
> + for (int i = 0; i < size; i++)
> {
> - dst[i] = (short)((src[i] + round) >> shift);
> + for (int j = 0; j < size; j++)
> + {
> + dst[j] = (short)((src[j] + round) >> shift);
> + }
> + src += size;
> + dst += stride;
> }
> }
>
> diff -r f6d04c660b9b -r 4dbd17ef69db source/common/primitives.h
> --- a/source/common/primitives.h Thu Oct 17 20:34:48 2013 +0530
> +++ b/source/common/primitives.h Thu Oct 17 21:00:22 2013 +0530
> @@ -179,7 +179,7 @@
>
> typedef void (*cvt16to32_shl_t)(int *dst, short *src, intptr_t, int, int);
> typedef void (*cvt16to16_shl_t)(short *dst, short *src, int, int,
> intptr_t, int);
> -typedef void (*cvt32to16_shr_t)(short *dst, int *src, int, int);
> +typedef void (*cvt32to16_shr_t)(short *dst, int *src, intptr_t, int, int);
>
> typedef void (*dct_t)(short *src, int *dst, intptr_t stride);
> typedef void (*idct_t)(int *src, short *dst, intptr_t stride);
> diff -r f6d04c660b9b -r 4dbd17ef69db source/common/vec/pixel-sse3.cpp
> --- a/source/common/vec/pixel-sse3.cpp Thu Oct 17 20:34:48 2013 +0530
> +++ b/source/common/vec/pixel-sse3.cpp Thu Oct 17 21:00:22 2013 +0530
> @@ -31,23 +31,25 @@
> using namespace x265;
>
> namespace {
> -void convert32to16_shr(short *dst, int *org, int shift, int num)
> +void convert32to16_shr(short *dst, int *org, intptr_t stride, int shift,
> int size)
> {
> - int i;
> + int i, j;
> __m128i round = _mm_set1_epi32(1 << (shift - 1));
>
> - for (i = 0; i < num; i += 4)
> + for (i = 0; i < size; i++)
> {
> - __m128i im32;
> - __m128i im16;
> -
> - im32 = _mm_loadu_si128((__m128i const*)org);
> - im32 = _mm_sra_epi32(_mm_add_epi32(im32, round),
> _mm_cvtsi32_si128(shift));
> - im16 = _mm_packs_epi32(im32, im32);
> - _mm_storeu_si128((__m128i*)dst, im16);
> -
> - org += 4;
> - dst += 4;
> + for (j = 0; j < size; j += 4)
> + {
> + __m128i im32;
> + __m128i im16;
> +
> + im32 = _mm_loadu_si128((__m128i const*)(org + j));
> + im32 = _mm_sra_epi32(_mm_add_epi32(im32, round),
> _mm_cvtsi32_si128(shift));
> + im16 = _mm_packs_epi32(im32, im32);
> + _mm_storel_epi64((__m128i*)(dst + j), im16);
> + }
> + org += size;
> + dst += stride;
> }
> }
>
> @@ -639,6 +641,7 @@
> //p.cvt32to16_shr = convert32to16_shr;
> p.cvt16to32_shl = convert16to32_shl;
> p.cvt16to16_shl = convert16to16_shl;
> + p.cvt32to16_shr = convert32to16_shr;
>
> #if !HIGH_BIT_DEPTH
> p.transpose[0] = transpose4;
> diff -r f6d04c660b9b -r 4dbd17ef69db source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp Thu Oct 17 20:34:48 2013 +0530
> +++ b/source/test/pixelharness.cpp Thu Oct 17 21:00:22 2013 +0530
> @@ -494,6 +494,39 @@
> return true;
> }
>
> +bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref,
> cvt32to16_shr_t opt)
> +{
> + int bufsize = STRIDE * STRIDE;
> + int* src = (int*)X265_MALLOC(int, bufsize);
>
Does this really need a new buffer? Can you just use the test buffers
already allocated? If not, then all this needs to be in the pixel harness
constructor
> +
> + int shift = (rand() % 7 + 1);
> +
> + if (!src)
> + {
> + fprintf(stderr, "malloc failed, unable to initiate tests!\n");
> + exit(1);
> + }
> +
> + for (int i = 0; i < bufsize; i++)
> + {
> + src[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;
> + }
> +
> + ALIGN_VAR_16(short, ref_dest[64 * 64]);
> + ALIGN_VAR_16(short, opt_dest[64 * 64]);
> +
> + memset(ref_dest, 0, 64 * 64 * sizeof(short));
> + memset(opt_dest, 0, 64 * 64 * sizeof(short));
> +
> + opt(opt_dest, src, STRIDE, shift, STRIDE);
> + ref(ref_dest, src, STRIDE, shift, STRIDE);
> +
> + if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(short)))
> + return false;
>
white-space
> +
> + return true;
> +}
> +
> bool PixelHarness::check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt)
> {
> ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
> @@ -665,7 +698,14 @@
> }
> }
> }
> -
> + if(opt.cvt32to16_shr)
>
white-space
> + {
> + if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr))
> + {
> + printf("cvt32to16 failed!\n");
> + return false;
> + }
> + }
>
You also need to add this primitive to the function which compares
performance between optimized and C ref
> if (opt.blockcpy_pp)
> {
> if (!check_block_copy(ref.blockcpy_pp, opt.blockcpy_pp))
> diff -r f6d04c660b9b -r 4dbd17ef69db source/test/pixelharness.h
> --- a/source/test/pixelharness.h Thu Oct 17 20:34:48 2013 +0530
> +++ b/source/test/pixelharness.h Thu Oct 17 21:00:22 2013 +0530
> @@ -53,6 +53,7 @@
> bool check_pixeladd_pp(pixeladd_pp_t ref, pixeladd_pp_t opt);
> bool check_downscale_t(downscale_t ref, downscale_t opt);
> bool check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt);
> + bool check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt);
>
> public:
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131017/19e026c3/attachment-0001.html>
More information about the x265-devel
mailing list