[x265] [PATCH] added cvt32to16_shr_sse2 function to testbench
Steve Borho
steve at borho.org
Fri Oct 18 11:24:42 CEST 2013
On Fri, Oct 18, 2013 at 3:49 AM, <dnyaneshwar at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
> # Date 1382086085 -19800
> # Fri Oct 18 14:18:05 2013 +0530
> # Node ID 6d9bd6b6209e45cb49da804b23ad78424914b323
> # Parent d6d7187c5f4ea0978ebbddc1a559cea3712bf345
> added cvt32to16_shr_sse2 function to testbench.
> Measured speedup is almost 14x.
>
Pushed with minor improvements; please review.
> diff -r d6d7187c5f4e -r 6d9bd6b6209e source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp Fri Oct 18 00:42:36 2013 -0500
> +++ b/source/test/pixelharness.cpp Fri Oct 18 14:18:05 2013 +0530
> @@ -45,10 +45,12 @@
> pbuf3 = (pixel*)X265_MALLOC(pixel, bufsize);
> pbuf4 = (pixel*)X265_MALLOC(pixel, bufsize);
>
> + ibuf1 = (int*)X265_MALLOC(int, bufsize);
> +
> sbuf1 = (short*)X265_MALLOC(short, bufsize);
> sbuf2 = (short*)X265_MALLOC(short, bufsize);
>
> - if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2)
> + if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2 || !ibuf1)
> {
> fprintf(stderr, "malloc failed, unable to initiate tests!\n");
> exit(1);
> @@ -63,6 +65,8 @@
>
> sbuf1[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1; //max(SHORT_MIN, min(rand(), SHORT_MAX));
> sbuf2[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1; //max(SHORT_MIN, min(rand(), SHORT_MAX));
> +
> + ibuf1[i] = (rand() & (2 * SHORT_MAX + 1)) - SHORT_MAX - 1;
> }
> }
>
> @@ -481,6 +485,22 @@
> return true;
> }
>
> +bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt)
> +{
> + int shift = (rand() % 7 + 1);
> +
> + ALIGN_VAR_16(short, ref_dest[64 * 64]);
> + ALIGN_VAR_16(short, opt_dest[64 * 64]);
> +
> + opt(opt_dest, ibuf1, STRIDE, shift, STRIDE);
> + ref(ref_dest, ibuf1, STRIDE, shift, STRIDE);
> +
> + if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(short)))
> + return false;
> +
> + return true;
> +}
> +
> bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
> {
> if (opt.satd[part])
> @@ -615,6 +635,15 @@
> }
> }
>
> + if (opt.cvt32to16_shr)
> + {
> + if (!check_cvt32to16_shr_t(ref.cvt32to16_shr, opt.cvt32to16_shr))
> + {
> + printf("cvt32to16 failed!\n");
> + return false;
> + }
> + }
> +
> if (opt.blockcpy_pp)
> {
> if (!check_block_copy(ref.blockcpy_pp, opt.blockcpy_pp))
> @@ -810,6 +839,12 @@
> }
> }
>
> + if (opt.cvt32to16_shr)
> + {
> + printf("cvt32to16 conversion");
> + REPORT_SPEEDUP(opt.cvt32to16_shr, ref.cvt32to16_shr, sbuf1, ibuf1, 64, 5, 64);
> + }
> +
> if (opt.blockcpy_pp)
> {
> printf("block cpy");
> diff -r d6d7187c5f4e -r 6d9bd6b6209e source/test/pixelharness.h
> --- a/source/test/pixelharness.h Fri Oct 18 00:42:36 2013 -0500
> +++ b/source/test/pixelharness.h Fri Oct 18 14:18:05 2013 +0530
> @@ -33,6 +33,8 @@
>
> pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
>
> + int *ibuf1;
> +
> short *sbuf1, *sbuf2;
>
> bool check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt);
> @@ -52,6 +54,7 @@
> bool check_pixeladd_ss(pixeladd_ss_t ref, pixeladd_ss_t opt);
> bool check_pixeladd_pp(pixeladd_pp_t ref, pixeladd_pp_t opt);
> bool check_downscale_t(downscale_t ref, downscale_t opt);
> + bool check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt);
>
> public:
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131018/4e97ba31/attachment.html>
More information about the x265-devel mailing list