[x265] [PATCH 2 of 4] Templating weightUnidir primitive to support pixel inputs
Steve Borho
steve at borho.org
Wed Oct 2 23:10:14 CEST 2013
On Wed, Oct 2, 2013 at 6:47 AM, Shazeb Khan <shazeb at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
> # Date 1380713440 -19800
> # Node ID 09de1a4441d6b00c36c79c6ed90e296cbe144004
> # Parent c5dc3e37d76772b5e26c6996ab0df5ce325f54a7
>
> Templating weightUnidir primitive to support pixel inputs
>
Queued, but it required a lot of fixups because the patch was line-wrapped
like crazy by your MTA.
It's best to use hg email or the thg email tool to send patches.
> diff -r c5dc3e37d767 -r 09de1a4441d6
> source/Lib/TLibCommon/TComWeightPrediction.cpp
> --- a/source/Lib/TLibCommon/TComWeightPrediction.cpp Tue Oct 01
> 17:28:19 2013 +0530
> +++ b/source/Lib/TLibCommon/TComWeightPrediction.cpp Wed Oct 02
> 17:00:40 2013 +0530
> @@ -431,7 +431,7 @@
> srcStride = srcYuv0->m_width;
> dstStride = outDstYuv->getStride();
>
> - primitives.weightpUni(srcY0, dstY, srcStride, dstStride, width,
> height, w0, round, shift, offset);
> + primitives.weightpUni((uint16_t *)srcY0, dstY, srcStride,
> dstStride, width, height, w0, round, shift, offset);
> }
>
> // Chroma U : --------------------------------------------
> @@ -447,7 +447,7 @@
> width >>= 1;
> height >>= 1;
>
> - primitives.weightpUni(srcU0, dstU, srcStride, dstStride, width,
> height, w0, round, shift, offset);
> + primitives.weightpUni((uint16_t *)srcU0, dstU, srcStride, dstStride,
> width, height, w0, round, shift, offset);
>
> // Chroma V : --------------------------------------------
> w0 = wp0[2].w;
> @@ -455,7 +455,7 @@
> shift = wp0[2].shift + shiftNum;
> round = shift ? (1 << (shift - 1)) : 0;
>
> - primitives.weightpUni(srcV0, dstV, srcStride, dstStride, width,
> height, w0, round, shift, offset);
> + primitives.weightpUni((uint16_t *)srcV0, dstV, srcStride, dstStride,
> width, height, w0, round, shift, offset);
> }
>
> //=======================================================
> diff -r c5dc3e37d767 -r 09de1a4441d6 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Tue Oct 01 17:28:19 2013 +0530
> +++ b/source/common/pixel.cpp Wed Oct 02 17:00:40 2013 +0530
> @@ -514,7 +514,8 @@
>
> }
> }
>
> -void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
> +template <typename T>
> +void weightUnidir(T *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
>
> {
> int x, y;
> for (y = height - 1; y >= 0; y--)
> @@ -842,7 +843,8 @@
>
> p.transpose[3] = transpose<32>;
> p.transpose[4] = transpose<64>;
>
> - p.weightpUni = weightUnidir;
> + p.weightpUniPixel = weightUnidir<pixel>;
> + p.weightpUni = weightUnidir<uint16_t>;
>
>
> p.pixelsub_sp = pixelsub_sp_c;
> p.pixeladd_pp = pixeladd_pp_c;
> diff -r c5dc3e37d767 -r 09de1a4441d6 source/common/primitives.h
> --- a/source/common/primitives.h Tue Oct 01 17:28:19 2013 +0530
> +++ b/source/common/primitives.h Wed Oct 02 17:00:40 2013 +0530
> @@ -228,8 +228,8 @@
>
> typedef void (*filterRowV_N_t)(short *midA, intptr_t midStride, pixel
> *dstA, pixel *dstB, pixel *dstC, intptr_t dstStride, int width, int height,
> int marginX, int marginY, int row, int isLastRow);
> typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int
> width, int height, int marginX);
>
> -
> -typedef void (*weightpUni_t)(short *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
> +typedef void (*weightpUniPixel_t)(pixel *src, pixel *dst, intptr_t
> srcStride, intptr_t dstStride, int width, int height, int w0, int round,
> int shift, int offset);
> +typedef void (*weightpUni_t)(uint16_t *src, pixel *dst, intptr_t
> srcStride, intptr_t dstStride, int width, int height, int w0, int round,
> int shift, int offset);
>
> typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
> typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel
> *dstv, pixel *dstc,
> intptr_t src_stride, intptr_t dst_stride, int
> width, int height);
> @@ -286,6 +286,7 @@
>
> calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];
> transpose_t transpose[NUM_SQUARE_BLOCKS];
>
> + weightpUniPixel_t weightpUniPixel;
>
> weightpUni_t weightpUni;
> pixelsub_sp_t pixelsub_sp;
> pixeladd_ss_t pixeladd_ss;
> diff -r c5dc3e37d767 -r 09de1a4441d6 source/common/vec/pixel.inc
> --- a/source/common/vec/pixel.inc Tue Oct 01 17:28:19 2013 +0530
> +++ b/source/common/vec/pixel.inc Wed Oct 02 17:00:40 2013 +0530
>
> @@ -469,7 +469,9 @@
> p.transpose[2] = transpose16;
> p.transpose[3] = transpose32;
> p.transpose[4] = transpose<64>;
> - p.weightpUni = weightUnidir;
> + p.weightpUniPixel = weightUnidir<pixel>;
> + p.weightpUni = weightUnidir<uint16_t>;
> +
> #endif
>
> #if !HIGH_BIT_DEPTH
> diff -r c5dc3e37d767 -r 09de1a4441d6 source/common/vec/pixel8.inc
> --- a/source/common/vec/pixel8.inc Tue Oct 01 17:28:19 2013 +0530
> +++ b/source/common/vec/pixel8.inc Wed Oct 02 17:00:40 2013 +0530
> @@ -8573,7 +8573,8 @@
>
> }
> }
>
> -void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
> +template <typename T>
> +void weightUnidir(T *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
>
> {
> int x, y;
> Vec8s tmp;
> diff -r c5dc3e37d767 -r 09de1a4441d6 source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp Tue Oct 01 17:28:19 2013 +0530
> +++ b/source/test/pixelharness.cpp Wed Oct 02 17:00:40 2013 +0530
> @@ -343,8 +343,8 @@
> int offset = (rand() % 256) - 128;
> for (int i = 0; i < ITERS; i++)
> {
> - opt(sbuf1 + j, opt_dest, 64, 64, width, height, w0, round, shift,
> offset);
> - ref(sbuf1 + j, ref_dest, 64, 64, width, height, w0, round, shift,
> offset);
> + opt((uint16_t*)sbuf1 + j, opt_dest, 64, 64, width, height, w0,
> round, shift, offset);
> + ref((uint16_t*)sbuf1 + j, ref_dest, 64, 64, width, height, w0,
> round, shift, offset);
>
> if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
> return false;
> @@ -355,6 +355,34 @@
> return true;
> }
>
> +bool PixelHarness::check_weightpUni(weightpUniPixel_t ref,
> weightpUniPixel_t opt)
> +{
> + ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
> + ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
> +
> + memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
> + memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
> + int j = 0;
> + int width = (2 * rand()) % 64;
> + int height = 8;
> + int w0 = rand() % 256;
> + int shift = rand() % 12;
> + int round = shift ? (1 << (shift - 1)) : 0;
> + int offset = (rand() % 256) - 128;
> + for (int i = 0; i < ITERS; i++)
> + {
> + opt((pixel *)sbuf1 + j, opt_dest, 64, 64, width, height, w0,
> round, shift, offset);
> + ref((pixel *)sbuf1 + j, ref_dest, 64, 64, width, height, w0,
> round, shift, offset);
> +
> + if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
> + return false;
> +
> + j += INCR;
> + }
> +
> + return true;
> +}
> +
> bool PixelHarness::check_pixelsub_sp(pixelsub_sp_t ref, pixelsub_sp_t opt)
> {
> ALIGN_VAR_16(short, ref_dest[64 * 64]);
> @@ -604,6 +632,24 @@
> }
> }
>
> + if (opt.weightpUniPixel)
> + {
> + if (!check_weightpUni(ref.weightpUniPixel, opt.weightpUniPixel))
> + {
> + printf("Weighted Prediction for Unidir failed!\n");
> + return false;
> + }
> + }
> +
> + if (opt.weightpUniPixel)
> + {
> + if (!check_weightpUni(ref.weightpUniPixel, opt.weightpUniPixel))
> + {
> + printf("Weighted Prediction for Unidir failed!\n");
> + return false;
> + }
> + }
> +
> if (opt.weightpUni)
> {
> if (!check_weightpUni(ref.weightpUni, opt.weightpUni))
> @@ -751,10 +797,16 @@
> REPORT_SPEEDUP(opt.blockcpy_sc, ref.blockcpy_sc, 64, 64,
> (short*)pbuf1, FENC_STRIDE, (uint8_t*)pbuf2, STRIDE);
> }
>
> + if (opt.weightpUniPixel)
> + {
> + printf("WeightpUni");
> + REPORT_SPEEDUP(opt.weightpUniPixel, ref.weightpUniPixel, pbuf1,
> pbuf2, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
> + }
> +
> if (opt.weightpUni)
> {
> printf("WeightpUni");
> - REPORT_SPEEDUP(opt.weightpUni, ref.weightpUni, sbuf1, pbuf1, 64,
> 64, 32, 32, 128, 1 << 9, 10, 100);
> + REPORT_SPEEDUP(opt.weightpUni, ref.weightpUni, (uint16_t*)sbuf1,
> pbuf1, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
> }
>
> if (opt.pixelsub_sp)
> diff -r c5dc3e37d767 -r 09de1a4441d6 source/test/pixelharness.h
> --- a/source/test/pixelharness.h Tue Oct 01 17:28:19 2013 +0530
> +++ b/source/test/pixelharness.h Wed Oct 02 17:00:40 2013 +0530
> @@ -46,6 +46,7 @@
> bool check_block_copy_s_c(blockcpy_sc_t ref, blockcpy_sc_t opt);
> bool check_calresidual(calcresidual_t ref, calcresidual_t opt);
> bool check_calcrecon(calcrecon_t ref, calcrecon_t opt);
> + bool check_weightpUni(weightpUniPixel_t ref, weightpUniPixel_t opt);
> bool check_weightpUni(weightpUni_t ref, weightpUni_t opt);
> bool check_pixelsub_sp(pixelsub_sp_t ref, pixelsub_sp_t opt);
> bool check_pixeladd_ss(pixeladd_ss_t ref, pixeladd_ss_t opt);
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131002/b76bde2a/attachment-0001.html>
More information about the x265-devel mailing list