[x265] [PATCH 2 of 4] Templating weightUnidir primitive to support pixel inputs
Steve Borho
steve at borho.org
Tue Oct 1 20:01:02 CEST 2013
On Tue, Oct 1, 2013 at 2:51 AM, <shazeb at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
> # Date 1380607560 -19800
> # Tue Oct 01 11:36:00 2013 +0530
> # Node ID 07d712e6265cb6f052a55fe7a1448d48b5339acc
> # Parent b089a7ff0d73efa6b9da3ee98ba6c8191ff3652b
> Templating weightUnidir primitive to support pixel inputs
>
> To be used for full-pel planes
>
> diff -r b089a7ff0d73 -r 07d712e6265c source/common/pixel.cpp
> --- a/source/common/pixel.cpp Tue Oct 01 11:20:02 2013 +0530
> +++ b/source/common/pixel.cpp Tue Oct 01 11:36:00 2013 +0530
> @@ -514,17 +514,19 @@
> }
> }
>
> -void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
> +template <typename T>
> +void weightUnidir(void *srcAbstract, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset)
> {
> + T *src = static_cast<T *>(srcAbstract);
> int x, y;
> for (y = height - 1; y >= 0; y--)
> {
> for (x = width - 1; x >= 0; )
> {
> // note: luma min width is 4
> - dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 *
> (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
> + dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 *
> (uint16_t) (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
>
FYI: this routine was previously relying on the C behavior of promoting
integer operands. When you multiply an int with a char, the char is
implicitly promoted to an int prior to the multiplication.
> x--;
> - dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 *
> (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
> + dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 *
> (uint16_t) (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
> x--;
> }
>
> @@ -842,7 +844,8 @@
> p.transpose[3] = transpose<32>;
> p.transpose[4] = transpose<64>;
>
> - p.weightpUni = weightUnidir;
> + p.weightpUniPixel = weightUnidir<pixel>;
> + p.weightpUni = weightUnidir<short>;
>
don't use short, use uint16_t
>
> p.pixelsub_sp = pixelsub_sp_c;
> p.pixeladd_pp = pixeladd_pp_c;
> diff -r b089a7ff0d73 -r 07d712e6265c source/common/primitives.h
> --- a/source/common/primitives.h Tue Oct 01 11:20:02 2013 +0530
> +++ b/source/common/primitives.h Tue Oct 01 11:36:00 2013 +0530
> @@ -228,8 +228,7 @@
> typedef void (*filterRowV_N_t)(short *midA, intptr_t midStride, pixel
> *dstA, pixel *dstB, pixel *dstC, intptr_t dstStride, int width, int height,
> int marginX, int marginY, int row, int isLastRow);
> typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int
> width, int height, int marginX);
>
> -
> -typedef void (*weightpUni_t)(short *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
>
Don't use void * here. There should be two different function typedefs, one
for pixel inputs and one for uint16_t inputs.
> +typedef void (*weightpUni_t)(void *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
> typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
> typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel
> *dstv, pixel *dstc,
> intptr_t src_stride, intptr_t dst_stride, int
> width, int height);
> @@ -286,6 +285,7 @@
> calcrecon_t calcrecon[NUM_SQUARE_BLOCKS];
> transpose_t transpose[NUM_SQUARE_BLOCKS];
>
> + weightpUni_t weightpUniPixel;
> weightpUni_t weightpUni;
> pixelsub_sp_t pixelsub_sp;
> pixeladd_ss_t pixeladd_ss;
> diff -r b089a7ff0d73 -r 07d712e6265c source/common/vec/pixel.inc
> --- a/source/common/vec/pixel.inc Tue Oct 01 11:20:02 2013 +0530
> +++ b/source/common/vec/pixel.inc Tue Oct 01 11:36:00 2013 +0530
> @@ -469,7 +469,9 @@
> p.transpose[2] = transpose16;
> p.transpose[3] = transpose32;
> p.transpose[4] = transpose<64>;
> - p.weightpUni = weightUnidir;
> + p.weightpUniPixel = weightUnidir<pixel>;
> + p.weightpUni = weightUnidir<short>;
> +
> #endif
>
> #if !HIGH_BIT_DEPTH
> diff -r b089a7ff0d73 -r 07d712e6265c source/common/vec/pixel8.inc
> --- a/source/common/vec/pixel8.inc Tue Oct 01 11:20:02 2013 +0530
> +++ b/source/common/vec/pixel8.inc Tue Oct 01 11:36:00 2013 +0530
> @@ -8573,8 +8573,10 @@
> }
> }
>
> -void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
> +template <typename T>
> +void weightUnidir(void *srcAbstract, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset)
> {
> + T* src = static_cast<T *> (srcAbstract);
> int x, y;
> Vec8s tmp;
>
I'm surprised this could actually work; usually we need different functions
when the source type changes because you need entirely different load
instructions.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131001/2135bb28/attachment.html>
More information about the x265-devel
mailing list