[x265] [PATCH 2 of 4] Templating weightUnidir primitive to support pixel inputs

Tue Oct 1 20:01:02 CEST 2013

On Tue, Oct 1, 2013 at 2:51 AM, <shazeb at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
> # Date 1380607560 -19800
> #      Tue Oct 01 11:36:00 2013 +0530
> # Node ID 07d712e6265cb6f052a55fe7a1448d48b5339acc
> # Parent  b089a7ff0d73efa6b9da3ee98ba6c8191ff3652b
> Templating weightUnidir primitive to support pixel inputs
>
> To be used for full-pel planes
>
> diff -r b089a7ff0d73 -r 07d712e6265c source/common/pixel.cpp
> --- a/source/common/pixel.cpp   Tue Oct 01 11:20:02 2013 +0530
> +++ b/source/common/pixel.cpp   Tue Oct 01 11:36:00 2013 +0530
> @@ -514,17 +514,19 @@
>      }
>  }
>
> -void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
> +template <typename T>
> +void weightUnidir(void *srcAbstract, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset)
>  {
> +    T *src = static_cast<T *>(srcAbstract);
>      int x, y;
>      for (y = height - 1; y >= 0; y--)
>      {
>          for (x = width - 1; x >= 0; )
>          {
>              // note: luma min width is 4
> -            dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 *
> (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
> +            dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 *
> (uint16_t) (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
>

FYI: this routine was previously relying on C's implicit integer promotion
of operands.  When you multiply an int by a char, the char is implicitly
promoted to int prior to the multiplication.

>              x--;
> -            dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 *
> (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
> +            dst[x] = (pixel) Clip3(0, ((1 << X265_DEPTH) - 1), ((w0 *
> (uint16_t) (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset);
>              x--;
>          }
>
> @@ -842,7 +844,8 @@
>      p.transpose[3] = transpose<32>;
>      p.transpose[4] = transpose<64>;
>
> -    p.weightpUni = weightUnidir;
> +    p.weightpUniPixel = weightUnidir<pixel>;
> +    p.weightpUni = weightUnidir<short>;
>

don't use short, use uint16_t

>
>      p.pixelsub_sp = pixelsub_sp_c;
>      p.pixeladd_pp = pixeladd_pp_c;
> diff -r b089a7ff0d73 -r 07d712e6265c source/common/primitives.h
> --- a/source/common/primitives.h        Tue Oct 01 11:20:02 2013 +0530
> +++ b/source/common/primitives.h        Tue Oct 01 11:36:00 2013 +0530
> @@ -228,8 +228,7 @@
>  typedef void (*filterRowV_N_t)(short *midA, intptr_t midStride, pixel
> *dstA, pixel *dstB, pixel *dstC, intptr_t dstStride, int width, int height,
> int marginX, int marginY, int row, int isLastRow);
>  typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int
> width, int height, int marginX);
>
> -
> -typedef void (*weightpUni_t)(short *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
>

Don't use void * here.  There should be two different function pointer
typedefs, one for pixel inputs and one for uint16_t inputs.

> +typedef void (*weightpUni_t)(void *src, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset);
>  typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
>  typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel
> *dstv, pixel *dstc,
>                              intptr_t src_stride, intptr_t dst_stride, int
> width, int height);
> @@ -286,6 +285,7 @@
>      calcrecon_t     calcrecon[NUM_SQUARE_BLOCKS];
>      transpose_t     transpose[NUM_SQUARE_BLOCKS];
>
> +    weightpUni_t    weightpUniPixel;
>      weightpUni_t    weightpUni;
>      pixelsub_sp_t   pixelsub_sp;
>      pixeladd_ss_t   pixeladd_ss;
> diff -r b089a7ff0d73 -r 07d712e6265c source/common/vec/pixel.inc
> --- a/source/common/vec/pixel.inc       Tue Oct 01 11:20:02 2013 +0530
> +++ b/source/common/vec/pixel.inc       Tue Oct 01 11:36:00 2013 +0530
> @@ -469,7 +469,9 @@
>      p.transpose[2] = transpose16;
>      p.transpose[3] = transpose32;
>      p.transpose[4] = transpose<64>;
> -    p.weightpUni = weightUnidir;
> +    p.weightpUniPixel = weightUnidir<pixel>;
> +    p.weightpUni = weightUnidir<short>;
> +
>  #endif
>
>  #if !HIGH_BIT_DEPTH
> diff -r b089a7ff0d73 -r 07d712e6265c source/common/vec/pixel8.inc
> --- a/source/common/vec/pixel8.inc      Tue Oct 01 11:20:02 2013 +0530
> +++ b/source/common/vec/pixel8.inc      Tue Oct 01 11:36:00 2013 +0530
> @@ -8573,8 +8573,10 @@
>      }
>  }
>
> -void weightUnidir(short *src, pixel *dst, intptr_t srcStride, intptr_t
> dstStride, int width, int height, int w0, int round, int shift, int offset)
> +template <typename T>
> +void weightUnidir(void *srcAbstract, pixel *dst, intptr_t srcStride,
> intptr_t dstStride, int width, int height, int w0, int round, int shift,
> int offset)
>  {
> +    T* src = static_cast<T *> (srcAbstract);
>      int x, y;
>      Vec8s tmp;
>

I'm surprised this could actually work; usually we need different functions
when the source type changes because you need entirely different load
instructions.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131001/2135bb28/attachment.html>