[x265] [PATCH] primitives: added C primitives for upShift/downShift input pixels

Steve Borho steve at borho.org
Thu Mar 13 21:22:34 CET 2014


On Thu, Mar 13, 2014 at 5:45 AM,  <murugan at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Murugan Vairavel <murugan at multicorewareinc.com>
> # Date 1394693311 -19800
> #      Thu Mar 13 12:18:31 2014 +0530
> # Node ID 481bca8b54d000d1d5fd2bcff242e5d97b7551e7
> # Parent  5328eec595543c1294cb34b133b4e36f14e2bb79
> primitives: added C primitives for upShift/downShift input pixels

I find it hard to believe that doing three passes over the picture is
a performance improvement.

> diff -r 5328eec59554 -r 481bca8b54d0 source/Lib/TLibCommon/TComPicYuv.cpp
> --- a/source/Lib/TLibCommon/TComPicYuv.cpp      Wed Mar 12 16:01:25 2014 -0500
> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp      Thu Mar 13 12:18:31 2014 +0530
> @@ -169,11 +169,11 @@
>      int height = m_picHeight - pady;
>
>      /* internal pad to multiple of 16x16 blocks */
> -    uint8_t rem = width & 15;
> +    uint8_t rem = height & 15;
> +    pady = rem ? 16 - rem : pady;
>
> +    rem = width & 15;
>      padx = rem ? 16 - rem : padx;
> -    rem = height & 15;
> -    pady = rem ? 16 - rem : pady;
>
>      /* add one more row and col of pad for downscale interpolation, fixes
>       * warnings from valgrind about using uninitialized pixels */
> @@ -193,29 +193,44 @@
>          uint8_t *uChar = (uint8_t*)pic.planes[1];
>          uint8_t *vChar = (uint8_t*)pic.planes[2];
>
> -        for (int r = 0; r < height; r++)
> +        int lumaWidth = width - rem;
> +        int chromaWidth = width >> m_hChromaShift;
> +        uint8_t chromaRem = chromaWidth & 15;
> +        chromaWidth = chromaWidth - chromaRem;
> +
> +        primitives.upShift(yChar, pic.stride[0] / sizeof(*yChar), yPixel, getStride(), lumaWidth, height);
> +        primitives.upShift(uChar, pic.stride[1] / sizeof(*uChar), uPixel, getCStride(), chromaWidth, height >> m_vChromaShift);
> +        primitives.upShift(vChar, pic.stride[2] / sizeof(*vChar), vPixel, getCStride(), chromaWidth, height >> m_vChromaShift);
> +
> +        if (rem)
>          {
> -            for (int c = 0; c < width; c++)
> +            for (int r = 0; r < height; r++)
>              {
> -                yPixel[c] = ((pixel)yChar[c]) << 2;
> +                for (int c = lumaWidth; c < width; c++)
> +                {
> +                    yPixel[c] = ((pixel)yChar[c]) << 2;
> +                }
> +
> +                yPixel += getStride();
> +                yChar += pic.stride[0] / sizeof(*yChar);
>              }
> -
> -            yPixel += getStride();
> -            yChar += pic.stride[0] / sizeof(*yChar);
>          }
>
> -        for (int r = 0; r < height >> m_vChromaShift; r++)
> +        if (chromaRem)
>          {
> -            for (int c = 0; c < width >> m_hChromaShift; c++)
> +            for (int r = 0; r < height >> m_vChromaShift; r++)
>              {
> -                uPixel[c] = ((pixel)uChar[c]) << 2;
> -                vPixel[c] = ((pixel)vChar[c]) << 2;
> +                for (int c = chromaWidth; c < width >> m_hChromaShift; c++)
> +                {
> +                    uPixel[c] = ((pixel)uChar[c]) << 2;
> +                    vPixel[c] = ((pixel)vChar[c]) << 2;
> +                }
> +
> +                uPixel += getCStride();
> +                vPixel += getCStride();
> +                uChar += pic.stride[1] / sizeof(*uChar);
> +                vChar += pic.stride[2] / sizeof(*vChar);
>              }
> -
> -            uPixel += getCStride();
> -            vPixel += getCStride();
> -            uChar += pic.stride[1] / sizeof(*uChar);
> -            vChar += pic.stride[2] / sizeof(*vChar);
>          }
>      }
>      else if (pic.bitDepth == 8)
> @@ -266,31 +281,48 @@
>          /* defensive programming, mask off bits that are supposed to be zero */
>          uint16_t mask = (1 << X265_DEPTH) - 1;
>          int shift = X265_MAX(0, pic.bitDepth - X265_DEPTH);
> +        int lumaWidth = width - rem;
> +        int chromaWidth = width >> m_hChromaShift;
> +        uint8_t chromaRem = chromaWidth & 15;
> +        chromaWidth = chromaWidth - chromaRem;
>
>          /* shift and mask pixels to final size */
> -        for (int r = 0; r < height; r++)
> +
> +        primitives.downShift(yShort, pic.stride[0] / sizeof(*yShort), yPixel, getStride(), lumaWidth, height, shift, mask);
> +        primitives.downShift(uShort, pic.stride[1] / sizeof(*uShort), uPixel, getCStride(), chromaWidth, height >> m_vChromaShift, shift, mask);
> +        primitives.downShift(vShort, pic.stride[2] / sizeof(*vShort), vPixel, getCStride(), chromaWidth, height >> m_vChromaShift, shift, mask);
> +
> +        /* Handles the remaining part of the luma component if the width is not a multiple of 16 */
> +        if (rem)
>          {
> -            for (int c = 0; c < width; c++)
> +            for (int r = 0; r < height; r++)
>              {
> -                yPixel[c] = (pixel)((yShort[c] >> shift) & mask);
> +                for (int c = lumaWidth; c < width; c++)
> +                {
> +                    yPixel[c] = (pixel)((yShort[c] >> shift) & mask);
> +                }
> +
> +                yPixel += getStride();
> +                yShort += pic.stride[0] / sizeof(*yShort);
>              }
> -
> -            yPixel += getStride();
> -            yShort += pic.stride[0] / sizeof(*yShort);
>          }
>
> -        for (int r = 0; r < height >> m_vChromaShift; r++)
> +        /* Handles the remaining part of the chroma components if the width is not a multiple of 16 */
> +        if (chromaRem)
>          {
> -            for (int c = 0; c < width >> m_hChromaShift; c++)
> +            for (int r = 0; r < height >> m_vChromaShift; r++)
>              {
> -                uPixel[c] = (pixel)((uShort[c] >> shift) & mask);
> -                vPixel[c] = (pixel)((vShort[c] >> shift) & mask);
> +                for (int c = chromaWidth; c < width >> m_hChromaShift; c++)
> +                {
> +                    uPixel[c] = (pixel)((uShort[c] >> shift) & mask);
> +                    vPixel[c] = (pixel)((vShort[c] >> shift) & mask);
> +                }
> +
> +                uPixel += getCStride();
> +                vPixel += getCStride();
> +                uShort += pic.stride[1] / sizeof(*uShort);
> +                vShort += pic.stride[2] / sizeof(*vShort);
>              }
> -
> -            uPixel += getCStride();
> -            vPixel += getCStride();
> -            uShort += pic.stride[1] / sizeof(*uShort);
> -            vShort += pic.stride[2] / sizeof(*vShort);
>          }
>      }
>
> diff -r 5328eec59554 -r 481bca8b54d0 source/common/pixel.cpp
> --- a/source/common/pixel.cpp   Wed Mar 12 16:01:25 2014 -0500
> +++ b/source/common/pixel.cpp   Thu Mar 13 12:18:31 2014 +0530
> @@ -852,6 +852,34 @@
>          dst  += dstStride;
>      }
>  }
> +
> +void upShift(uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height)
> +{
> +    for (int r = 0; r < height; r++)
> +    {
> +        for (int c = 0; c < width; c++)
> +        {
> +            dst[c] = ((pixel)src[c]) << 2;
> +        }
> +
> +        dst += dstStride;
> +        src += srcStride;
> +    }
> +}
> +
> +void downShift(uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
> +{
> +    for (int r = 0; r < height; r++)
> +    {
> +        for (int c = 0; c < width; c++)
> +        {
> +            dst[c] = (pixel)((src[c] >> shift) & mask);
> +        }
> +
> +        dst += dstStride;
> +        src += srcStride;
> +    }
> +}
>  }  // end anonymous namespace
>
>  namespace x265 {
> @@ -1099,5 +1127,7 @@
>      p.var[BLOCK_32x32] = pixel_var<32>;
>      p.var[BLOCK_64x64] = pixel_var<64>;
>      p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
> +    p.upShift = upShift;
> +    p.downShift = downShift;
>  }
>  }
> diff -r 5328eec59554 -r 481bca8b54d0 source/common/primitives.h
> --- a/source/common/primitives.h        Wed Mar 12 16:01:25 2014 -0500
> +++ b/source/common/primitives.h        Thu Mar 13 12:18:31 2014 +0530
> @@ -163,6 +163,8 @@
>  typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
>
>  typedef void (*saoCuOrgE0_t)(pixel * rec, int8_t * offsetEo, int lcuWidth, int8_t signLeft);
> +typedef void (*planecopy_cp) (uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height);
> +typedef void (*planecopy_sp) (uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
>
>  /* Define a structure containing function pointers to optimized encoder
>   * primitives.  Each pointer can reference either an assembly routine,
> @@ -233,6 +235,8 @@
>      extendCURowBorder_t extendRowBorder;
>      // sao primitives
>      saoCuOrgE0_t      saoCuOrgE0;
> +    planecopy_cp         upShift;
> +    planecopy_sp      downShift;
>
>      struct
>      {
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



-- 
Steve Borho


More information about the x265-devel mailing list