[x265] [PATCH] primitives: added C primitives for upShift/downShift input pixels
Steve Borho
steve at borho.org
Thu Mar 13 21:22:34 CET 2014
On Thu, Mar 13, 2014 at 5:45 AM, <murugan at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Murugan Vairavel <murugan at multicorewareinc.com>
> # Date 1394693311 -19800
> # Thu Mar 13 12:18:31 2014 +0530
> # Node ID 481bca8b54d000d1d5fd2bcff242e5d97b7551e7
> # Parent 5328eec595543c1294cb34b133b4e36f14e2bb79
> primitives: added C primitives for upShift/downShift input pixels
I find it hard to believe that doing three passes over the picture is
a performance improvement.
> diff -r 5328eec59554 -r 481bca8b54d0 source/Lib/TLibCommon/TComPicYuv.cpp
> --- a/source/Lib/TLibCommon/TComPicYuv.cpp Wed Mar 12 16:01:25 2014 -0500
> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp Thu Mar 13 12:18:31 2014 +0530
> @@ -169,11 +169,11 @@
> int height = m_picHeight - pady;
>
> /* internal pad to multiple of 16x16 blocks */
> - uint8_t rem = width & 15;
> + uint8_t rem = height & 15;
> + pady = rem ? 16 - rem : pady;
>
> + rem = width & 15;
> padx = rem ? 16 - rem : padx;
> - rem = height & 15;
> - pady = rem ? 16 - rem : pady;
>
> /* add one more row and col of pad for downscale interpolation, fixes
> * warnings from valgrind about using uninitialized pixels */
> @@ -193,29 +193,44 @@
> uint8_t *uChar = (uint8_t*)pic.planes[1];
> uint8_t *vChar = (uint8_t*)pic.planes[2];
>
> - for (int r = 0; r < height; r++)
> + int lumaWidth = width - rem;
> + int chromaWidth = width >> m_hChromaShift;
> + uint8_t chromaRem = chromaWidth & 15;
> + chromaWidth = chromaWidth - chromaRem;
> +
> + primitives.upShift(yChar, pic.stride[0] / sizeof(*yChar), yPixel, getStride(), lumaWidth, height);
> + primitives.upShift(uChar, pic.stride[1] / sizeof(*uChar), uPixel, getCStride(), chromaWidth, height >> m_vChromaShift);
> + primitives.upShift(vChar, pic.stride[2] / sizeof(*vChar), vPixel, getCStride(), chromaWidth, height >> m_vChromaShift);
> +
> + if (rem)
> {
> - for (int c = 0; c < width; c++)
> + for (int r = 0; r < height; r++)
> {
> - yPixel[c] = ((pixel)yChar[c]) << 2;
> + for (int c = lumaWidth; c < width; c++)
> + {
> + yPixel[c] = ((pixel)yChar[c]) << 2;
> + }
> +
> + yPixel += getStride();
> + yChar += pic.stride[0] / sizeof(*yChar);
> }
> -
> - yPixel += getStride();
> - yChar += pic.stride[0] / sizeof(*yChar);
> }
>
> - for (int r = 0; r < height >> m_vChromaShift; r++)
> + if (chromaRem)
> {
> - for (int c = 0; c < width >> m_hChromaShift; c++)
> + for (int r = 0; r < height >> m_vChromaShift; r++)
> {
> - uPixel[c] = ((pixel)uChar[c]) << 2;
> - vPixel[c] = ((pixel)vChar[c]) << 2;
> + for (int c = chromaWidth; c < width >> m_hChromaShift; c++)
> + {
> + uPixel[c] = ((pixel)uChar[c]) << 2;
> + vPixel[c] = ((pixel)vChar[c]) << 2;
> + }
> +
> + uPixel += getCStride();
> + vPixel += getCStride();
> + uChar += pic.stride[1] / sizeof(*uChar);
> + vChar += pic.stride[2] / sizeof(*vChar);
> }
> -
> - uPixel += getCStride();
> - vPixel += getCStride();
> - uChar += pic.stride[1] / sizeof(*uChar);
> - vChar += pic.stride[2] / sizeof(*vChar);
> }
> }
> else if (pic.bitDepth == 8)
> @@ -266,31 +281,48 @@
> /* defensive programming, mask off bits that are supposed to be zero */
> uint16_t mask = (1 << X265_DEPTH) - 1;
> int shift = X265_MAX(0, pic.bitDepth - X265_DEPTH);
> + int lumaWidth = width - rem;
> + int chromaWidth = width >> m_hChromaShift;
> + uint8_t chromaRem = chromaWidth & 15;
> + chromaWidth = chromaWidth - chromaRem;
>
> /* shift and mask pixels to final size */
> - for (int r = 0; r < height; r++)
> +
> + primitives.downShift(yShort, pic.stride[0] / sizeof(*yShort), yPixel, getStride(), lumaWidth, height, shift, mask);
> + primitives.downShift(uShort, pic.stride[1] / sizeof(*uShort), uPixel, getCStride(), chromaWidth, height >> m_vChromaShift, shift, mask);
> + primitives.downShift(vShort, pic.stride[2] / sizeof(*vShort), vPixel, getCStride(), chromaWidth, height >> m_vChromaShift, shift, mask);
> +
> + /* Handles the remaining part of the luma component if the width is not a multiple of 16 */
> + if (rem)
> {
> - for (int c = 0; c < width; c++)
> + for (int r = 0; r < height; r++)
> {
> - yPixel[c] = (pixel)((yShort[c] >> shift) & mask);
> + for (int c = lumaWidth; c < width; c++)
> + {
> + yPixel[c] = (pixel)((yShort[c] >> shift) & mask);
> + }
> +
> + yPixel += getStride();
> + yShort += pic.stride[0] / sizeof(*yShort);
> }
> -
> - yPixel += getStride();
> - yShort += pic.stride[0] / sizeof(*yShort);
> }
>
> - for (int r = 0; r < height >> m_vChromaShift; r++)
> + /* Handles the remaining part of the chroma component if the width is not a multiple of 16 */
> + if (chromaRem)
> {
> - for (int c = 0; c < width >> m_hChromaShift; c++)
> + for (int r = 0; r < height >> m_vChromaShift; r++)
> {
> - uPixel[c] = (pixel)((uShort[c] >> shift) & mask);
> - vPixel[c] = (pixel)((vShort[c] >> shift) & mask);
> + for (int c = chromaWidth; c < width >> m_hChromaShift; c++)
> + {
> + uPixel[c] = (pixel)((uShort[c] >> shift) & mask);
> + vPixel[c] = (pixel)((vShort[c] >> shift) & mask);
> + }
> +
> + uPixel += getCStride();
> + vPixel += getCStride();
> + uShort += pic.stride[1] / sizeof(*uShort);
> + vShort += pic.stride[2] / sizeof(*vShort);
> }
> -
> - uPixel += getCStride();
> - vPixel += getCStride();
> - uShort += pic.stride[1] / sizeof(*uShort);
> - vShort += pic.stride[2] / sizeof(*vShort);
> }
> }
>
> diff -r 5328eec59554 -r 481bca8b54d0 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Wed Mar 12 16:01:25 2014 -0500
> +++ b/source/common/pixel.cpp Thu Mar 13 12:18:31 2014 +0530
> @@ -852,6 +852,34 @@
> dst += dstStride;
> }
> }
> +
> +void upShift(uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height)
> +{
> + for (int r = 0; r < height; r++)
> + {
> + for (int c = 0; c < width; c++)
> + {
> + dst[c] = ((pixel)src[c]) << 2;
> + }
> +
> + dst += dstStride;
> + src += srcStride;
> + }
> +}
> +
> +void downShift(uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
> +{
> + for (int r = 0; r < height; r++)
> + {
> + for (int c = 0; c < width; c++)
> + {
> + dst[c] = (pixel)((src[c] >> shift) & mask);
> + }
> +
> + dst += dstStride;
> + src += srcStride;
> + }
> +}
> } // end anonymous namespace
>
> namespace x265 {
> @@ -1099,5 +1127,7 @@
> p.var[BLOCK_32x32] = pixel_var<32>;
> p.var[BLOCK_64x64] = pixel_var<64>;
> p.plane_copy_deinterleave_c = plane_copy_deinterleave_chroma;
> + p.upShift = upShift;
> + p.downShift = downShift;
> }
> }
> diff -r 5328eec59554 -r 481bca8b54d0 source/common/primitives.h
> --- a/source/common/primitives.h Wed Mar 12 16:01:25 2014 -0500
> +++ b/source/common/primitives.h Thu Mar 13 12:18:31 2014 +0530
> @@ -163,6 +163,8 @@
> typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
>
> typedef void (*saoCuOrgE0_t)(pixel * rec, int8_t * offsetEo, int lcuWidth, int8_t signLeft);
> +typedef void (*planecopy_cp) (uint8_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height);
> +typedef void (*planecopy_sp) (uint16_t *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
>
> /* Define a structure containing function pointers to optimized encoder
> * primitives. Each pointer can reference either an assembly routine,
> @@ -233,6 +235,8 @@
> extendCURowBorder_t extendRowBorder;
> // sao primitives
> saoCuOrgE0_t saoCuOrgE0;
> + planecopy_cp upShift;
> + planecopy_sp downShift;
>
> struct
> {
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list