[x265] [PATCH] encoder:auto-padding to min CU size and set conformance window

Sun Oct 20 23:08:29 CEST 2013

On Fri, Oct 18, 2013 at 6:57 AM, Gopu Govindaswamy <
gopu at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1382097422 -19800
> # Node ID bb56d24edc2b2565bad644719b5e0194cf3fa845
> # Parent  48afd41e075345cc485ffbe14b33fafc87a4e1ba
> encoder:auto-padding to min CU size and set conformance window
>
> diff -r 48afd41e0753 -r bb56d24edc2b source/Lib/TLibCommon/TComPicYuv.cpp
> --- a/source/Lib/TLibCommon/TComPicYuv.cpp      Fri Oct 18 10:29:53 2013
> +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp      Fri Oct 18 17:27:02 2013
> +0530
> @@ -74,7 +74,7 @@
>      m_cuWidth  = maxCUWidth;
>      m_cuHeight = maxCUHeight;
>
> -    int numCuInWidth  = (m_picWidth + m_cuWidth - 1)  / m_cuWidth;
> +    int numCuInWidth  = (m_picWidth + m_cuWidth - 1) / m_cuWidth;
>      int numCuInHeight = (m_picHeight + m_cuHeight - 1) / m_cuHeight;
>
>      m_numCuInWidth = numCuInWidth;
> @@ -88,9 +88,9 @@
>      m_chromaMarginY = m_lumaMarginY >> 1;
>      m_strideC = (m_picWidth >> 1) + (m_chromaMarginX << 1);
>
> -    m_picBufY = (Pel*)X265_MALLOC(Pel, (m_picWidth + (m_lumaMarginX <<
> 1)) * (m_picHeight + (m_lumaMarginY << 1)));
> -    m_picBufU = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) +
> (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
> -    m_picBufV = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) +
> (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
> +    m_picBufY = (Pel*)X265_MALLOC(Pel, (m_picWidth + (m_lumaMarginX <<
> 1)) * (m_picHeight + (m_lumaMarginY << 1)) + (m_picHeight * m_picWidth));
> +    m_picBufU = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) +
> (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)) +
> (m_picHeight * m_picWidth));
> +    m_picBufV = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) +
> (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)) +
> (m_picHeight * m_picWidth));
>

This doubles the size of each picture malloc and is completely
unnecessary: m_picWidth and m_picHeight are already padded.

>      m_picOrgY = m_picBufY + m_lumaMarginY   * getStride()  +
> m_lumaMarginX;
>      m_picOrgU = m_picBufU + m_chromaMarginY * getCStride() +
> m_chromaMarginX;
> @@ -357,7 +357,7 @@
>   * Upscale pixels from 8bits to 16 bits when required, but do not modify
> pixels.
>   * This new routine is GPL
>   */
> -void TComPicYuv::copyFromPicture(const x265_picture_t& pic)
> +void TComPicYuv::copyFromPicture(const x265_picture_t& pic, uint32_t hpad)
>  {
>      Pel *Y = getLumaAddr();
>      Pel *U = getCbAddr();
> @@ -402,25 +402,52 @@
>  #endif // if HIGH_BIT_DEPTH
>      {
>          int width = m_picWidth * (pic.bitDepth > 8 ? 2 : 1);
> +        int height = m_picHeight - hpad;
> +        uint32_t pady = width - pic.stride[0];
>

You can't assume that pic.stride[0] is the same as the
param.inputWidth the user passed in.  You need to pass both x and y pad
into this function.  hpad and pady are an odd pair of names.  How does pady
relate to the horizontal (or X) padding?

> +        // copy pixels by row into encoder's buffer
> +        for (int r = 0; r < height; r++)
> +        {
> +            memcpy(Y, y, pic.stride[0]);
> +            /* Extend the row at right is not multiple of the minimum CU
> size */
> +            if (pady)
> +                ::memset(Y + pic.stride[0], Y[pic.stride[0] - 1], pady);
>
> -        // copy pixels by row into encoder's buffer
> -        for (int r = 0; r < m_picHeight; r++)
> -        {
> -            memcpy(Y, y, width);
> -
> -            Y += getStride();
> +            Y += getStride(); //width + margin + padsize
>              y += pic.stride[0];
>          }
>
> -        for (int r = 0; r < m_picHeight >> 1; r++)
> +        uint32_t padu = (width >> 1) - pic.stride[1];
> +        uint32_t padv = (width >> 1) - pic.stride[2];
>

ditto here

> +        for (int r = 0; r < height >> 1; r++)
>          {
> -            memcpy(U, u, width >> 1);
> -            memcpy(V, v, width >> 1);
> +            memcpy(U, u, pic.stride[1]);
> +            memcpy(V, v, pic.stride[2]);
> +            /* Extend the row at right is not multiple of the minimum CU
> size */
> +            if (padu)
> +                ::memset(U + pic.stride[1], U[pic.stride[1] - 1], padu);
> +            if (padv)
> +                ::memset(V + pic.stride[2], V[pic.stride[2] - 1], padv);
>
>              U += getCStride();
>              V += getCStride();
>              u += pic.stride[1];
>              v += pic.stride[2];
>          }
> +
> +        /* extend the top if height is not multiple of the minimum CU
> size */
> +        if (hpad)
> +        {
> +            Pel *Y = getLumaAddr();
> +            Pel *U = getCbAddr();
> +            Pel *V = getCrAddr();
> +            for (uint32_t i = 0; i < hpad; i++)
> +                memcpy(Y - (i + 1) * getStride(), Y, getStride() *
> sizeof(pixel));
> +
> +            for (uint32_t y = 0; y < hpad; y++)
> +            {
> +                memcpy(U - (y + 1) * getCStride(), U, getCStride() *
> sizeof(pixel));
> +                memcpy(V - (y + 1) * getCStride(), V, getCStride() *
> sizeof(pixel));
> +            }
> +        }
>      }
>  }
> diff -r 48afd41e0753 -r bb56d24edc2b source/Lib/TLibCommon/TComPicYuv.h
> --- a/source/Lib/TLibCommon/TComPicYuv.h        Fri Oct 18 10:29:53 2013
> +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.h        Fri Oct 18 17:27:02 2013
> +0530
> @@ -178,7 +178,7 @@
>      void  copyToPicLuma(TComPicYuv* destYuv);
>      void  copyToPicCb(TComPicYuv* destYuv);
>      void  copyToPicCr(TComPicYuv* destYuv);
> -    void  copyFromPicture(const x265_picture_t&);
> +    void  copyFromPicture(const x265_picture_t&, uint32_t hpad);
>
>      MotionReference* generateMotionReference(wpScalingParam *w);
>
> diff -r 48afd41e0753 -r bb56d24edc2b source/common/common.cpp
> --- a/source/common/common.cpp  Fri Oct 18 10:29:53 2013 +0530
> +++ b/source/common/common.cpp  Fri Oct 18 17:27:02 2013 +0530
> @@ -250,10 +250,6 @@
>            "Minimum partition width size should be larger than or equal to
> 8");
>      CHECK(param->maxCUSize < 16,
>            "Maximum partition width size should be larger than or equal to
> 16");
> -    CHECK((param->sourceWidth  % (param->maxCUSize >> (maxCUDepth - 1)))
> != 0,
> -          "Resulting coded frame width must be a multiple of the minimum
> CU size");
> -    CHECK((param->sourceHeight % (param->maxCUSize >> (maxCUDepth - 1)))
> != 0,
> -          "Resulting coded frame height must be a multiple of the minimum
> CU size");
>
>      CHECK((1u << tuQTMaxLog2Size) > param->maxCUSize,
>            "QuadtreeTULog2MaxSize must be log2(maxCUSize) or smaller.");
> diff -r 48afd41e0753 -r bb56d24edc2b source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Fri Oct 18 10:29:53 2013 +0530
> +++ b/source/encoder/encoder.cpp        Fri Oct 18 17:27:02 2013 +0530
> @@ -179,7 +179,7 @@
>
>          /* Copy input picture into a TComPic, send to lookahead */
>          pic->getSlice()->setPOC(++m_pocLast);
> -        pic->getPicYuvOrg()->copyFromPicture(*pic_in);
> +        pic->getPicYuvOrg()->copyFromPicture(*pic_in, getPad(1));
>

you could just pass m_pad

>          pic->m_userData = pic_in->userData;
>          pic->m_pts = pic_in->pts;
>
> @@ -971,9 +971,30 @@
>      m_maxCuDQPDepth = 0;
>      m_maxNumOffsetsPerPic = 2048;
>      m_log2ParallelMergeLevelMinus2 = 0;
> -    m_conformanceWindow.setWindow(0, 0, 0, 0);
> -    int nullpad[2] = { 0, 0 };
> -    setPad(nullpad);
> +    int pad[2] = { 0, 0 };
> +
> +    //======== set pad size if width is not multiple of the minimum CU
> size =========
> +    uint32_t maxCUDepth = (uint32_t)g_convertToBit[_param->maxCUSize];
>

Making a temp var here for minCUdepth would make this clearer.

> +    if ((_param->sourceWidth % (_param->maxCUSize >> (maxCUDepth - 1))) !=
> 0)
> +    {
> +        uint32_t padsize = 0;
> +        uint32_t rem = _param->sourceWidth % (_param->maxCUSize >>
> (maxCUDepth - 1));
> +        padsize = (_param->maxCUSize >> (maxCUDepth - 1)) - rem;
> +        _param->sourceWidth += padsize;
> +        pad[0] = padsize;
> +    }
> +
> +    //======== set pad size if height is not multiple of the minimum CU
> size =========
> +    if ((_param->sourceHeight % (_param->maxCUSize >> (maxCUDepth - 1)))
> != 0)
> +    {
> +        uint32_t padsize = 0;
> +        uint32_t rem = _param->sourceHeight % (_param->maxCUSize >>
> (maxCUDepth - 1));
> +        padsize = (_param->maxCUSize >> (maxCUDepth - 1)) - rem;
> +        _param->sourceHeight += padsize;
> +        pad[1] = padsize;
> +    }
> +    setPad(pad);
>

This is OK logic-wise, but you might as well operate directly on m_pad
from TEncCfg and remove the setPad()/getPad() methods.

> +    m_conformanceWindow.setWindow(0, getPad(0), getPad(1), 0);
>

Likewise, set m_conformanceWindow directly and remove its set/get methods.

>
>      m_progressiveSourceFlag = true;
>      m_interlacedSourceFlag = false;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>

-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131020/daccd760/attachment-0001.html>