[x265] [PATCH] encoder:auto-padding to min CU size and set conformance window
Steve Borho
steve at borho.org
Sun Oct 20 23:08:29 CEST 2013
On Fri, Oct 18, 2013 at 6:57 AM, Gopu Govindaswamy <
gopu at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1382097422 -19800
> # Node ID bb56d24edc2b2565bad644719b5e0194cf3fa845
> # Parent 48afd41e075345cc485ffbe14b33fafc87a4e1ba
> encoder:auto-padding to min CU size and set conformance window
>
> diff -r 48afd41e0753 -r bb56d24edc2b source/Lib/TLibCommon/TComPicYuv.cpp
> --- a/source/Lib/TLibCommon/TComPicYuv.cpp Fri Oct 18 10:29:53 2013
> +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp Fri Oct 18 17:27:02 2013
> +0530
> @@ -74,7 +74,7 @@
> m_cuWidth = maxCUWidth;
> m_cuHeight = maxCUHeight;
>
> - int numCuInWidth = (m_picWidth + m_cuWidth - 1) / m_cuWidth;
> + int numCuInWidth = (m_picWidth + m_cuWidth - 1) / m_cuWidth;
> int numCuInHeight = (m_picHeight + m_cuHeight - 1) / m_cuHeight;
>
> m_numCuInWidth = numCuInWidth;
> @@ -88,9 +88,9 @@
> m_chromaMarginY = m_lumaMarginY >> 1;
> m_strideC = (m_picWidth >> 1) + (m_chromaMarginX << 1);
>
> - m_picBufY = (Pel*)X265_MALLOC(Pel, (m_picWidth + (m_lumaMarginX <<
> 1)) * (m_picHeight + (m_lumaMarginY << 1)));
> - m_picBufU = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) +
> (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
> - m_picBufV = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) +
> (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
> + m_picBufY = (Pel*)X265_MALLOC(Pel, (m_picWidth + (m_lumaMarginX <<
> 1)) * (m_picHeight + (m_lumaMarginY << 1)) + (m_picHeight * m_picWidth));
> + m_picBufU = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) +
> (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)) +
> (m_picHeight * m_picWidth));
> + m_picBufV = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) +
> (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)) +
> (m_picHeight * m_picWidth));
>
This doubles the size of each picture malloc and is completely
unnecessary: m_picWidth and m_picHeight are already padded.
> m_picOrgY = m_picBufY + m_lumaMarginY * getStride() +
> m_lumaMarginX;
> m_picOrgU = m_picBufU + m_chromaMarginY * getCStride() +
> m_chromaMarginX;
> @@ -357,7 +357,7 @@
> * Upscale pixels from 8bits to 16 bits when required, but do not modify
> pixels.
> * This new routine is GPL
> */
> -void TComPicYuv::copyFromPicture(const x265_picture_t& pic)
> +void TComPicYuv::copyFromPicture(const x265_picture_t& pic, uint32_t hpad)
> {
> Pel *Y = getLumaAddr();
> Pel *U = getCbAddr();
> @@ -402,25 +402,52 @@
> #endif // if HIGH_BIT_DEPTH
> {
> int width = m_picWidth * (pic.bitDepth > 8 ? 2 : 1);
> + int height = m_picHeight - hpad;
> + uint32_t pady = width - pic.stride[0];
>
You can't assume that pic.stride[0] is the same as the param.inputWidth
the user passed in. You need to pass both the x and y pad into this
function. Also, hpad and pady are an odd pair of names: how does pady
relate to the horizontal (or X) padding?
> + // copy pixels by row into encoder's buffer
> + for (int r = 0; r < height; r++)
> + {
> + memcpy(Y, y, pic.stride[0]);
> + /* Extend the row at right is not multiple of the minimum CU
> size */
> + if (pady)
> + ::memset(Y + pic.stride[0], Y[pic.stride[0] - 1], pady);
>
> - // copy pixels by row into encoder's buffer
> - for (int r = 0; r < m_picHeight; r++)
> - {
> - memcpy(Y, y, width);
> -
> - Y += getStride();
> + Y += getStride(); //width + margin + padsize
> y += pic.stride[0];
> }
>
> - for (int r = 0; r < m_picHeight >> 1; r++)
> + uint32_t padu = (width >> 1) - pic.stride[1];
> + uint32_t padv = (width >> 1) - pic.stride[2];
>
Ditto here: pic.stride[1] and pic.stride[2] can't be assumed to match the chroma width either.
> + for (int r = 0; r < height >> 1; r++)
> {
> - memcpy(U, u, width >> 1);
> - memcpy(V, v, width >> 1);
> + memcpy(U, u, pic.stride[1]);
> + memcpy(V, v, pic.stride[2]);
> + /* Extend the row at right is not multiple of the minimum CU
> size */
> + if (padu)
> + ::memset(U + pic.stride[1], U[pic.stride[1] - 1], padu);
> + if (padv)
> + ::memset(V + pic.stride[2], V[pic.stride[2] - 1], padv);
>
> U += getCStride();
> V += getCStride();
> u += pic.stride[1];
> v += pic.stride[2];
> }
> +
> + /* extend the top if height is not multiple of the minimum CU
> size */
> + if (hpad)
> + {
> + Pel *Y = getLumaAddr();
> + Pel *U = getCbAddr();
> + Pel *V = getCrAddr();
> + for (uint32_t i = 0; i < hpad; i++)
> + memcpy(Y - (i + 1) * getStride(), Y, getStride() *
> sizeof(pixel));
> +
> + for (uint32_t y = 0; y < hpad; y++)
> + {
> + memcpy(U - (y + 1) * getCStride(), U, getCStride() *
> sizeof(pixel));
> + memcpy(V - (y + 1) * getCStride(), V, getCStride() *
> sizeof(pixel));
> + }
> + }
> }
> }
> diff -r 48afd41e0753 -r bb56d24edc2b source/Lib/TLibCommon/TComPicYuv.h
> --- a/source/Lib/TLibCommon/TComPicYuv.h Fri Oct 18 10:29:53 2013
> +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.h Fri Oct 18 17:27:02 2013
> +0530
> @@ -178,7 +178,7 @@
> void copyToPicLuma(TComPicYuv* destYuv);
> void copyToPicCb(TComPicYuv* destYuv);
> void copyToPicCr(TComPicYuv* destYuv);
> - void copyFromPicture(const x265_picture_t&);
> + void copyFromPicture(const x265_picture_t&, uint32_t hpad);
>
> MotionReference* generateMotionReference(wpScalingParam *w);
>
> diff -r 48afd41e0753 -r bb56d24edc2b source/common/common.cpp
> --- a/source/common/common.cpp Fri Oct 18 10:29:53 2013 +0530
> +++ b/source/common/common.cpp Fri Oct 18 17:27:02 2013 +0530
> @@ -250,10 +250,6 @@
> "Minimum partition width size should be larger than or equal to
> 8");
> CHECK(param->maxCUSize < 16,
> "Maximum partition width size should be larger than or equal to
> 16");
> - CHECK((param->sourceWidth % (param->maxCUSize >> (maxCUDepth - 1)))
> != 0,
> - "Resulting coded frame width must be a multiple of the minimum
> CU size");
> - CHECK((param->sourceHeight % (param->maxCUSize >> (maxCUDepth - 1)))
> != 0,
> - "Resulting coded frame height must be a multiple of the minimum
> CU size");
>
> CHECK((1u << tuQTMaxLog2Size) > param->maxCUSize,
> "QuadtreeTULog2MaxSize must be log2(maxCUSize) or smaller.");
> diff -r 48afd41e0753 -r bb56d24edc2b source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Fri Oct 18 10:29:53 2013 +0530
> +++ b/source/encoder/encoder.cpp Fri Oct 18 17:27:02 2013 +0530
> @@ -179,7 +179,7 @@
>
> /* Copy input picture into a TComPic, send to lookahead */
> pic->getSlice()->setPOC(++m_pocLast);
> - pic->getPicYuvOrg()->copyFromPicture(*pic_in);
> + pic->getPicYuvOrg()->copyFromPicture(*pic_in, getPad(1));
>
you could just pass m_pad
> pic->m_userData = pic_in->userData;
> pic->m_pts = pic_in->pts;
>
> @@ -971,9 +971,30 @@
> m_maxCuDQPDepth = 0;
> m_maxNumOffsetsPerPic = 2048;
> m_log2ParallelMergeLevelMinus2 = 0;
> - m_conformanceWindow.setWindow(0, 0, 0, 0);
> - int nullpad[2] = { 0, 0 };
> - setPad(nullpad);
> + int pad[2] = { 0, 0 };
> +
> + //======== set pad size if width is not multiple of the minimum CU
> size =========
> + uint32_t maxCUDepth = (uint32_t)g_convertToBit[_param->maxCUSize];
>
Making a temporary variable here for minCUdepth would make this clearer.
> + if ((_param->sourceWidth % (_param->maxCUSize >> (maxCUDepth - 1))) !=
> 0)
> + {
> + uint32_t padsize = 0;
> + uint32_t rem = _param->sourceWidth % (_param->maxCUSize >>
> (maxCUDepth - 1));
> + padsize = (_param->maxCUSize >> (maxCUDepth - 1)) - rem;
> + _param->sourceWidth += padsize;
> + pad[0] = padsize;
> + }
> +
> + //======== set pad size if height is not multiple of the minimum CU
> size =========
> + if ((_param->sourceHeight % (_param->maxCUSize >> (maxCUDepth - 1)))
> != 0)
> + {
> + uint32_t padsize = 0;
> + uint32_t rem = _param->sourceHeight % (_param->maxCUSize >>
> (maxCUDepth - 1));
> + padsize = (_param->maxCUSize >> (maxCUDepth - 1)) - rem;
> + _param->sourceHeight += padsize;
> + pad[1] = padsize;
> + }
> + setPad(pad);
>
This is OK logic-wise, but you might as well operate directly on m_pad
from TEncCfg and remove the setPad()/getPad() methods.
> + m_conformanceWindow.setWindow(0, getPad(0), getPad(1), 0);
>
Likewise, set m_conformanceWindow directly and remove its set/get methods.
>
> m_progressiveSourceFlag = true;
> m_interlacedSourceFlag = false;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131020/daccd760/attachment-0001.html>
More information about the x265-devel
mailing list