[x265] [PATCH] encoder:auto-padding to min CU size and set conformance window

Fri Oct 18 13:57:17 CEST 2013

# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1382097422 -19800
# Node ID bb56d24edc2b2565bad644719b5e0194cf3fa845
# Parent  48afd41e075345cc485ffbe14b33fafc87a4e1ba
encoder:auto-padding to min CU size and set conformance window

diff -r 48afd41e0753 -r bb56d24edc2b source/Lib/TLibCommon/TComPicYuv.cpp

--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Fri Oct 18 10:29:53 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Fri Oct 18 17:27:02 2013 +0530
@@ -74,7 +74,7 @@
     m_cuWidth  = maxCUWidth;
     m_cuHeight = maxCUHeight;
 
-    int numCuInWidth  = (m_picWidth + m_cuWidth - 1)  / m_cuWidth;
+    int numCuInWidth  = (m_picWidth + m_cuWidth - 1) / m_cuWidth;
     int numCuInHeight = (m_picHeight + m_cuHeight - 1) / m_cuHeight;
 
     m_numCuInWidth = numCuInWidth;
@@ -88,9 +88,9 @@
     m_chromaMarginY = m_lumaMarginY >> 1;
     m_strideC = (m_picWidth >> 1) + (m_chromaMarginX << 1);
 
-    m_picBufY = (Pel*)X265_MALLOC(Pel, (m_picWidth + (m_lumaMarginX << 1)) * (m_picHeight + (m_lumaMarginY << 1)));
-    m_picBufU = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) + (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
-    m_picBufV = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) + (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)));
+    m_picBufY = (Pel*)X265_MALLOC(Pel, (m_picWidth + (m_lumaMarginX << 1)) * (m_picHeight + (m_lumaMarginY << 1)) + (m_picHeight * m_picWidth));
+    m_picBufU = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) + (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)) + (m_picHeight * m_picWidth));
+    m_picBufV = (Pel*)X265_MALLOC(Pel, ((m_picWidth >> 1) + (m_chromaMarginX << 1)) * ((m_picHeight >> 1) + (m_chromaMarginY << 1)) + (m_picHeight * m_picWidth));
 
     m_picOrgY = m_picBufY + m_lumaMarginY   * getStride()  + m_lumaMarginX;
     m_picOrgU = m_picBufU + m_chromaMarginY * getCStride() + m_chromaMarginX;
@@ -357,7 +357,7 @@
  * Upscale pixels from 8bits to 16 bits when required, but do not modify pixels.
  * This new routine is GPL
  */
-void TComPicYuv::copyFromPicture(const x265_picture_t& pic)
+void TComPicYuv::copyFromPicture(const x265_picture_t& pic, uint32_t hpad)
 {
     Pel *Y = getLumaAddr();
     Pel *U = getCbAddr();
@@ -402,25 +402,52 @@
 #endif // if HIGH_BIT_DEPTH
     {
         int width = m_picWidth * (pic.bitDepth > 8 ? 2 : 1);
+        int height = m_picHeight - hpad;
+        uint32_t pady = width - pic.stride[0];
+        // copy pixels by row into encoder's buffer
+        for (int r = 0; r < height; r++)
+        {
+            memcpy(Y, y, pic.stride[0]);
+            /* Extend the row to the right if the width is not a multiple of the minimum CU size */
+            if (pady)
+                ::memset(Y + pic.stride[0], Y[pic.stride[0] - 1], pady);
 
-        // copy pixels by row into encoder's buffer
-        for (int r = 0; r < m_picHeight; r++)
-        {
-            memcpy(Y, y, width);
-
-            Y += getStride();
+            Y += getStride(); //width + margin + padsize
             y += pic.stride[0];
         }
 
-        for (int r = 0; r < m_picHeight >> 1; r++)
+        uint32_t padu = (width >> 1) - pic.stride[1];
+        uint32_t padv = (width >> 1) - pic.stride[2];
+        for (int r = 0; r < height >> 1; r++)
         {
-            memcpy(U, u, width >> 1);
-            memcpy(V, v, width >> 1);
+            memcpy(U, u, pic.stride[1]);
+            memcpy(V, v, pic.stride[2]);
+            /* Extend the row to the right if the width is not a multiple of the minimum CU size */
+            if (padu)
+                ::memset(U + pic.stride[1], U[pic.stride[1] - 1], padu);
+            if (padv)
+                ::memset(V + pic.stride[2], V[pic.stride[2] - 1], padv);
 
             U += getCStride();
             V += getCStride();
             u += pic.stride[1];
             v += pic.stride[2];
         }
+
+        /* extend the top if the height is not a multiple of the minimum CU size */
+        if (hpad)
+        {
+            Pel *Y = getLumaAddr();
+            Pel *U = getCbAddr();
+            Pel *V = getCrAddr();
+            for (uint32_t i = 0; i < hpad; i++)
+                memcpy(Y - (i + 1) * getStride(), Y, getStride() * sizeof(pixel));
+
+            for (uint32_t y = 0; y < hpad; y++)
+            {
+                memcpy(U - (y + 1) * getCStride(), U, getCStride() * sizeof(pixel));
+                memcpy(V - (y + 1) * getCStride(), V, getCStride() * sizeof(pixel));
+            }
+        }
     }
 }
diff -r 48afd41e0753 -r bb56d24edc2b source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Fri Oct 18 10:29:53 2013 +0530
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Fri Oct 18 17:27:02 2013 +0530
@@ -178,7 +178,7 @@
     void  copyToPicLuma(TComPicYuv* destYuv);
     void  copyToPicCb(TComPicYuv* destYuv);
     void  copyToPicCr(TComPicYuv* destYuv);
-    void  copyFromPicture(const x265_picture_t&);
+    void  copyFromPicture(const x265_picture_t&, uint32_t hpad);
 
     MotionReference* generateMotionReference(wpScalingParam *w);
 
diff -r 48afd41e0753 -r bb56d24edc2b source/common/common.cpp
--- a/source/common/common.cpp	Fri Oct 18 10:29:53 2013 +0530
+++ b/source/common/common.cpp	Fri Oct 18 17:27:02 2013 +0530
@@ -250,10 +250,6 @@
           "Minimum partition width size should be larger than or equal to 8");
     CHECK(param->maxCUSize < 16,
           "Maximum partition width size should be larger than or equal to 16");
-    CHECK((param->sourceWidth  % (param->maxCUSize >> (maxCUDepth - 1))) != 0,
-          "Resulting coded frame width must be a multiple of the minimum CU size");
-    CHECK((param->sourceHeight % (param->maxCUSize >> (maxCUDepth - 1))) != 0,
-          "Resulting coded frame height must be a multiple of the minimum CU size");
 
     CHECK((1u << tuQTMaxLog2Size) > param->maxCUSize,
           "QuadtreeTULog2MaxSize must be log2(maxCUSize) or smaller.");
diff -r 48afd41e0753 -r bb56d24edc2b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Oct 18 10:29:53 2013 +0530
+++ b/source/encoder/encoder.cpp	Fri Oct 18 17:27:02 2013 +0530
@@ -179,7 +179,7 @@
 
         /* Copy input picture into a TComPic, send to lookahead */
         pic->getSlice()->setPOC(++m_pocLast);
-        pic->getPicYuvOrg()->copyFromPicture(*pic_in);
+        pic->getPicYuvOrg()->copyFromPicture(*pic_in, getPad(1));
         pic->m_userData = pic_in->userData;
         pic->m_pts = pic_in->pts;
 
@@ -971,9 +971,30 @@
     m_maxCuDQPDepth = 0;
     m_maxNumOffsetsPerPic = 2048;
     m_log2ParallelMergeLevelMinus2 = 0;
-    m_conformanceWindow.setWindow(0, 0, 0, 0);
-    int nullpad[2] = { 0, 0 };
-    setPad(nullpad);
+    int pad[2] = { 0, 0 };
+
+    //======== set pad size if width is not a multiple of the minimum CU size =========
+    uint32_t maxCUDepth = (uint32_t)g_convertToBit[_param->maxCUSize];
+    if ((_param->sourceWidth % (_param->maxCUSize >> (maxCUDepth - 1))) != 0)
+    {
+        uint32_t padsize = 0;
+        uint32_t rem = _param->sourceWidth % (_param->maxCUSize >> (maxCUDepth - 1));
+        padsize = (_param->maxCUSize >> (maxCUDepth - 1)) - rem;
+        _param->sourceWidth += padsize;
+        pad[0] = padsize;
+    }
+
+    //======== set pad size if height is not a multiple of the minimum CU size =========
+    if ((_param->sourceHeight % (_param->maxCUSize >> (maxCUDepth - 1))) != 0)
+    {
+        uint32_t padsize = 0;
+        uint32_t rem = _param->sourceHeight % (_param->maxCUSize >> (maxCUDepth - 1));
+        padsize = (_param->maxCUSize >> (maxCUDepth - 1)) - rem;
+        _param->sourceHeight += padsize;
+        pad[1] = padsize;
+    }
+    setPad(pad);
+    m_conformanceWindow.setWindow(0, getPad(0), getPad(1), 0);
 
     m_progressiveSourceFlag = true;
     m_interlacedSourceFlag = false;