[x265] [PATCH 1 of 3] do border extension on CU level and new counter for reconColCount

Min Chen chenm003 at 163.com
Tue Dec 22 02:49:08 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1450727186 21600
# Node ID bcc6d005cd852c043413d1f90aca05366acec514
# Parent  942587f1ab4484ce69a818ce9c8adc59c38fe239
do border extension on CU level and new counter for reconColCount
---
 source/common/frame.cpp         |   11 +++
 source/common/frame.h           |    2 +
 source/encoder/dpb.cpp          |    6 ++
 source/encoder/frameencoder.cpp |   13 +++
 source/encoder/framefilter.cpp  |  186 +++++++++++++++++++++++++++------------
 source/encoder/framefilter.h    |   17 ++++
 6 files changed, 177 insertions(+), 58 deletions(-)

diff -r 942587f1ab44 -r bcc6d005cd85 source/common/frame.cpp
--- a/source/common/frame.cpp	Wed Dec 16 09:08:00 2015 +0530
+++ b/source/common/frame.cpp	Mon Dec 21 13:46:26 2015 -0600
@@ -33,6 +33,7 @@
     m_bChromaExtended = false;
     m_lowresInit = false;
     m_reconRowCount.set(0);
+    m_reconColCount = NULL;
     m_countRefEncoders = 0;
     m_encData = NULL;
     m_reconPic = NULL;
@@ -51,6 +52,10 @@
     if (m_fencPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp) &&
         m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode))
     {
+        X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
+        m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1)  / g_maxCUSize;
+        m_reconColCount = new ThreadSafeInteger[m_numRows];
+
         if (quantOffsets)
         {
             int32_t cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
@@ -122,6 +127,12 @@
         m_reconPic = NULL;
     }
 
+    if (m_reconColCount)
+    {
+        delete[] m_reconColCount;
+        m_reconColCount = NULL;
+    }
+
     if (m_quantOffsets)
     {
         delete[] m_quantOffsets;
diff -r 942587f1ab44 -r bcc6d005cd85 source/common/frame.h
--- a/source/common/frame.h	Wed Dec 16 09:08:00 2015 +0530
+++ b/source/common/frame.h	Mon Dec 21 13:46:26 2015 -0600
@@ -63,6 +63,8 @@
 
     /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */
     ThreadSafeInteger      m_reconRowCount;      // count of CTU rows completely reconstructed and extended for motion reference
+    ThreadSafeInteger*     m_reconColCount;      // count of CTU cols completely reconstructed and extended for motion reference
+    int32_t                m_numRows;
     volatile uint32_t      m_countRefEncoders;   // count of FrameEncoder threads monitoring m_reconRowCount
 
     Frame*                 m_next;               // PicList doubly linked list pointers
diff -r 942587f1ab44 -r bcc6d005cd85 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cpp	Wed Dec 16 09:08:00 2015 +0530
+++ b/source/encoder/dpb.cpp	Mon Dec 21 13:46:26 2015 -0600
@@ -74,6 +74,12 @@
             curFrame->m_reconRowCount.set(0);
             curFrame->m_bChromaExtended = false;
 
+            // Reset column counter
+            X265_CHECK(curFrame->m_reconColCount != NULL, "curFrame->m_reconColCount check failure");
+            X265_CHECK(curFrame->m_numRows > 0, "curFrame->m_numRows check failure");
+            for(int32_t col = 0; col < curFrame->m_numRows; col++)
+                curFrame->m_reconColCount[col].set(0);
+
             // iterator is invalidated by remove, restart scan
             m_picList.remove(*curFrame);
             iterFrame = m_picList.first();
diff -r 942587f1ab44 -r bcc6d005cd85 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Wed Dec 16 09:08:00 2015 +0530
+++ b/source/encoder/frameencoder.cpp	Mon Dec 21 13:46:26 2015 -0600
@@ -1139,6 +1139,12 @@
                 m_frameFilter.m_parallelFilter[row].tryBondPeers(*this, 1);
             }
         }
+        // Both Loopfilter and SAO Disabled
+        else
+        {
+            m_frameFilter.m_parallelFilter[row].processPostCu(col);
+            m_frame->m_reconColCount[row].set(col);
+        }
 
         if (m_param->bEnableWavefront && curRow.completed >= 2 && row < m_numRows - 1 &&
             (!m_bAllRowsStop || intRow + 1 < m_vbvResetTriggerRow))
@@ -1247,6 +1253,13 @@
                     m_frameFilter.m_parallelFilter[row].processSaoUnitCu(saoParam, col);
                 }
             }
+
+            // Process border extension on last row
+            for(uint32_t col = 0; col < numCols; col++)
+            {
+                m_frameFilter.m_parallelFilter[row].processPostCu(col);
+            }
+            m_frame->m_reconColCount[row].set(numCols - 1);
         }
     }
 
diff -r 942587f1ab44 -r bcc6d005cd85 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Wed Dec 16 09:08:00 2015 +0530
+++ b/source/encoder/framefilter.cpp	Mon Dec 21 13:46:26 2015 -0600
@@ -37,6 +37,8 @@
 
 uint32_t FrameFilter::ParallelFilter::numCols = 0;
 uint32_t FrameFilter::ParallelFilter::numRows = 0;
+uint32_t FrameFilter::ParallelFilter::lastHeight = 0;
+uint32_t FrameFilter::ParallelFilter::lastWidth = 0;
 
 void FrameFilter::destroy()
 {
@@ -65,7 +67,7 @@
     m_pad[0] = top->m_sps.conformanceWindow.rightOffset;
     m_pad[1] = top->m_sps.conformanceWindow.bottomOffset;
     m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
-    m_lastHeight = m_param->sourceHeight % g_maxCUSize ? m_param->sourceHeight % g_maxCUSize : g_maxCUSize;
+    m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ? (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;
 
     if (m_param->bEnableSsim)
         m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
@@ -105,6 +107,8 @@
     // Setting maximum columns
     ParallelFilter::numCols = numCols;
     ParallelFilter::numRows = numRows;
+    ParallelFilter::lastHeight = m_lastHeight;
+    ParallelFilter::lastWidth = (m_param->sourceWidth % g_maxCUSize) ? (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
 }
 
 void FrameFilter::start(Frame *frame, Entropy& initState, int qp)
@@ -123,6 +127,7 @@
             m_parallelFilter[row].m_allowedCol.set(0);
             m_parallelFilter[row].m_lastDeblocked.set(-1);
             m_parallelFilter[row].m_encData = frame->m_encData;
+            m_parallelFilter[row].m_frame = frame;
         }
 
         // Reset SAO common statistics
@@ -218,8 +223,98 @@
 
         uint32_t cuAddr = m_rowAddr + col;
         const CUData* ctu = m_encData->getPicCTU(cuAddr);
-        assert(m_frameEncoder->m_frame->m_reconPic == m_encData->m_reconPic);
-        origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frameEncoder->m_frame);
+        assert(m_frame->m_reconPic == m_encData->m_reconPic);
+        origCUSampleRestoration(ctu, cuGeoms[ctuGeomMap[cuAddr]], *m_frame);
+    }
+}
+
+// NOTE: must be delayed by one row when deblocking is enabled, because the deblock horizontal-edge pass modifies pixels of the row above
+void FrameFilter::ParallelFilter::processPostCu(uint32_t col) const
+{
+    // shortcut path for non-border area
+    if ((col != 0) & (col != numCols - 1) & (m_row != 0) & (m_row != numRows - 1))
+        return;
+
+    PicYuv *reconPic = m_frame->m_reconPic;
+    const uint32_t lineStartCUAddr = m_rowAddr + col;
+    const int realH = getCUHeight(m_row);
+    const int realW = getCUWidth(col);
+
+    const uint32_t lumaMarginX = reconPic->m_lumaMarginX;
+    const uint32_t lumaMarginY = reconPic->m_lumaMarginY;
+    const uint32_t chromaMarginX = reconPic->m_chromaMarginX;
+    const uint32_t chromaMarginY = reconPic->m_chromaMarginY;
+    const int hChromaShift = reconPic->m_hChromaShift;
+    const int vChromaShift = reconPic->m_vChromaShift;
+    const intptr_t stride = reconPic->m_stride;
+    const intptr_t strideC = reconPic->m_strideC;
+    pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);
+    // Must check for I400 here because m_picOrg is uninitialized in that case
+    pixel *pixU = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
+    pixel *pixV = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
+    int copySizeY = realW;
+    int copySizeC = (realW >> hChromaShift);
+
+    if ((col == 0) | (col == numCols - 1))
+    {
+        // TODO: improve by process on Left or Right only
+        primitives.extendRowBorder(reconPic->getLumaAddr(m_rowAddr), stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
+
+        if (m_param->internalCsp != X265_CSP_I400)
+        {
+            primitives.extendRowBorder(reconPic->getCbAddr(m_rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
+            primitives.extendRowBorder(reconPic->getCrAddr(m_rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
+        }
+    }
+
+    // The first and last CU also copy the extra left/right margin
+    if ((col == 0) | (col == numCols - 1))
+    {
+        copySizeY += lumaMarginX;
+        copySizeC += chromaMarginX;
+    }
+
+    // First column: start at the left padding area so the copy covers both the padding and the first CU
+    if (col == 0)
+    {
+        pixY -= lumaMarginX;
+        pixU -= chromaMarginX;
+        pixV -= chromaMarginX;
+    }
+
+    // Border extend Top
+    if (m_row == 0)
+    {
+        for (uint32_t y = 0; y < lumaMarginY; y++)
+            memcpy(pixY - (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
+
+        if (m_param->internalCsp != X265_CSP_I400)
+        {
+            for (uint32_t y = 0; y < chromaMarginY; y++)
+            {
+                memcpy(pixU - (y + 1) * strideC, pixU, copySizeC * sizeof(pixel));
+                memcpy(pixV - (y + 1) * strideC, pixV, copySizeC * sizeof(pixel));
+            }
+        }
+    }
+
+    // Border extend Bottom
+    if (m_row == numRows - 1)
+    {
+        pixY += (realH - 1) * stride;
+        pixU += ((realH >> vChromaShift) - 1) * strideC;
+        pixV += ((realH >> vChromaShift) - 1) * strideC;
+        for (uint32_t y = 0; y < lumaMarginY; y++)
+            memcpy(pixY + (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
+
+        if (m_param->internalCsp != X265_CSP_I400)
+        {
+            for (uint32_t y = 0; y < chromaMarginY; y++)
+            {
+                memcpy(pixU + (y + 1) * strideC, pixU, copySizeC * sizeof(pixel));
+                memcpy(pixV + (y + 1) * strideC, pixV, copySizeC * sizeof(pixel));
+            }
+        }
     }
 }
 
@@ -254,6 +349,13 @@
             {
                 const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
                 deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
+
+                // When SAO is disabled, set the column counter here
+                if ((!m_param->bEnableSAO) & (m_row >= 1))
+                {
+                    m_prevRow->processPostCu(col - 1);
+                    m_frame->m_reconColCount[m_row - 1].set(col - 1);
+                }
             }
 
             if (m_param->bEnableSAO)
@@ -275,6 +377,8 @@
                 {
                     // Must delay 1 row to avoid thread data race conflict
                     m_prevRow->processSaoUnitCu(saoParam, col - 3);
+                    m_prevRow->processPostCu(col - 3);
+                    m_frame->m_reconColCount[m_row - 1].set(col - 3);
                 }
             }
 
@@ -291,8 +395,16 @@
         {
             const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
             deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_HOR);
+
+            // When SAO is disabled, set the column counter here
+            if ((!m_param->bEnableSAO) & (m_row >= 1))
+            {
+                m_prevRow->processPostCu(numCols - 1);
+                m_frame->m_reconColCount[m_row - 1].set(numCols - 1);
+            }
         }
 
+        // TODO: move processPostCu() into processSaoUnitCu()
         if (m_param->bEnableSAO)
         {
             // Save SAO bottom row reference pixels
@@ -308,13 +420,26 @@
 
             // Process Previous Rows SAO CU
             if (m_row >= 1 && numCols >= 3)
+            {
                 m_prevRow->processSaoUnitCu(saoParam, numCols - 3);
+                m_prevRow->processPostCu(numCols - 3);
+            }
 
             if (m_row >= 1 && numCols >= 2)
+            {
                 m_prevRow->processSaoUnitCu(saoParam, numCols - 2);
+                m_prevRow->processPostCu(numCols - 2);
+            }
 
             if (m_row >= 1 && numCols >= 1)
+            {
                 m_prevRow->processSaoUnitCu(saoParam, numCols - 1);
+                m_prevRow->processPostCu(numCols - 1);
+            }
+
+            // Setting column sync counter
+            if (m_row >= 1)
+                m_frame->m_reconColCount[m_row - 1].set(numCols - 1);
         }
         m_lastDeblocked.set(numCols);
     }
@@ -371,61 +496,6 @@
     PicYuv *reconPic = m_frame->m_reconPic;
     const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
     const uint32_t lineStartCUAddr = row * numCols;
-    const int realH = getCUHeight(row);
-
-    // Border extend Left and Right
-    primitives.extendRowBorder(reconPic->getLumaAddr(lineStartCUAddr), reconPic->m_stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
-    if (m_param->internalCsp != X265_CSP_I400)
-    {
-        primitives.extendRowBorder(reconPic->getCbAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
-        primitives.extendRowBorder(reconPic->getCrAddr(lineStartCUAddr), reconPic->m_strideC, reconPic->m_picWidth >> m_hChromaShift, realH >> m_vChromaShift, reconPic->m_chromaMarginX);
-    }
-
-    // Border extend Top
-    if (!row)
-    {
-        const intptr_t stride = reconPic->m_stride;
-        pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX;
-
-        for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
-            memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
-
-        if (m_param->internalCsp != X265_CSP_I400)
-        {
-            const intptr_t strideC = reconPic->m_strideC;
-            pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
-            pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX;
-
-            for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
-            {
-                memcpy(pixU - (y + 1) * strideC, pixU, strideC * sizeof(pixel));
-                memcpy(pixV - (y + 1) * strideC, pixV, strideC * sizeof(pixel));
-            }
-        }
-    }
-
-    // Border extend Bottom
-    if (row == m_numRows - 1)
-    {
-        const intptr_t stride = reconPic->m_stride;
-        pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr) - reconPic->m_lumaMarginX + (realH - 1) * stride;
-
-        for (uint32_t y = 0; y < reconPic->m_lumaMarginY; y++)
-            memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
-
-        if (m_param->internalCsp != X265_CSP_I400)
-        {
-            const intptr_t strideC = reconPic->m_strideC;
-            pixel *pixU = reconPic->getCbAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
-            pixel *pixV = reconPic->getCrAddr(lineStartCUAddr) - reconPic->m_chromaMarginX + ((realH >> m_vChromaShift) - 1) * strideC;
-
-            for (uint32_t y = 0; y < reconPic->m_chromaMarginY; y++)
-            {
-                memcpy(pixU + (y + 1) * strideC, pixU, strideC * sizeof(pixel));
-                memcpy(pixV + (y + 1) * strideC, pixV, strideC * sizeof(pixel));
-            }
-        }
-    }
 
     // Notify other FrameEncoders that this row of reconstructed pixels is available
     m_frame->m_reconRowCount.incr();
diff -r 942587f1ab44 -r bcc6d005cd85 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Wed Dec 16 09:08:00 2015 +0530
+++ b/source/encoder/framefilter.h	Mon Dec 21 13:46:26 2015 -0600
@@ -63,9 +63,12 @@
     public:
         static uint32_t     numCols;
         static uint32_t     numRows;
+        static uint32_t     lastHeight;
+        static uint32_t     lastWidth;
         uint32_t            m_row;
         uint32_t            m_rowAddr;
         x265_param*         m_param;
+        Frame*              m_frame;
         FrameEncoder*       m_frameEncoder;
         FrameData*          m_encData;
         ParallelFilter*     m_prevRow;
@@ -78,6 +81,7 @@
             : m_row(0)
             , m_rowAddr(0)
             , m_param(NULL)
+            , m_frame(NULL)
             , m_frameEncoder(NULL)
             , m_encData(NULL)
             , m_prevRow(NULL)
@@ -95,6 +99,19 @@
         // Copy and Save SAO reference pixels for SAO Rdo decide
         void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);
 
+        // Post-Process (Border extension)
+        void processPostCu(uint32_t col) const;
+
+        uint32_t getCUHeight(int rowNum) const
+        {
+            return (rowNum == (int)numRows - 1) ? lastHeight : g_maxCUSize;
+        }
+
+        uint32_t getCUWidth(int colNum) const
+        {
+            return (colNum == (int)numCols - 1) ? lastWidth : g_maxCUSize;
+        }
+
     protected:
 
         ParallelFilter operator=(const ParallelFilter&);



More information about the x265-devel mailing list