[x265] [PATCH 2 of 3] simplify logic for setting reconColCount[] and fix bug when both Deblock and SAO are disabled

Min Chen chenm003 at 163.com
Tue Dec 22 02:49:09 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1450727188 21600
# Node ID d8c3eded1440670bde63e2fb5bec0e80ff6e2d67
# Parent  bcc6d005cd852c043413d1f90aca05366acec514
simplify logic for setting reconColCount[] and fix bug when both Deblock and SAO are disabled
---
 source/encoder/frameencoder.cpp |    5 +-
 source/encoder/framefilter.cpp  |  119 +++++++++++++++++++++++++++++++++------
 source/encoder/framefilter.h    |    8 +-
 3 files changed, 108 insertions(+), 24 deletions(-)

diff -r bcc6d005cd85 -r d8c3eded1440 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Dec 21 13:46:26 2015 -0600
+++ b/source/encoder/frameencoder.cpp	Mon Dec 21 13:46:28 2015 -0600
@@ -1142,8 +1142,7 @@
         // Both Loopfilter and SAO Disabled
         else
         {
-            m_frameFilter.m_parallelFilter[row].processPostCu(col);
-            m_frame->m_reconColCount[row].set(col);
+            m_frameFilter.processPostCu(row, col);
         }
 
         if (m_param->bEnableWavefront && curRow.completed >= 2 && row < m_numRows - 1 &&
@@ -1257,9 +1256,9 @@
             // Process border extension on last row
             for(uint32_t col = 0; col < numCols; col++)
             {
+                // m_reconColCount will be set in processPostCu()
                 m_frameFilter.m_parallelFilter[row].processPostCu(col);
             }
-            m_frame->m_reconColCount[row].set(numCols - 1);
         }
     }
 
diff -r bcc6d005cd85 -r d8c3eded1440 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Dec 21 13:46:26 2015 -0600
+++ b/source/encoder/framefilter.cpp	Mon Dec 21 13:46:28 2015 -0600
@@ -231,6 +231,9 @@
 // NOTE: MUST BE delay a row when Deblock enabled, the Deblock will modify above pixels in Horizon pass
 void FrameFilter::ParallelFilter::processPostCu(uint32_t col) const
 {
+    // Update finished CU cursor
+    m_frame->m_reconColCount[m_row].set(col);
+
     // shortcut path for non-border area
     if ((col != 0) & (col != numCols - 1) & (m_row != 0) & (m_row != numRows - 1))
         return;
@@ -352,10 +355,7 @@
 
                 // When SAO Disable, setting column counter here
                 if ((!m_param->bEnableSAO) & (m_row >= 1))
-                {
                     m_prevRow->processPostCu(col - 1);
-                    m_frame->m_reconColCount[m_row - 1].set(col - 1);
-                }
             }
 
             if (m_param->bEnableSAO)
@@ -378,7 +378,6 @@
                     // Must delay 1 row to avoid thread data race conflict
                     m_prevRow->processSaoUnitCu(saoParam, col - 3);
                     m_prevRow->processPostCu(col - 3);
-                    m_frame->m_reconColCount[m_row - 1].set(col - 3);
                 }
             }
 
@@ -398,10 +397,7 @@
 
             // When SAO Disable, setting column counter here
             if ((!m_param->bEnableSAO) & (m_row >= 1))
-            {
                 m_prevRow->processPostCu(numCols - 1);
-                m_frame->m_reconColCount[m_row - 1].set(numCols - 1);
-            }
         }
 
         // TODO: move processPostCu() into processSaoUnitCu()
@@ -456,7 +452,7 @@
 
     if (!m_param->bEnableLoopFilter && !m_param->bEnableSAO)
     {
-        processRowPost(row);
+        processPostRow(row);
         return;
     }
     FrameData& encData = *m_frame->m_encData;
@@ -467,7 +463,7 @@
     // this row of CTUs has been encoded
 
     if (row > 0)
-        processRowPost(row - 1);
+        processPostRow(row - 1);
 
     if (row == m_numRows - 1)
     {
@@ -482,16 +478,105 @@
 
             m_parallelFilter[0].m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame);
         }
-        processRowPost(row);
+        processPostRow(row);
     }
 }
 
-uint32_t FrameFilter::getCUHeight(int rowNum) const
+// NOTE: This version is for the case where both Deblock and SAO are disabled
+void FrameFilter::processPostCu(uint32_t row, uint32_t col) const
 {
-    return rowNum == m_numRows - 1 ? m_lastHeight : g_maxCUSize;
+    // Update finished CU cursor
+    m_frame->m_reconColCount[row].set(col);
+
+    // shortcut path for non-border area
+    if ((col != 0) & (col != FrameFilter::ParallelFilter::numCols - 1) & (row != 0) & (row != FrameFilter::ParallelFilter::numRows - 1))
+        return;
+
+    PicYuv *reconPic = m_frame->m_reconPic;
+    const uint32_t rowAddr = row * FrameFilter::ParallelFilter::numCols;
+    const uint32_t lineStartCUAddr = rowAddr + col;
+    const int realH = FrameFilter::ParallelFilter::getCUHeight(row);
+    const int realW = FrameFilter::ParallelFilter::getCUWidth(col);
+
+    const uint32_t lumaMarginX = reconPic->m_lumaMarginX;
+    const uint32_t lumaMarginY = reconPic->m_lumaMarginY;
+    const uint32_t chromaMarginX = reconPic->m_chromaMarginX;
+    const uint32_t chromaMarginY = reconPic->m_chromaMarginY;
+    const int hChromaShift = reconPic->m_hChromaShift;
+    const int vChromaShift = reconPic->m_vChromaShift;
+    const intptr_t stride = reconPic->m_stride;
+    const intptr_t strideC = reconPic->m_strideC;
+    pixel *pixY = reconPic->getLumaAddr(lineStartCUAddr);
+    // I400 must be checked here, since m_picOrg is uninitialized in that case
+    pixel *pixU = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCbAddr(lineStartCUAddr) : NULL;
+    pixel *pixV = (m_param->internalCsp != X265_CSP_I400) ? reconPic->getCrAddr(lineStartCUAddr) : NULL;
+    int copySizeY = realW;
+    int copySizeC = (realW >> hChromaShift);
+
+    if ((col == 0) | (col == FrameFilter::ParallelFilter::numCols - 1))
+    {
+        // TODO: improve by processing only the Left or Right border
+        primitives.extendRowBorder(reconPic->getLumaAddr(rowAddr), stride, reconPic->m_picWidth, realH, reconPic->m_lumaMarginX);
+
+        if (m_param->internalCsp != X265_CSP_I400)
+        {
+            primitives.extendRowBorder(reconPic->getCbAddr(rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
+            primitives.extendRowBorder(reconPic->getCrAddr(rowAddr), strideC, reconPic->m_picWidth >> hChromaShift, realH >> vChromaShift, reconPic->m_chromaMarginX);
+        }
+    }
+
+    // Extra Left and Right border on first and last CU
+    if ((col == 0) | (col == FrameFilter::ParallelFilter::numCols - 1))
+    {
+        copySizeY += lumaMarginX;
+        copySizeC += chromaMarginX;
+    }
+
+    // First column needs to extend the left padding area as well as the first CU
+    if (col == 0)
+    {
+        pixY -= lumaMarginX;
+        pixU -= chromaMarginX;
+        pixV -= chromaMarginX;
+    }
+
+    // Border extend Top
+    if (row == 0)
+    {
+        for (uint32_t y = 0; y < lumaMarginY; y++)
+            memcpy(pixY - (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
+
+        if (m_param->internalCsp != X265_CSP_I400)
+        {
+            for (uint32_t y = 0; y < chromaMarginY; y++)
+            {
+                memcpy(pixU - (y + 1) * strideC, pixU, copySizeC * sizeof(pixel));
+                memcpy(pixV - (y + 1) * strideC, pixV, copySizeC * sizeof(pixel));
+            }
+        }
+    }
+
+    // Border extend Bottom
+    if (row == FrameFilter::ParallelFilter::numRows - 1)
+    {
+        pixY += (realH - 1) * stride;
+        pixU += ((realH >> vChromaShift) - 1) * strideC;
+        pixV += ((realH >> vChromaShift) - 1) * strideC;
+        for (uint32_t y = 0; y < lumaMarginY; y++)
+            memcpy(pixY + (y + 1) * stride, pixY, copySizeY * sizeof(pixel));
+
+        if (m_param->internalCsp != X265_CSP_I400)
+        {
+            for (uint32_t y = 0; y < chromaMarginY; y++)
+            {
+                memcpy(pixU + (y + 1) * strideC, pixU, copySizeC * sizeof(pixel));
+                memcpy(pixV + (y + 1) * strideC, pixV, copySizeC * sizeof(pixel));
+            }
+        }
+    }
 }
 
-void FrameFilter::processRowPost(int row)
+void FrameFilter::processPostRow(int row)
 {
     PicYuv *reconPic = m_frame->m_reconPic;
     const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
@@ -507,7 +592,7 @@
 
         intptr_t stride = reconPic->m_stride;
         uint32_t width  = reconPic->m_picWidth - m_pad[0];
-        uint32_t height = getCUHeight(row);
+        uint32_t height = FrameFilter::ParallelFilter::getCUHeight(row);
 
         uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr), reconPic->getLumaAddr(cuAddr), stride, width, height);
         m_frameEncoder->m_SSDY += ssdY;
@@ -547,7 +632,7 @@
     }
     if (m_param->decodedPictureHashSEI == 1)
     {
-        uint32_t height = getCUHeight(row);
+        uint32_t height = FrameFilter::ParallelFilter::getCUHeight(row);
         uint32_t width = reconPic->m_picWidth;
         intptr_t stride = reconPic->m_stride;
 
@@ -573,7 +658,7 @@
     }
     else if (m_param->decodedPictureHashSEI == 2)
     {
-        uint32_t height = getCUHeight(row);
+        uint32_t height = FrameFilter::ParallelFilter::getCUHeight(row);
         uint32_t width = reconPic->m_picWidth;
         intptr_t stride = reconPic->m_stride;
 
@@ -595,7 +680,7 @@
     else if (m_param->decodedPictureHashSEI == 3)
     {
         uint32_t width = reconPic->m_picWidth;
-        uint32_t height = getCUHeight(row);
+        uint32_t height = FrameFilter::ParallelFilter::getCUHeight(row);
         intptr_t stride = reconPic->m_stride;
         uint32_t cuHeight = g_maxCUSize;
 
diff -r bcc6d005cd85 -r d8c3eded1440 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Mon Dec 21 13:46:26 2015 -0600
+++ b/source/encoder/framefilter.h	Mon Dec 21 13:46:28 2015 -0600
@@ -102,12 +102,12 @@
         // Post-Process (Border extension)
         void processPostCu(uint32_t col) const;
 
-        uint32_t getCUHeight(int rowNum) const
+        static uint32_t getCUHeight(int rowNum)
         {
             return (rowNum == (int)numRows - 1) ? lastHeight : g_maxCUSize;
         }
 
-        uint32_t getCUWidth(int colNum) const
+        static uint32_t getCUWidth(int colNum)
         {
             return (colNum == (int)numCols - 1) ? lastWidth : g_maxCUSize;
         }
@@ -134,8 +134,8 @@
     void start(Frame *pic, Entropy& initState, int qp);
 
     void processRow(int row);
-    void processRowPost(int row);
-    uint32_t getCUHeight(int rowNum) const;
+    void processPostRow(int row);
+    void processPostCu(uint32_t row, uint32_t col) const;
 };
 }
 



More information about the x265-devel mailing list