[x265] [PATCH 05 of 24] move SAO into class ParallelFilter and make it row-based

Min Chen chenm003 at 163.com
Tue Dec 8 00:54:42 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1449511560 21600
# Node ID c68eec7fb242748363ec985937b20ed1aff73f02
# Parent  3542d3abd018491d6ad67a79b0e6d05b604d3818
move SAO into class ParallelFilter and make it row-based
---
 source/common/common.h          |    1 +
 source/encoder/frameencoder.cpp |   36 +++++++-------
 source/encoder/framefilter.cpp  |   95 +++++++++++++++++++++++++-------------
 source/encoder/framefilter.h    |   14 +++---
 source/encoder/sao.cpp          |   81 ++++++++++++++++++++++++---------
 source/encoder/sao.h            |    7 ++-
 6 files changed, 151 insertions(+), 83 deletions(-)

diff -r 3542d3abd018 -r c68eec7fb242 source/common/common.h
--- a/source/common/common.h	Mon Dec 07 12:05:57 2015 -0600
+++ b/source/common/common.h	Mon Dec 07 12:06:00 2015 -0600
@@ -215,6 +215,7 @@
 
 #define X265_MALLOC(type, count)    (type*)x265_malloc(sizeof(type) * (count))
 #define X265_FREE(ptr)              x265_free(ptr)
+#define X265_FREE_ZERO(ptr)         do { x265_free(ptr); (ptr) = NULL; } while (0) /* free, then null the pointer; expands to a single statement */
 #define CHECKED_MALLOC(var, type, count) \
     { \
         var = (type*)x265_malloc(sizeof(type) * (count)); \
diff -r 3542d3abd018 -r c68eec7fb242 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Dec 07 12:05:57 2015 -0600
+++ b/source/encoder/frameencoder.cpp	Mon Dec 07 12:06:00 2015 -0600
@@ -1093,7 +1093,7 @@
 
         /* SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas */
         if (m_param->bEnableSAO && m_param->bSaoNonDeblocked)
-            m_frameFilter.m_sao.calcSaoStatsCu_BeforeDblk(m_frame, col, row);
+            m_frameFilter.m_parallelFilter[row].m_sao.calcSaoStatsCu_BeforeDblk(m_frame, col, row);
 
         /* Deblock with idle threading */
         if (m_param->bEnableLoopFilter)
@@ -1103,24 +1103,24 @@
             if (row > 0)
             {
                 // Waitting last threading finish
-                m_frameFilter.m_pdeblock[row - 1].waitForExit();
+                m_frameFilter.m_parallelFilter[row - 1].waitForExit();
 
                 // Processing new group
-                const int allowCol = ((row >= 2) ? X265_MIN(m_frameFilter.m_pdeblock[row - 2].m_lastCol.get(), (int)col) : col);
-                m_frameFilter.m_pdeblock[row - 1].m_allowedCol.set(allowCol);
-                m_frameFilter.m_pdeblock[row - 1].tryBondPeers(*this, 1);
+                const int allowCol = ((row >= 2) ? X265_MIN(m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get(), (int)col) : col);
+                m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol);
+                m_frameFilter.m_parallelFilter[row - 1].tryBondPeers(*this, 1);
             }
 
             // Last Row may start early
             if (row == m_numRows - 1)
             {
                 // Waitting last threading finish
-                m_frameFilter.m_pdeblock[row].waitForExit();
+                m_frameFilter.m_parallelFilter[row].waitForExit();
 
                 // Processing last row
-                const int allowCol = ((row >= 2) ? X265_MIN(m_frameFilter.m_pdeblock[row - 1].m_lastCol.get(), (int)col) : col);
-                m_frameFilter.m_pdeblock[row].m_allowedCol.set(allowCol);
-                m_frameFilter.m_pdeblock[row].tryBondPeers(*this, 1);
+                const int allowCol = ((row >= 2) ? X265_MIN(m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get(), (int)col) : col);
+                m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol);
+                m_frameFilter.m_parallelFilter[row].tryBondPeers(*this, 1);
             }
         }
 
@@ -1188,17 +1188,17 @@
         if (m_param->bEnableLoopFilter & (row > 0))
         {
             /* TODO: Multiple Threading */
-            m_frameFilter.m_pdeblock[row - 1].waitForExit();
+            m_frameFilter.m_parallelFilter[row - 1].waitForExit();
 
             /* Check to avoid previous row process slower than current row */
             if (row >= 2)
             {
-                int prevCol = m_frameFilter.m_pdeblock[row - 2].m_lastCol.get();
+                int prevCol = m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get();
                 while(prevCol != (int)numCols)
-                    prevCol = m_frameFilter.m_pdeblock[row - 2].m_lastCol.waitForChange(prevCol);
+                    prevCol = m_frameFilter.m_parallelFilter[row - 2].m_lastCol.waitForChange(prevCol);
             }
-            m_frameFilter.m_pdeblock[row - 1].m_allowedCol.set(numCols);
-            m_frameFilter.m_pdeblock[row - 1].processTasks(-1);
+            m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
+            m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
         }
 
         /* trigger row-wise loop filters */
@@ -1217,12 +1217,12 @@
             /* TODO: Early start last row */
             if (m_param->bEnableLoopFilter)
             {
-                X265_CHECK(m_frameFilter.m_pdeblock[row - 1].m_allowedCol.get() == (int)numCols, "Deblock m_EncodedCol check failed");
+                X265_CHECK(m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.get() == (int)numCols, "Deblock m_EncodedCol check failed");
 
                 /* NOTE: Last Row not execute before, so didn't need wait */
-                m_frameFilter.m_pdeblock[row].waitForExit();
-                m_frameFilter.m_pdeblock[row].m_allowedCol.set(numCols);
-                m_frameFilter.m_pdeblock[row].processTasks(-1);
+                m_frameFilter.m_parallelFilter[row].waitForExit();
+                m_frameFilter.m_parallelFilter[row].m_allowedCol.set(numCols);
+                m_frameFilter.m_parallelFilter[row].processTasks(-1);
             }
 
             for (uint32_t i = m_numRows - m_filterRowDelay; i < m_numRows; i++)
diff -r 3542d3abd018 -r c68eec7fb242 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Dec 07 12:05:57 2015 -0600
+++ b/source/encoder/framefilter.cpp	Mon Dec 07 12:06:00 2015 -0600
@@ -35,19 +35,22 @@
 static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height);
 static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt);
 
-uint32_t FrameFilter::ParallelDeblock::numCols = 0;
+uint32_t FrameFilter::ParallelFilter::numCols = 0;
 
 void FrameFilter::destroy()
 {
-    if (m_param->bEnableSAO)
-        m_sao.destroy();
-
     X265_FREE(m_ssimBuf);
 
-    if (m_pdeblock)
+    if (m_parallelFilter)
     {
-        delete[] m_pdeblock;
-        m_pdeblock = NULL;
+        if (m_param->bEnableSAO)
+        {
+            for(int row = 0; row < m_numRows; row++)
+                m_parallelFilter[row].m_sao.destroy((row == 0 ? 1 : 0));
+        }
+
+        delete[] m_parallelFilter;
+        m_parallelFilter = NULL;
     }
 }
 
@@ -63,50 +66,65 @@
     m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
     m_lastHeight = m_param->sourceHeight % g_maxCUSize ? m_param->sourceHeight % g_maxCUSize : g_maxCUSize;
 
-    if (m_param->bEnableSAO)
-        if (!m_sao.create(m_param))
-            m_param->bEnableSAO = 0;
-
     if (m_param->bEnableSsim)
         m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
 
     if (m_param->bEnableLoopFilter)
-        m_pdeblock = new ParallelDeblock[numRows];
+        m_parallelFilter = new ParallelFilter[numRows];
 
-    if (m_pdeblock)
+    if (m_parallelFilter)
     {
+        if (m_param->bEnableSAO)
+        {
+            for(int row = 0; row < numRows; row++)
+            {
+                if (!m_parallelFilter[row].m_sao.create(m_param, (row == 0 ? 1 : 0)))
+                    m_param->bEnableSAO = 0;
+                else
+                {
+                    if (row != 0)
+                        m_parallelFilter[row].m_sao.createFromRootNode(&m_parallelFilter[0].m_sao);
+                }
+
+            }
+        }
+
         for(int row = 0; row < numRows; row++)
         {
-            m_pdeblock[row].m_rowAddr = row * numCols;
-            m_pdeblock[row].m_frameEncoder = m_frameEncoder;
+            m_parallelFilter[row].m_rowAddr = row * numCols;
+            m_parallelFilter[row].m_frameEncoder = m_frameEncoder;
         }
     }
 
     // Setting maximum columns
-    ParallelDeblock::numCols = numCols;
+    ParallelFilter::numCols = numCols;
 }
 
 void FrameFilter::start(Frame *frame, Entropy& initState, int qp)
 {
     m_frame = frame;
 
-    if (m_param->bEnableSAO)
-        m_sao.startSlice(frame, initState, qp);
-
-    // Reset Deblock Data Struct
-    if (m_pdeblock)
+    // Reset Filter Data Struct
+    if (m_parallelFilter)
     {
         for(int row = 0; row < m_numRows; row++)
         {
-            m_pdeblock[row].m_lastCol.set(0);
-            m_pdeblock[row].m_allowedCol.set(0);
-            m_pdeblock[row].m_encData = frame->m_encData;
+            if (m_param->bEnableSAO)
+                m_parallelFilter[row].m_sao.startSlice(frame, initState, qp);
+
+            m_parallelFilter[row].m_lastCol.set(0);
+            m_parallelFilter[row].m_allowedCol.set(0);
+            m_parallelFilter[row].m_encData = frame->m_encData;
         }
+
+        // Reset SAO global/common statistics
+        if (m_param->bEnableSAO)
+            m_parallelFilter[0].m_sao.resetStats();
     }
 }
 
 // NOTE: Single Threading only
-void FrameFilter::ParallelDeblock::processTasks(int /*workerThreadId*/)
+void FrameFilter::ParallelFilter::processTasks(int /*workerThreadId*/)
 {
     const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms;
     const uint32_t* ctuGeomMap = m_frameEncoder->m_ctuGeomMap;
@@ -160,11 +178,11 @@
     SAOParam* saoParam = encData.m_saoParam;
     if (m_param->bEnableSAO)
     {
-        m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
-        m_sao.m_rdContexts.next.load(m_frameEncoder->m_initSliceContext);
-        m_sao.m_rdContexts.cur.load(m_frameEncoder->m_initSliceContext);
+        m_parallelFilter[row].m_sao.m_entropyCoder.load(m_frameEncoder->m_initSliceContext);
+        m_parallelFilter[row].m_sao.m_rdContexts.next.load(m_frameEncoder->m_initSliceContext);
+        m_parallelFilter[row].m_sao.m_rdContexts.cur.load(m_frameEncoder->m_initSliceContext);
 
-        m_sao.rdoSaoUnitRow(saoParam, row);
+        m_parallelFilter[row].m_sao.rdoSaoUnitRow(saoParam, row);
 
         // NOTE: Delay a row because SAO decide need top row pixels at next row, is it HM's bug?
         if (row >= m_saoRowDelay)
@@ -180,7 +198,7 @@
     {
         if (m_param->bEnableSAO)
         {
-            m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame);
+            m_parallelFilter[row].m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame);
 
             for (int i = m_numRows - m_saoRowDelay; i < m_numRows; i++)
                 processSao(i);
@@ -489,12 +507,23 @@
     SAOParam* saoParam = encData.m_saoParam;
 
     if (saoParam->bSaoFlag[0])
-        m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0);
+    {
+        m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0);
+        if (row != m_numRows - 1)
+        {
+            memcpy(m_parallelFilter[row + 1].m_sao.m_tmpU1[0], m_parallelFilter[row].m_sao.m_tmpU1[0], sizeof(pixel) * m_param->sourceWidth);
+        }
+    }
 
     if (saoParam->bSaoFlag[1])
     {
-        m_sao.processSaoUnitRow(saoParam->ctuParam[1], row, 1);
-        m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2);
+        m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[1], row, 1);
+        m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2);
+        if (row != m_numRows - 1)
+        {
+            memcpy(m_parallelFilter[row + 1].m_sao.m_tmpU1[1], m_parallelFilter[row].m_sao.m_tmpU1[1], sizeof(pixel) * m_param->sourceWidth);
+            memcpy(m_parallelFilter[row + 1].m_sao.m_tmpU1[2], m_parallelFilter[row].m_sao.m_tmpU1[2], sizeof(pixel) * m_param->sourceWidth);
+        }
     }
 
     if (encData.m_slice->m_pps->bTransquantBypassEnabled)
diff -r 3542d3abd018 -r c68eec7fb242 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Mon Dec 07 12:05:57 2015 -0600
+++ b/source/encoder/framefilter.h	Mon Dec 07 12:06:00 2015 -0600
@@ -51,7 +51,6 @@
     int           m_vChromaShift;
     int           m_pad[2];
 
-    SAO           m_sao;
     int           m_numRows;
     int           m_saoRowDelay;
     int           m_lastHeight;
@@ -59,41 +58,42 @@
     void*         m_ssimBuf; /* Temp storage for ssim computation */
 
 #define MAX_PFILTER_CUS     (4) /* maximum CUs for every thread */
-    class ParallelDeblock : public BondedTaskGroup, public Deblock
+    class ParallelFilter : public BondedTaskGroup, public Deblock
     {
     public:
         static uint32_t     numCols;
         uint32_t            m_rowAddr;
         FrameEncoder*       m_frameEncoder;
         FrameData*          m_encData;
+        SAO                 m_sao;
         ThreadSafeInteger   m_lastCol;          /* The column that next to process */
         ThreadSafeInteger   m_allowedCol;       /* The column that processed from Encode pipeline */
 
-        ParallelDeblock()
+        ParallelFilter()
             : m_rowAddr(0)
             , m_frameEncoder(NULL)
             , m_encData(NULL)
         {
         }
 
-        ~ParallelDeblock()
+        ~ParallelFilter()
         { }
 
         void processTasks(int workerThreadId);
 
     protected:
 
-        ParallelDeblock operator=(const ParallelDeblock&);
+        ParallelFilter operator=(const ParallelFilter&);
     };
 
-    ParallelDeblock*    m_pdeblock;
+    ParallelFilter*     m_parallelFilter;
 
     FrameFilter()
         : m_param(NULL)
         , m_frame(NULL)
         , m_frameEncoder(NULL)
         , m_ssimBuf(NULL)
-        , m_pdeblock(NULL)
+        , m_parallelFilter(NULL)
     {
     }
 
diff -r 3542d3abd018 -r c68eec7fb242 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Mon Dec 07 12:05:57 2015 -0600
+++ b/source/encoder/sao.cpp	Mon Dec 07 12:06:00 2015 -0600
@@ -103,7 +103,7 @@
     m_depthSaoRate[1][3] = 0;
 }
 
-bool SAO::create(x265_param* param)
+bool SAO::create(x265_param* param, int initCommon)
 {
     m_param = param;
     m_chromaFormat = param->internalCsp;
@@ -131,12 +131,24 @@
         m_tmpU2[i] += 1;
     }
 
-    CHECKED_MALLOC(m_count, PerClass, NUM_PLANE);
-    CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE);
-    CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE);
+    if (initCommon)
+    {
+        CHECKED_MALLOC(m_count, PerClass, NUM_PLANE);
+        CHECKED_MALLOC(m_offset, PerClass, NUM_PLANE);
+        CHECKED_MALLOC(m_offsetOrg, PerClass, NUM_PLANE);
 
-    CHECKED_MALLOC(m_countPreDblk, PerPlane, numCtu);
-    CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numCtu);
+        CHECKED_MALLOC(m_countPreDblk, PerPlane, numCtu);
+        CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numCtu);
+    }
+    else
+    {
+        // shared buffers are not allocated here; the caller must assign these pointers via createFromRootNode()
+        m_count = NULL;
+        m_offset = NULL;
+        m_offsetOrg = NULL;
+        m_countPreDblk = NULL;
+        m_offsetOrgPreDblk = NULL;
+    }
 
     m_clipTable = &(m_clipTableBase[rangeExt]);
 
@@ -155,24 +167,50 @@
     return false;
 }
 
-void SAO::destroy()
+void SAO::createFromRootNode(SAO* root)
 {
-    X265_FREE(m_clipTableBase);
+    X265_CHECK(m_count == NULL, "duplicate initialize on m_count");
+    X265_CHECK(m_offset == NULL, "duplicate initialize on m_offset");
+    X265_CHECK(m_offsetOrg == NULL, "duplicate initialize on m_offsetOrg");
+    X265_CHECK(m_countPreDblk == NULL, "duplicate initialize on m_countPreDblk");
+    X265_CHECK(m_offsetOrgPreDblk == NULL, "duplicate initialize on m_offsetOrgPreDblk");
 
-    X265_FREE(m_tmpL1);
-    X265_FREE(m_tmpL2);
+    m_count = root->m_count;
+    m_offset = root->m_offset;
+    m_offsetOrg = root->m_offsetOrg;
+    m_countPreDblk = root->m_countPreDblk;
+    m_offsetOrgPreDblk = root->m_offsetOrgPreDblk;
+}
+
+void SAO::destroy(int destoryCommon)
+{
+    X265_FREE_ZERO(m_clipTableBase);
+
+    X265_FREE_ZERO(m_tmpL1);
+    X265_FREE_ZERO(m_tmpL2);
 
     for (int i = 0; i < 3; i++)
     {
-        if (m_tmpU1[i]) X265_FREE(m_tmpU1[i] - 1);
-        if (m_tmpU2[i]) X265_FREE(m_tmpU2[i] - 1);
+        if (m_tmpU1[i])
+        {
+            X265_FREE(m_tmpU1[i] - 1);
+            m_tmpU1[i] = NULL;
+        }
+        if (m_tmpU2[i])
+        {
+            X265_FREE(m_tmpU2[i] - 1);
+            m_tmpU2[i] = NULL;
+        }
     }
 
-    X265_FREE(m_count);
-    X265_FREE(m_offset);
-    X265_FREE(m_offsetOrg);
-    X265_FREE(m_countPreDblk);
-    X265_FREE(m_offsetOrgPreDblk);
+    if (destoryCommon)
+    {
+        X265_FREE(m_count);
+        X265_FREE(m_offset);
+        X265_FREE(m_offsetOrg);
+        X265_FREE(m_countPreDblk);
+        X265_FREE(m_offsetOrgPreDblk);
+    }
 }
 
 /* allocate memory for SAO parameters */
@@ -210,8 +248,6 @@
         break;
     }
 
-    resetStats();
-
     m_entropyCoder.load(initState);
     m_rdContexts.next.load(initState);
     m_rdContexts.cur.load(initState);
@@ -586,15 +622,14 @@
         ctuHeight >>= m_vChromaShift;
     }
 
+    int addr = idxY * m_numCuInWidth;
+    pixel* rec = reconPic->getPlaneAddr(plane, addr);
+
     if (!idxY)
     {
-        pixel* rec = reconPic->m_picOrg[plane];
         memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
     }
 
-    int addr = idxY * m_numCuInWidth;
-    pixel* rec = plane ? reconPic->getChromaAddr(plane, addr) : reconPic->getLumaAddr(addr);
-
     for (int i = 0; i < ctuHeight + 1; i++)
     {
         m_tmpL1[i] = rec[0];
diff -r 3542d3abd018 -r c68eec7fb242 source/encoder/sao.h
--- a/source/encoder/sao.h	Mon Dec 07 12:05:57 2015 -0600
+++ b/source/encoder/sao.h	Mon Dec 07 12:06:00 2015 -0600
@@ -120,8 +120,9 @@
 
     SAO();
 
-    bool create(x265_param* param);
-    void destroy();
+    bool create(x265_param* param, int initCommon);
+    void createFromRootNode(SAO *root);
+    void destroy(int destoryCommon);
 
     void allocSaoParam(SAOParam* saoParam) const;
 
@@ -147,6 +148,8 @@
 
     void rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus);
     void rdoSaoUnitRow(SAOParam* saoParam, int idxY);
+
+    friend class FrameFilter;
 };
 
 }



More information about the x265-devel mailing list