[x265] [PATCH 2 of 2] framepp: Refactor loopfilter thread

Min Chen chenm003 at 163.com
Mon Aug 19 17:46:47 CEST 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1376927138 -28800
# Node ID bd21077d29ea0c870e7ba6718ba636bb1d22cf9b
# Parent  6a4a9173030c962ae56d02e90485eb7e27071714
framepp: Refactor loopfilter thread

diff -r 6a4a9173030c -r bd21077d29ea source/Lib/TLibCommon/TComPic.cpp
--- a/source/Lib/TLibCommon/TComPic.cpp	Mon Aug 19 23:45:19 2013 +0800
+++ b/source/Lib/TLibCommon/TComPic.cpp	Mon Aug 19 23:45:38 2013 +0800
@@ -85,7 +85,6 @@
 
     int numRows = (height + maxHeight - 1) / maxHeight;
     m_complete_enc = new uint32_t[numRows]; // initial in FrameEncoder::encode()
-    m_complete_lft = new uint32_t[numRows]; // initial in FrameFilter::encode()
 }
 
 Void TComPic::destroy()
@@ -116,11 +115,6 @@
         delete[] m_complete_enc;
     }
 
-    if (m_complete_lft)
-    {
-        delete[] m_complete_lft;
-    }
-
     m_lowres.destroy();
 }
 
diff -r 6a4a9173030c -r bd21077d29ea source/Lib/TLibCommon/TComPic.h
--- a/source/Lib/TLibCommon/TComPic.h	Mon Aug 19 23:45:19 2013 +0800
+++ b/source/Lib/TLibCommon/TComPic.h	Mon Aug 19 23:45:38 2013 +0800
@@ -70,7 +70,6 @@
 public:
 
     volatile uint32_t*    m_complete_enc;       // Array of Col number that was finished stage encode
-    volatile uint32_t*    m_complete_lft;       // Array of Col number that was finished stage loopfilter
 
     x265::Lowres          m_lowres;
 
diff -r 6a4a9173030c -r bd21077d29ea source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Aug 19 23:45:19 2013 +0800
+++ b/source/encoder/frameencoder.cpp	Mon Aug 19 23:45:38 2013 +0800
@@ -944,13 +944,6 @@
         // Completed CU processing
         m_pic->m_complete_enc[row]++;
 
-        // Active Loopfilter
-        if (row > 0)
-        {
-            // NOTE: my version, it need check active flag
-            m_frameFilter.enqueueRow(row - 1);
-        }
-
         if (m_pic->m_complete_enc[row] >= 2 && row < m_numRows - 1)
         {
             ScopedLock below(m_rows[row + 1].m_lock);
@@ -975,6 +968,13 @@
         }
     }
 
+    // Activate loopfilter
+    if (row > 0)
+    {
+        // NOTE: in my version, it needs to check the active flag
+        m_frameFilter.enqueueRow(row - 1);
+    }
+
     // this row of CTUs has been encoded
     if (row == m_numRows - 1)
     {
diff -r 6a4a9173030c -r bd21077d29ea source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Aug 19 23:45:19 2013 +0800
+++ b/source/encoder/framefilter.cpp	Mon Aug 19 23:45:38 2013 +0800
@@ -36,10 +36,7 @@
     : WaveFront(pool)
     , m_cfg(NULL)
     , m_pic(NULL)
-    , m_complete_lftV(NULL)
-    , m_rows_active(NULL)
-    , m_locks(NULL)
-    , m_loopFilter(NULL)
+    , m_lft_active(false)
     , m_sao(NULL)
 {}
 
@@ -47,33 +44,17 @@
 {
     JobProvider::flush();  // ensure no worker threads are using this frame
 
-    if (m_complete_lftV)
-    {
-        delete[] m_complete_lftV;
-    }
-
-    if (m_rows_active)
-    {
-        delete[] m_rows_active;
-    }
-
-    if (m_locks)
-    {
-        delete[] m_locks;
-    }
-
     if (m_cfg->param.bEnableLoopFilter)
     {
         assert(m_cfg->param.bEnableSAO);
+        m_loopFilter.destroy();
         for (int i = 0; i < m_numRows; ++i)
         {
-            m_loopFilter[i].destroy();
             // NOTE: I don't check sao flag since loopfilter and sao have same control status
             m_sao[i].destroy();
             m_sao[i].destroyEncBuffer();
         }
 
-        delete[] m_loopFilter;
         delete[] m_sao;
     }
 }
@@ -83,17 +64,12 @@
     m_cfg = top;
     m_numRows = numRows;
 
-    m_complete_lftV = new uint32_t[numRows];
-    m_rows_active = new bool[numRows];
-    m_locks = new Lock[numRows];
-
     if (top->param.bEnableLoopFilter)
     {
-        m_loopFilter = new TComLoopFilter[numRows];
         m_sao = new TEncSampleAdaptiveOffset[numRows];
+        m_loopFilter.create(g_maxCUDepth);
         for (int i = 0; i < m_numRows; ++i)
         {
-            m_loopFilter[i].create(g_maxCUDepth);
             m_sao[i].setSaoLcuBoundary(top->param.saoLcuBoundary);
             m_sao[i].setSaoLcuBasedOptimization(top->param.saoLcuBasedOptimization);
             m_sao[i].setMaxNumOffsetsPerPic(top->getMaxNumOffsetsPerPic());
@@ -102,7 +78,6 @@
         }
     }
 
-
     if (!WaveFront::init(m_numRows))
     {
         assert(!"Unable to initialize job queue.");
@@ -114,24 +89,16 @@
 {
     m_pic = pic;
 
+    m_loopFilter.setCfg(pic->getSlice()->getPPS()->getLoopFilterAcrossTilesEnabledFlag());
+    m_lft_active = false;
     for (int i = 0; i < m_numRows; i++)
     {
         if (m_cfg->param.bEnableLoopFilter)
         {
-            // TODO: I think this flag unused since we remove Tiles
-            m_loopFilter[i].setCfg(pic->getSlice()->getPPS()->getLoopFilterAcrossTilesEnabledFlag());
-            m_pic->m_complete_lft[i] = 0;
-            m_rows_active[i] = false;
-            m_complete_lftV[i] = 0;
-
             if (m_cfg->param.saoLcuBasedOptimization && m_cfg->param.saoLcuBoundary)
                 m_sao[i].resetStats();
             m_sao[i].createPicSaoInfo(pic);
         }
-        else
-        {
-            m_pic->m_complete_lft[i] = MAX_INT; // for SAO
-        }
     }
 
     if (m_cfg->param.bEnableLoopFilter && m_pool && m_cfg->param.bEnableWavefront)
@@ -160,11 +127,10 @@
 
 void FrameFilter::enqueueRow(int row)
 {
-    ScopedLock self(m_locks[row]);
+    ScopedLock self(m_lock);
 
-    if (!m_rows_active[row])
+    if (!m_lft_active)
     {
-        m_rows_active[row] = true;
         WaveFront::enqueueRow(row);
     }
 }
@@ -177,63 +143,58 @@
 
     const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
     const uint32_t lineStartCUAddr = row * numCols;
-    for (UInt col = m_complete_lftV[row]; col < numCols; col++)
+
     {
+        ScopedLock self(m_lock);
+        if (m_lft_active)
+            return;
+        m_lft_active = true;
+    }
+
+    // SAO parameter estimation using non-deblocked pixels for LCU bottom and right boundary areas
+    if (m_cfg->param.saoLcuBasedOptimization && m_cfg->param.saoLcuBoundary)
+    {
+        for (UInt col = 0; col < numCols; col++)
         {
-            // TODO: modify FindJob to avoid invalid status here
-            ScopedLock self(m_locks[row]);
-            if (row < m_numRows - 1 && m_pic->m_complete_enc[row + 1] < col + 1)
-            {
-                m_rows_active[row] = false;
-                return;
-            }
-            if (row == m_numRows - 1 && m_pic->m_complete_enc[row] < col + 1)
-            {
-                m_rows_active[row] = false;
-                return;
-            }
-            if (row > 0 && m_complete_lftV[row - 1] < col + 1)
-            {
-                m_rows_active[row] = false;
-                return;
-            }
+            const uint32_t cuAddr = lineStartCUAddr + col;
+            m_sao[row].calcSaoStatsLCu_BeforeDblk(m_pic, cuAddr);
         }
+    }
+
+    for (UInt col = 0; col < numCols; col++)
+    {
         const uint32_t cuAddr = lineStartCUAddr + col;
         TComDataCU* cu = m_pic->getCU(cuAddr);
 
-        // SAO parameter estimation using non-deblocked pixels for LCU bottom and right boundary areas
-        if (m_cfg->param.saoLcuBasedOptimization && m_cfg->param.saoLcuBoundary)
-        {
-            m_sao[row].calcSaoStatsLCu_BeforeDblk(m_pic, cuAddr);
-        }
-
-        m_loopFilter[row].loopFilterCU(cu, EDGE_VER);
-        m_complete_lftV[row]++;
+        m_loopFilter.loopFilterCU(cu, EDGE_VER);
 
         if (col > 0)
         {
             TComDataCU* cu_prev = m_pic->getCU(cuAddr - 1);
-            m_loopFilter[row].loopFilterCU(cu_prev, EDGE_HOR);
-            m_pic->m_complete_lft[row]++;
-        }
-
-        // Active next row when possible
-        if (m_complete_lftV[row] >= 2 && row < m_numRows - 1)
-        {
-            ScopedLock below(m_locks[row + 1]);
-            if (m_rows_active[row + 1] == false &&
-                (m_complete_lftV[row + 1] + 2 <= m_complete_lftV[row] || m_complete_lftV[row] == numCols))
-            {
-                m_rows_active[row + 1] = true;
-                WaveFront::enqueueRow(row + 1);
-            }
+            m_loopFilter.loopFilterCU(cu_prev, EDGE_HOR);
         }
     }
 
     {
         TComDataCU* cu_prev = m_pic->getCU(lineStartCUAddr + numCols - 1);
-        m_loopFilter[row].loopFilterCU(cu_prev, EDGE_HOR);
-        m_pic->m_complete_lft[row]++;
+        m_loopFilter.loopFilterCU(cu_prev, EDGE_HOR);
+    }
+
+    // Activate the next row when possible
+    ScopedLock self(m_lock);
+    m_lft_active = false;
+    if (row + 2 < m_numRows)
+    {
+        if (m_pic->m_complete_enc[row + 2] == numCols)
+        {
+            WaveFront::enqueueRow(row + 1);
+        }
+    }
+
+    // Activate the last row
+    if (row == m_numRows - 2)
+    {
+        WaveFront::enqueueRow(row + 1);
     }
 
     // this row of CTUs has been encoded
diff -r 6a4a9173030c -r bd21077d29ea source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Mon Aug 19 23:45:19 2013 +0800
+++ b/source/encoder/framefilter.h	Mon Aug 19 23:45:38 2013 +0800
@@ -65,13 +65,12 @@
 
     TEncCfg*            m_cfg;
     TComPic*            m_pic;
-    volatile uint32_t*  m_complete_lftV;
-    volatile bool*      m_rows_active;
-    Lock*               m_locks;
+    volatile bool       m_lft_active;
+    Lock                m_lock;
 
 public:
 
-    TComLoopFilter*             m_loopFilter;
+    TComLoopFilter              m_loopFilter;
     TEncSampleAdaptiveOffset*   m_sao;
     int                         m_numRows;
     Event                       m_completionEvent;



More information about the x265-devel mailing list