[x265] [PATCH 2 of 4] move loopfilter into encode loop and process on CU level
    Min Chen 
    chenm003 at 163.com
       
    Sat Jun 21 01:41:39 CEST 2014
    
    
  
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1403307658 25200
# Node ID f6efadb8d2413e321f2b1964e6589410baeacc20
# Parent  3d5d76ef6559d1d5ffea529f51f1995f8c3facdc
move loopfilter into encode loop and process on CU level
diff -r 3d5d76ef6559 -r f6efadb8d241 source/encoder/cturow.h
--- a/source/encoder/cturow.h	Fri Jun 20 16:40:44 2014 -0700
+++ b/source/encoder/cturow.h	Fri Jun 20 16:40:58 2014 -0700
@@ -46,6 +46,10 @@
     RDCost      m_rdCost;
     TComTrQuant m_trQuant;
 
+    // NOTE: the maximum LCU (64x64) has 256 partitions
+    ALIGN_VAR_32(bool, edgeFilter[256]);
+    ALIGN_VAR_32(uint8_t, blockingStrength[256]);
+
     void init(Encoder&);
     ~ThreadLocalData();
 };
diff -r 3d5d76ef6559 -r f6efadb8d241 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Jun 20 16:40:44 2014 -0700
+++ b/source/encoder/encoder.cpp	Fri Jun 20 16:40:58 2014 -0700
@@ -194,9 +194,10 @@
     if (m_frameEncoder)
     {
         int numRows = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
+        int numCols = (m_param->sourceWidth  + g_maxCUSize - 1) / g_maxCUSize;
         for (int i = 0; i < m_param->frameNumThreads; i++)
         {
-            if (!m_frameEncoder[i].init(this, numRows))
+            if (!m_frameEncoder[i].init(this, numRows, numCols))
             {
                 x265_log(m_param, X265_LOG_ERROR, "Unable to initialize frame encoder, aborting\n");
                 m_aborted = true;
diff -r 3d5d76ef6559 -r f6efadb8d241 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Fri Jun 20 16:40:44 2014 -0700
+++ b/source/encoder/frameencoder.cpp	Fri Jun 20 16:40:58 2014 -0700
@@ -80,15 +80,17 @@
     stop();
 }
 
-bool FrameEncoder::init(Encoder *top, int numRows)
+bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
 {
     bool ok = true;
 
     m_top = top;
     m_param = top->m_param;
     m_numRows = numRows;
+    m_numCols = numCols;
     m_filterRowDelay = (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary) ?
         2 : (m_param->bEnableSAO || m_param->bEnableLoopFilter ? 1 : 0);
+    m_filterRowDelayCus = m_filterRowDelay * numCols;
 
     m_rows = new CTURow[m_numRows];
     for (int i = 0; i < m_numRows; ++i)
@@ -945,7 +947,8 @@
     tld.m_search.m_me.setSourcePlane(fenc->getLumaAddr(), fenc->getStride());
 
     int64_t startTime = x265_mdate();
-    const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
+    assert(m_pic->getPicSym()->getFrameWidthInCU() == m_numCols);
+    const uint32_t numCols = m_numCols;
     const uint32_t lineStartCUAddr = row * numCols;
     bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
 
@@ -1074,6 +1077,20 @@
                 // TODO: seems dead code, DEBUG IT!
                 m_frameFilter.m_sao.calcSaoStatsCu_BeforeDblk(m_pic, col, row);
             }
+
+            if (m_param->bEnableLoopFilter && row >= m_filterRowDelay)
+            {
+                TComDataCU* tmpCu;
+
+                tmpCu = m_pic->getCU(cuAddr - m_filterRowDelayCus);
+                m_frameFilter.m_loopFilter.loopFilterCU(tmpCu, EDGE_VER, tld.edgeFilter, tld.blockingStrength);
+
+                if (col > 0)
+                {
+                    tmpCu = m_pic->getCU(cuAddr - m_filterRowDelayCus - 1);
+                    m_frameFilter.m_loopFilter.loopFilterCU(tmpCu, EDGE_HOR, tld.edgeFilter, tld.blockingStrength);
+                }
+            }
         }
 
         // NOTE: active next row
@@ -1101,6 +1118,34 @@
     }
 
     // this row of CTUs has been encoded
+    if (m_param->bEnableLoopFilter)
+    {
+        TComDataCU* tmpCu;
+
+        if (row >= m_filterRowDelay)
+        {
+            tmpCu = m_pic->getCU(lineStartCUAddr - m_filterRowDelayCus + numCols - 1);
+            m_frameFilter.m_loopFilter.loopFilterCU(tmpCu, EDGE_HOR, tld.edgeFilter, tld.blockingStrength);
+        }
+
+        if (row == m_numRows - 1)
+        {
+            TComDataCU* prevTmpCu;
+            tmpCu = m_pic->getCU(lineStartCUAddr + 0);
+            m_frameFilter.m_loopFilter.loopFilterCU(tmpCu, EDGE_VER, tld.edgeFilter, tld.blockingStrength);
+
+            prevTmpCu = tmpCu;
+            for(uint32_t i = 1; i < numCols; i++)
+            {
+                tmpCu = m_pic->getCU(lineStartCUAddr + i);
+                m_frameFilter.m_loopFilter.loopFilterCU(tmpCu,     EDGE_VER, tld.edgeFilter, tld.blockingStrength);
+                m_frameFilter.m_loopFilter.loopFilterCU(prevTmpCu, EDGE_HOR, tld.edgeFilter, tld.blockingStrength);
+                prevTmpCu = tmpCu;
+            }
+
+            m_frameFilter.m_loopFilter.loopFilterCU(prevTmpCu, EDGE_HOR, tld.edgeFilter, tld.blockingStrength);
+        }
+    }
 
     // trigger row-wise loop filters
     if (row >= m_filterRowDelay)
diff -r 3d5d76ef6559 -r f6efadb8d241 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Fri Jun 20 16:40:44 2014 -0700
+++ b/source/encoder/frameencoder.h	Fri Jun 20 16:40:58 2014 -0700
@@ -66,7 +66,7 @@
 
     void setThreadPool(ThreadPool *p);
 
-    bool init(Encoder *top, int numRows);
+    bool init(Encoder *top, int numRows, int numCols);
 
     void destroy();
 
@@ -155,6 +155,7 @@
     bool                     m_threadActive;
 
     int                      m_numRows;
+    uint32_t                 m_numCols;
     CTURow*                  m_rows;
     TComSPS                  m_sps;
     TComPPS                  m_pps;
@@ -196,6 +197,7 @@
     TComPic*                 m_pic;
 
     int                      m_filterRowDelay;
+    int                      m_filterRowDelayCus;
     Event                    m_completionEvent;
     int64_t                  m_totalTime;
     bool                     m_isReferenced;
diff -r 3d5d76ef6559 -r f6efadb8d241 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Fri Jun 20 16:40:44 2014 -0700
+++ b/source/encoder/framefilter.cpp	Fri Jun 20 16:40:58 2014 -0700
@@ -139,35 +139,9 @@
         m_sao.startSaoEnc(m_pic, &m_entropyCoder, &m_rdGoOnSbacCoder);
     }
 
-    const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
-    const uint32_t lineStartCUAddr = row * numCols;
-
     // NOTE: remove m_sao.calcSaoStatsRowCus_BeforeDblk at here, we do it in encode loop now
 
-    if (m_param->bEnableLoopFilter)
-    {
-        bool edgeFilter[256];    // NOTE: the maximum LCU 64x64 have 256 partitions
-        uint8_t blockingStrength[256];
-
-        for (uint32_t col = 0; col < numCols; col++)
-        {
-            const uint32_t cuAddr = lineStartCUAddr + col;
-            TComDataCU* cu = m_pic->getCU(cuAddr);
-
-            m_loopFilter.loopFilterCU(cu, EDGE_VER, edgeFilter, blockingStrength);
-
-            if (col > 0)
-            {
-                TComDataCU* cu_prev = m_pic->getCU(cuAddr - 1);
-                m_loopFilter.loopFilterCU(cu_prev, EDGE_HOR, edgeFilter, blockingStrength);
-            }
-        }
-
-        {
-            TComDataCU* cu_prev = m_pic->getCU(lineStartCUAddr + numCols - 1);
-            m_loopFilter.loopFilterCU(cu_prev, EDGE_HOR, edgeFilter, blockingStrength);
-        }
-    }
+    // NOTE: the loop filter has moved into the encode loop
 
     // SAO
     SAOParam* saoParam = m_pic->getPicSym()->getSaoParam();
    
    
More information about the x265-devel
mailing list