[x265] [PATCH 1 of 2] optimize sync logic to improve speed on preset medium and below

Min Chen chenm003 at 163.com
Thu Dec 31 18:05:27 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1451579383 21600
# Node ID 9e0fe9704998425e8d014fdfdb3c12f24e6c3cd9
# Parent  375ce77b8c35ae332bf51085f6d26044d55ae264
optimize sync logic to improve speed on preset medium and below
---
 source/encoder/frameencoder.cpp |   59 +++++---------------------------------
 source/encoder/framefilter.cpp  |   37 ++++++++++++++++++++++++
 source/encoder/framefilter.h    |    2 +-
 3 files changed, 46 insertions(+), 52 deletions(-)

diff -r 375ce77b8c35 -r 9e0fe9704998 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Dec 28 16:06:55 2015 -0600
+++ b/source/encoder/frameencoder.cpp	Thu Dec 31 10:29:43 2015 -0600
@@ -1204,64 +1204,21 @@
         rowCoder.finishSlice();
 
     /* Processing left Deblock block with current threading */
-    if ((m_param->bEnableLoopFilter | m_param->bEnableSAO) & (row >= 1))
+    if ((m_param->bEnableLoopFilter | m_param->bEnableSAO) & (row >= 2))
     {
         /* TODO: Multiple Threading */
-        /* Check to avoid previous row process slower than current row */
-        if (row >= 2)
+
+        /* Process the previous row on the current thread only once row - 2 has finished deblocking */
+        if (m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() == (int)numCols)
         {
-            int prevCol = m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get();
-            while(prevCol != (int)numCols)
-                prevCol = m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.waitForChange(prevCol);
+            /* wait for threaded work on row - 1 to exit, then restart that row here */
+            m_frameFilter.m_parallelFilter[row - 1].waitForExit();
+            m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
+            m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
         }
-        m_frameFilter.m_parallelFilter[row - 1].waitForExit();
-        m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
-        m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
     }
 
     /* trigger row-wise loop filters */
-    if (row == m_numRows - 1)
-    {
-        /* TODO: Early start last row */
-        if (m_param->bEnableLoopFilter | m_param->bEnableSAO)
-        {
-            if (m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get() != (int)numCols)
-                x265_log(m_param, X265_LOG_WARNING, "detected ParallelFilter race condition on last row\n");
-
-            // avoid race on last row and last column
-            if (row >= 1)
-            {
-                int prevCol = m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get();
-                while(prevCol != (int)numCols)
-                    prevCol = m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.waitForChange(prevCol);
-            }
-
-            /* NOTE: Last Row not execute before, so didn't need wait */
-            m_frameFilter.m_parallelFilter[row].waitForExit();
-            m_frameFilter.m_parallelFilter[row].m_allowedCol.set(numCols);
-            m_frameFilter.m_parallelFilter[row].processTasks(-1);
-
-            /* Apply SAO on last row of CUs, because we always apply SAO on row[X-1] */
-            if (m_param->bEnableSAO)
-            {
-                FrameData* encData = m_frameFilter.m_parallelFilter[row].m_encData;
-                SAOParam* saoParam = encData->m_saoParam;
-                for(uint32_t col = 0; col < numCols; col++)
-                {
-                    // NOTE: must use processSaoUnitCu(), it include TQBypass logic
-                    m_frameFilter.m_parallelFilter[row].processSaoUnitCu(saoParam, col);
-                }
-            }
-
-            // Process border extension on last row
-            for(uint32_t col = 0; col < numCols; col++)
-            {
-                // m_reconColCount will be set in processPostCu()
-                m_frameFilter.m_parallelFilter[row].processPostCu(col);
-            }
-        }
-    }
-
     if (m_param->bEnableWavefront)
     {
         if (row >= m_filterRowDelay)
diff -r 375ce77b8c35 -r 9e0fe9704998 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Mon Dec 28 16:06:55 2015 -0600
+++ b/source/encoder/framefilter.cpp	Thu Dec 31 10:29:43 2015 -0600
@@ -460,6 +460,43 @@
     // SAO: was integrate into encode loop
     SAOParam* saoParam = encData.m_saoParam;
 
+    /* Processing left block Deblock with current threading */
+    {
+        /* stop threading on current row */
+        m_parallelFilter[row].waitForExit();
+
+        /* Verify that the previous row has finished deblocking before this row continues */
+        if (row >= 1)
+            X265_CHECK(m_parallelFilter[row - 1].m_lastDeblocked.get() == (int)ParallelFilter::numCols, "previous row not finish");
+
+        m_parallelFilter[row].m_allowedCol.set(ParallelFilter::numCols);
+        m_parallelFilter[row].processTasks(-1);
+
+        if (row == m_numRows - 1)
+        {
+            /* TODO: Early start last row */
+            if ((row >= 1) && (m_parallelFilter[row - 1].m_lastDeblocked.get() != (int)ParallelFilter::numCols))
+                x265_log(m_param, X265_LOG_WARNING, "detected ParallelFilter race condition on last row\n");
+
+            /* Apply SAO on last row of CUs, because we always apply SAO on row[X-1] */
+            if (m_param->bEnableSAO)
+            {
+                for(uint32_t col = 0; col < ParallelFilter::numCols; col++)
+                {
+                    // NOTE: must use processSaoUnitCu() because it includes the TQBypass logic
+                    m_parallelFilter[row].processSaoUnitCu(saoParam, col);
+                }
+            }
+
+            // Process border extension on last row
+            for(uint32_t col = 0; col < ParallelFilter::numCols; col++)
+            {
+                // m_reconColCount will be set in processPostCu()
+                m_parallelFilter[row].processPostCu(col);
+            }
+        }
+    }
+
     // this row of CTUs has been encoded
 
     if (row > 0)
diff -r 375ce77b8c35 -r 9e0fe9704998 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Mon Dec 28 16:06:55 2015 -0600
+++ b/source/encoder/framefilter.h	Thu Dec 31 10:29:43 2015 -0600
@@ -55,7 +55,7 @@
     int           m_saoRowDelay;
     int           m_lastHeight;
     
-    void*         m_ssimBuf; /* Temp storage for ssim computation */
+    void*         m_ssimBuf;        /* Temp storage for ssim computation */
 
 #define MAX_PFILTER_CUS     (4) /* maximum CUs for every thread */
     class ParallelFilter : public BondedTaskGroup, public Deblock



More information about the x265-devel mailing list