[x265] [PATCH 1 of 2] optimize sync logic to improve speed on preset medium and below
Min Chen
chenm003 at 163.com
Thu Dec 31 18:07:39 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1451579383 21600
# Node ID 9e0fe9704998425e8d014fdfdb3c12f24e6c3cd9
# Parent 375ce77b8c35ae332bf51085f6d26044d55ae264
optimize sync logic to improve speed on preset medium and below
---
source/encoder/frameencoder.cpp | 59 +++++---------------------------------
source/encoder/framefilter.cpp | 37 ++++++++++++++++++++++++
source/encoder/framefilter.h | 2 +-
3 files changed, 46 insertions(+), 52 deletions(-)
diff -r 375ce77b8c35 -r 9e0fe9704998 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Dec 28 16:06:55 2015 -0600
+++ b/source/encoder/frameencoder.cpp Thu Dec 31 10:29:43 2015 -0600
@@ -1204,64 +1204,21 @@
rowCoder.finishSlice();
/* Processing left Deblock block with current threading */
- if ((m_param->bEnableLoopFilter | m_param->bEnableSAO) & (row >= 1))
+ if ((m_param->bEnableLoopFilter | m_param->bEnableSAO) & (row >= 2))
{
/* TODO: Multiple Threading */
- /* Check to avoid previous row process slower than current row */
- if (row >= 2)
+
+ /* Check whether the previous row's processing can be started on the current thread */
+ if (m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() == (int)numCols)
{
- int prevCol = m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get();
- while(prevCol != (int)numCols)
- prevCol = m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.waitForChange(prevCol);
+ /* stop threading on current row and restart it */
+ m_frameFilter.m_parallelFilter[row - 1].waitForExit();
+ m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
+ m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
}
- m_frameFilter.m_parallelFilter[row - 1].waitForExit();
- m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
- m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
}
/* trigger row-wise loop filters */
- if (row == m_numRows - 1)
- {
- /* TODO: Early start last row */
- if (m_param->bEnableLoopFilter | m_param->bEnableSAO)
- {
- if (m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get() != (int)numCols)
- x265_log(m_param, X265_LOG_WARNING, "detected ParallelFilter race condition on last row\n");
-
- // avoid race on last row and last column
- if (row >= 1)
- {
- int prevCol = m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get();
- while(prevCol != (int)numCols)
- prevCol = m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.waitForChange(prevCol);
- }
-
- /* NOTE: Last Row not execute before, so didn't need wait */
- m_frameFilter.m_parallelFilter[row].waitForExit();
- m_frameFilter.m_parallelFilter[row].m_allowedCol.set(numCols);
- m_frameFilter.m_parallelFilter[row].processTasks(-1);
-
- /* Apply SAO on last row of CUs, because we always apply SAO on row[X-1] */
- if (m_param->bEnableSAO)
- {
- FrameData* encData = m_frameFilter.m_parallelFilter[row].m_encData;
- SAOParam* saoParam = encData->m_saoParam;
- for(uint32_t col = 0; col < numCols; col++)
- {
- // NOTE: must use processSaoUnitCu(), it include TQBypass logic
- m_frameFilter.m_parallelFilter[row].processSaoUnitCu(saoParam, col);
- }
- }
-
- // Process border extension on last row
- for(uint32_t col = 0; col < numCols; col++)
- {
- // m_reconColCount will be set in processPostCu()
- m_frameFilter.m_parallelFilter[row].processPostCu(col);
- }
- }
- }
-
if (m_param->bEnableWavefront)
{
if (row >= m_filterRowDelay)
diff -r 375ce77b8c35 -r 9e0fe9704998 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Mon Dec 28 16:06:55 2015 -0600
+++ b/source/encoder/framefilter.cpp Thu Dec 31 10:29:43 2015 -0600
@@ -460,6 +460,43 @@
// SAO: was integrate into encode loop
SAOParam* saoParam = encData.m_saoParam;
+ /* Processing left block Deblock with current threading */
+ {
+ /* stop threading on current row */
+ m_parallelFilter[row].waitForExit();
+
+ /* Check that the previous row is not running behind the current row */
+ if (row >= 1)
+ X265_CHECK(m_parallelFilter[row - 1].m_lastDeblocked.get() == (int)ParallelFilter::numCols, "previous row not finish");
+
+ m_parallelFilter[row].m_allowedCol.set(ParallelFilter::numCols);
+ m_parallelFilter[row].processTasks(-1);
+
+ if (row == m_numRows - 1)
+ {
+ /* TODO: Early start last row */
+ if ((row >= 1) && (m_parallelFilter[row - 1].m_lastDeblocked.get() != (int)ParallelFilter::numCols))
+ x265_log(m_param, X265_LOG_WARNING, "detected ParallelFilter race condition on last row\n");
+
+ /* Apply SAO on last row of CUs, because we always apply SAO on row[X-1] */
+ if (m_param->bEnableSAO)
+ {
+ for(uint32_t col = 0; col < ParallelFilter::numCols; col++)
+ {
+ // NOTE: must use processSaoUnitCu(), because it includes the TQBypass logic
+ m_parallelFilter[row].processSaoUnitCu(saoParam, col);
+ }
+ }
+
+ // Process border extension on last row
+ for(uint32_t col = 0; col < ParallelFilter::numCols; col++)
+ {
+ // m_reconColCount will be set in processPostCu()
+ m_parallelFilter[row].processPostCu(col);
+ }
+ }
+ }
+
// this row of CTUs has been encoded
if (row > 0)
diff -r 375ce77b8c35 -r 9e0fe9704998 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Mon Dec 28 16:06:55 2015 -0600
+++ b/source/encoder/framefilter.h Thu Dec 31 10:29:43 2015 -0600
@@ -55,7 +55,7 @@
int m_saoRowDelay;
int m_lastHeight;
- void* m_ssimBuf; /* Temp storage for ssim computation */
+ void* m_ssimBuf; /* Temp storage for ssim computation */
#define MAX_PFILTER_CUS (4) /* maximum CUs for every thread */
class ParallelFilter : public BondedTaskGroup, public Deblock
More information about the x265-devel mailing list