[x265-commits] [x265] deblock: removed bonded task group support in filtering
Ashok Kumar Mishra
ashok at multicorewareinc.com
Fri Sep 29 01:03:03 CEST 2017
details: http://hg.videolan.org/x265/rev/1a7edb6fd993
branches:
changeset: 11880:1a7edb6fd993
user: Ashok Kumar Mishra <ashok at multicorewareinc.com>
date: Thu Jun 29 13:13:56 2017 +0530
description:
deblock: removed bonded task group support in filtering
Since filtering is a very lightweight process compared to encoding a CTU,
there is no need to use a bonded task group for filtering. There is a slight
improvement in performance after removing the bonded task group.
Subject: [x265] wavefront: fix for triggering rows for multiple slices when wpp is enabled
details: http://hg.videolan.org/x265/rev/71f700844b0b
branches:
changeset: 11881:71f700844b0b
user: Ashok Kumar Mishra <ashok at multicorewareinc.com>
date: Tue Sep 12 18:13:03 2017 +0530
description:
wavefront: fix for triggering rows for multiple slices when wpp is enabled
When WPP is enabled, rows must be triggered for encoding alternately across slices (interleaving the rows of each slice).
Subject: [x265] fix multiple insertion of payloadSize into bitstream
details: http://hg.videolan.org/x265/rev/0967d0add97e
branches:
changeset: 11882:0967d0add97e
user: Bhavna Hariharan <bhavna at multicorewareinc.com>
date: Mon Sep 25 18:35:07 2017 +0530
description:
fix multiple insertion of payloadSize into bitstream
bitbucket issue #369
# HG changeset patch
# User Bhavna Hariharan <bhavna at multicorewareinc.com>
# Date 1506344707 -19800
# Mon Sep 25 18:35:07 2017 +0530
# Node ID 0fb6bc88eb81b22ae1ec693f67b715606025fe8d
# Parent f8ae7afc1f61ed0db3b2f23f5d581706fe6ed677
fix multiple insertion of payloadSize into bitstream
bitbucket issue #369
diffstat:
source/common/wavefront.cpp | 6 +++
source/common/wavefront.h | 4 ++
source/encoder/encoder.cpp | 6 +-
source/encoder/frameencoder.cpp | 73 +++++++++++++++++++---------------------
source/encoder/framefilter.cpp | 6 +--
source/encoder/framefilter.h | 6 +--
6 files changed, 50 insertions(+), 51 deletions(-)
diffs (truncated from 319 to 300 lines):
diff -r f8ae7afc1f61 -r 0967d0add97e source/common/wavefront.cpp
--- a/source/common/wavefront.cpp Mon Sep 11 11:12:19 2017 +0530
+++ b/source/common/wavefront.cpp Mon Sep 25 18:35:07 2017 +0530
@@ -43,11 +43,17 @@ bool WaveFront::init(int numRows)
if (m_externalDependencyBitmap)
memset((void*)m_externalDependencyBitmap, 0, sizeof(uint32_t) * m_numWords);
+ m_row_to_idx = X265_MALLOC(uint32_t, m_numRows);
+ m_idx_to_row = X265_MALLOC(uint32_t, m_numRows);
+
return m_internalDependencyBitmap && m_externalDependencyBitmap;
}
WaveFront::~WaveFront()
{
+ x265_free((void*)m_row_to_idx);
+ x265_free((void*)m_idx_to_row);
+
x265_free((void*)m_internalDependencyBitmap);
x265_free((void*)m_externalDependencyBitmap);
}
diff -r f8ae7afc1f61 -r 0967d0add97e source/common/wavefront.h
--- a/source/common/wavefront.h Mon Sep 11 11:12:19 2017 +0530
+++ b/source/common/wavefront.h Mon Sep 25 18:35:07 2017 +0530
@@ -52,6 +52,10 @@ private:
int m_numRows;
+protected:
+ uint32_t *m_row_to_idx;
+ uint32_t *m_idx_to_row;
+
public:
WaveFront()
diff -r f8ae7afc1f61 -r 0967d0add97e source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Sep 11 11:12:19 2017 +0530
+++ b/source/encoder/encoder.cpp Mon Sep 25 18:35:07 2017 +0530
@@ -631,12 +631,12 @@ int Encoder::encode(const x265_picture*
int32_t i = 0;
toneMap.payloadSize = 0;
while (m_cim[currentPOC][i] == 0xFF)
- toneMap.payloadSize += m_cim[currentPOC][i++] + 1;
- toneMap.payloadSize += m_cim[currentPOC][i] + 1;
+ toneMap.payloadSize += m_cim[currentPOC][i++];
+ toneMap.payloadSize += m_cim[currentPOC][i];
toneMap.payload = (uint8_t*)x265_malloc(sizeof(uint8_t) * toneMap.payloadSize);
toneMap.payloadType = USER_DATA_REGISTERED_ITU_T_T35;
- memcpy(toneMap.payload, m_cim[currentPOC], toneMap.payloadSize);
+ memcpy(toneMap.payload, &m_cim[currentPOC][i+1], toneMap.payloadSize);
}
}
#endif
diff -r f8ae7afc1f61 -r 0967d0add97e source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Sep 11 11:12:19 2017 +0530
+++ b/source/encoder/frameencoder.cpp Mon Sep 25 18:35:07 2017 +0530
@@ -701,6 +701,24 @@ void FrameEncoder::compressFrame()
{
m_rows[m_sliceBaseRow[sliceId]].active = true;
}
+ if (m_param->bEnableWavefront)
+ {
+ int i = 0;
+ for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
+ {
+ for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
+ {
+ const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
+ const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
+ const uint32_t row = sliceStartRow + rowInSlice;
+ if (row > sliceEndRow)
+ continue;
+ m_row_to_idx[row] = i;
+ m_idx_to_row[i] = row;
+ i += 1;
+ }
+ }
+ }
if (m_param->bEnableWavefront)
{
@@ -735,11 +753,11 @@ void FrameEncoder::compressFrame()
}
}
- enableRowEncoder(row); /* clear external dependency for this row */
+ enableRowEncoder(m_row_to_idx[row]); /* clear external dependency for this row */
if (!rowInSlice)
{
m_row0WaitTime = x265_mdate();
- enqueueRowEncoder(row); /* clear internal dependency, start wavefront */
+ enqueueRowEncoder(m_row_to_idx[row]); /* clear internal dependency, start wavefront */
}
tryWakeOne();
} // end of loop rowInSlice
@@ -1196,8 +1214,8 @@ void FrameEncoder::processRow(int row, i
if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime)
m_totalNoWorkerTime += x265_mdate() - m_stallStartTime;
- const uint32_t realRow = row >> 1;
- const uint32_t typeNum = row & 1;
+ const uint32_t realRow = m_idx_to_row[row >> 1];
+ const uint32_t typeNum = m_idx_to_row[row & 1];
if (!typeNum)
processRowEncoder(realRow, m_tld[threadId]);
@@ -1207,7 +1225,7 @@ void FrameEncoder::processRow(int row, i
// NOTE: Active next row
if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1)
- enqueueRowFilter(realRow + 1);
+ enqueueRowFilter(m_row_to_idx[realRow + 1]);
}
if (ATOMIC_DEC(&m_activeWorkerCount) == 0)
@@ -1264,10 +1282,7 @@ void FrameEncoder::processRowEncoder(int
if (bFirstRowInSlice && !curRow.completed)
{
// Load SBAC coder context from previous row and initialize row state.
- //rowCoder.copyState(m_initSliceContext);
- //rowCoder.loadContexts(m_rows[row - 1].bufferedEntropy);
- rowCoder.load(m_initSliceContext);
- //m_rows[row - 1].bufferedEntropy.loadContexts(m_initSliceContext);
+ rowCoder.load(m_initSliceContext);
}
// calculate mean QP for consistent deltaQP signalling calculation
@@ -1328,9 +1343,7 @@ void FrameEncoder::processRowEncoder(int
}
curRow.avgQPComputed = 1;
}
- }
-
- // TODO: specially case handle on first and last row
+ }
// Initialize restrict on MV range in slices
tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4;
@@ -1428,15 +1441,10 @@ void FrameEncoder::processRowEncoder(int
{
// NOTE: in VBV mode, we may reencode anytime, so we can't do Deblock stage-Horizon and SAO
if (!bIsVbv)
- {
- // TODO: Multiple Threading
- // Delay ONE row to avoid Intra Prediction Conflict
+ {
+ // Delay one row to avoid intra prediction conflict
if (m_pool && !bFirstRowInSlice)
- {
- // Waitting last threading finish
- m_frameFilter.m_parallelFilter[row - 1].waitForExit();
-
- // Processing new group
+ {
int allowCol = col;
// avoid race condition on last column
@@ -1446,15 +1454,11 @@ void FrameEncoder::processRowEncoder(int
: m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get()), (int)col);
}
m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol);
- m_frameFilter.m_parallelFilter[row - 1].tryBondPeers(*this, 1);
}
// Last Row may start early
if (m_pool && bLastRowInSlice)
{
- // Waiting for the last thread to finish
- m_frameFilter.m_parallelFilter[row].waitForExit();
-
// Deblocking last row
int allowCol = col;
@@ -1465,7 +1469,6 @@ void FrameEncoder::processRowEncoder(int
: m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get()), (int)col);
}
m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol);
- m_frameFilter.m_parallelFilter[row].tryBondPeers(*this, 1);
}
} // end of !bIsVbv
}
@@ -1481,7 +1484,7 @@ void FrameEncoder::processRowEncoder(int
FrameStats frameLog;
curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu, &frameLog);
- // copy no. of intra, inter Cu cnt per row into frame stats for 2 pass
+ // copy number of intra, inter cu per row into frame stats for 2 pass
if (m_param->rc.bStatWrite)
{
curRow.rowStats.mvBits += best.mvBits;
@@ -1537,7 +1540,6 @@ void FrameEncoder::processRowEncoder(int
}
// If current block is at row end checkpoint, call vbv ratecontrol.
-
if (!m_param->bEnableWavefront && col == numCols - 1)
{
double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
@@ -1566,9 +1568,7 @@ void FrameEncoder::processRowEncoder(int
curEncData.m_rowStat[row].sumQpAq = 0;
}
}
-
// If current block is at row diagonal checkpoint, call vbv ratecontrol.
-
else if (m_param->bEnableWavefront && row == col && row)
{
if (m_param->rc.bEnableConstVbv)
@@ -1667,7 +1667,7 @@ void FrameEncoder::processRowEncoder(int
m_rows[row + 1].completed + 2 <= curRow.completed)
{
m_rows[row + 1].active = true;
- enqueueRowEncoder(row + 1);
+ enqueueRowEncoder(m_row_to_idx[row + 1]);
tryWakeOne(); /* wake up a sleeping thread or set the help wanted flag */
}
}
@@ -1683,7 +1683,7 @@ void FrameEncoder::processRowEncoder(int
}
}
- /** this row of CTUs has been compressed **/
+ /* this row of CTUs has been compressed */
if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
{
if (row == m_numRows - 1)
@@ -1740,13 +1740,10 @@ void FrameEncoder::processRowEncoder(int
/* Processing left Deblock block with current threading */
if ((m_param->bEnableLoopFilter | m_param->bEnableSAO) & (rowInSlice >= 2))
{
- /* TODO: Multiple Threading */
-
/* Check conditional to start previous row process with current threading */
if (m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() == (int)numCols)
{
/* stop threading on current row and restart it */
- m_frameFilter.m_parallelFilter[row - 1].waitForExit();
m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(numCols);
m_frameFilter.m_parallelFilter[row - 1].processTasks(-1);
}
@@ -1757,11 +1754,11 @@ void FrameEncoder::processRowEncoder(int
{
if (rowInSlice >= m_filterRowDelay)
{
- enableRowFilter(row - m_filterRowDelay);
+ enableRowFilter(m_row_to_idx[row - m_filterRowDelay]);
/* NOTE: Activate filter if first row (row 0) */
if (rowInSlice == m_filterRowDelay)
- enqueueRowFilter(row - m_filterRowDelay);
+ enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]);
tryWakeOne();
}
@@ -1769,7 +1766,7 @@ void FrameEncoder::processRowEncoder(int
{
for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i < endRowInSlicePlus1; i++)
{
- enableRowFilter(i);
+ enableRowFilter(m_row_to_idx[i]);
}
tryWakeOne();
}
@@ -1777,7 +1774,7 @@ void FrameEncoder::processRowEncoder(int
// handle specially case - single row slice
if (bFirstRowInSlice & bLastRowInSlice)
{
- enqueueRowFilter(row);
+ enqueueRowFilter(m_row_to_idx[row]);
tryWakeOne();
}
}
diff -r f8ae7afc1f61 -r 0967d0add97e source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Mon Sep 11 11:12:19 2017 +0530
+++ b/source/encoder/framefilter.cpp Mon Sep 25 18:35:07 2017 +0530
@@ -582,10 +582,7 @@ void FrameFilter::processRow(int row)
CUData* ctu = encData.getPicCTU(m_parallelFilter[row].m_rowAddr);
/* Processing left block Deblock with current threading */
- {
- /* stop threading on current row */
- m_parallelFilter[row].waitForExit();
-
+ {
/* Check to avoid previous row process slower than current row */
X265_CHECK(ctu->m_bFirstRowInSlice || m_parallelFilter[row - 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");
@@ -618,7 +615,6 @@ void FrameFilter::processRow(int row)
}
// this row of CTUs has been encoded
-
if (!ctu->m_bFirstRowInSlice)
processPostRow(row - 1);
diff -r f8ae7afc1f61 -r 0967d0add97e source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Mon Sep 11 11:12:19 2017 +0530
+++ b/source/encoder/framefilter.h Mon Sep 25 18:35:07 2017 +0530
@@ -62,7 +62,7 @@ public:
More information about the x265-commits
mailing list