[x265] [PATCH RFC] vbv: enable row resets during vbv when mid-frame qp adjustments are too high/low
Steve Borho
steve at borho.org
Tue Mar 18 04:19:24 CET 2014
# HG changeset patch
# User Aarthi Thirumalai
# Date 1394482958 -19800
# Tue Mar 11 01:52:38 2014 +0530
# Node ID 079c5454ed3285a30c5be3ae0f998df7b25002a2
# Parent 7b86d42683be4c2727aa8d602b12a424806e1a49
vbv: enable row resets during vbv when mid-frame qp adjustments are too high/low.
diff -r 7b86d42683be -r 079c5454ed32 source/common/wavefront.cpp
--- a/source/common/wavefront.cpp Sun Mar 16 23:37:56 2014 +0900
+++ b/source/common/wavefront.cpp Tue Mar 11 01:52:38 2014 +0530
@@ -32,6 +32,7 @@
bool WaveFront::init(int numRows)
{
m_numRows = numRows;
+ m_bAllRowsStop = false;
if (m_pool)
{
@@ -105,6 +106,9 @@
bool WaveFront::findJob()
{
+ if (m_bAllRowsStop)
+ return false;
+
unsigned long id;
// thread safe
diff -r 7b86d42683be -r 079c5454ed32 source/common/wavefront.h
--- a/source/common/wavefront.h Sun Mar 16 23:37:56 2014 +0900
+++ b/source/common/wavefront.h Tue Mar 11 01:52:38 2014 +0530
@@ -53,6 +53,9 @@
public:
+ // temporarily pause findJob() distributing work
+ bool m_bAllRowsStop;
+
WaveFront(ThreadPool *pool)
: JobProvider(pool)
, m_internalDependencyBitmap(0)
diff -r 7b86d42683be -r 079c5454ed32 source/encoder/cturow.h
--- a/source/encoder/cturow.h Sun Mar 16 23:37:56 2014 +0900
+++ b/source/encoder/cturow.h Tue Mar 11 01:52:38 2014 +0530
@@ -87,6 +87,9 @@
void processCU(TComDataCU *cu, TComSlice *slice, TEncSbac *bufferSBac, bool bSaveCabac);
/* Threading variables */
+
+ /* This row lock must be acquired when reading or writing m_active,
+ * m_completed, or m_busy */
Lock m_lock;
/* row is ready to run, has no neighbor dependencies. The row may have
diff -r 7b86d42683be -r 079c5454ed32 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Sun Mar 16 23:37:56 2014 +0900
+++ b/source/encoder/frameencoder.cpp Tue Mar 11 01:52:38 2014 +0530
@@ -908,7 +908,7 @@
m_rows[i].m_completed = 0;
m_rows[i].m_busy = false;
}
-
+ m_vbvResetTriggerRow = -1;
int range = m_cfg->param->searchRange; /* fpel search */
range += 1; /* diamond search range check lag */
range += 2; /* subpel refine */
@@ -998,80 +998,147 @@
m_totalTime = 0;
}
+// Called by worker threads
void FrameEncoder::processRowEncoder(int row)
{
PPAScopeEvent(Thread_ProcessRow);
- // Called by worker threads
+ CTURow& codeRow = m_rows[m_cfg->param->bEnableWavefront ? row : 0];
CTURow& curRow = m_rows[row];
+
if (curRow.m_busy)
{
/* On multi-socket Windows servers, we have seen problems with
* ATOMIC_CAS which resulted in multiple worker threads processing
* the same CU row, which often resulted in bad pointer accesses. We
* believe the problem is fixed, but are leaving this check in place
- * to prevent crashes in case it is not. */
+ * to prevent crashes in case it is not */
x265_log(m_cfg->param, X265_LOG_WARNING,
"internal error - simulaneous row access detected. Please report HW to x265-devel at videolan.org");
return;
}
+ if (row > 0)
+ {
+ ScopedLock self(curRow.m_lock);
+ if (m_bAllRowsStop)
+ {
+ curRow.m_active = false;
+ return;
+ }
+ }
curRow.m_busy = true;
int64_t startTime = x265_mdate();
- CTURow& codeRow = m_rows[m_cfg->param->bEnableWavefront ? row : 0];
const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
const uint32_t lineStartCUAddr = row * numCols;
double qpBase = m_pic->m_avgQpRc;
- bool isVbv = m_cfg->param->rc.vbvBufferSize > 0 && m_cfg->param->rc.vbvMaxBitrate > 0;
- for (uint32_t col = curRow.m_completed; col < numCols; col++)
+ bool bIsVbv = m_cfg->param->rc.vbvBufferSize > 0 && m_cfg->param->rc.vbvMaxBitrate > 0;
+
+ while (curRow.m_completed < numCols)
{
+ uint32_t col = curRow.m_completed;
const uint32_t cuAddr = lineStartCUAddr + col;
TComDataCU* cu = m_pic->getCU(cuAddr);
cu->initCU(m_pic, cuAddr);
codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
codeRow.m_entropyCoder.resetEntropy();
- TEncSbac *bufSbac = (m_cfg->param->bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
- if ((uint32_t)row >= col && (row != 0) && isVbv)
- qpBase = m_pic->getCU(cuAddr - numCols + 1)->m_baseQp;
+ if (bIsVbv)
+ {
+ if (!row)
+ m_pic->m_rowDiagQp[row] = m_pic->m_avgQpRc;
- if (m_cfg->param->rc.aqMode || isVbv)
+ if ((uint32_t)row >= col && (row != 0) && m_vbvResetTriggerRow != row)
+ cu->m_baseQp = m_pic->getCU(cuAddr - numCols + 1)->m_baseQp;
+ else
+ cu->m_baseQp = m_pic->m_rowDiagQp[row];
+ }
+ else
+ cu->m_baseQp = m_pic->m_avgQpRc;
+
+ if (m_cfg->param->rc.aqMode || bIsVbv)
{
- int qp = calcQpForCu(m_pic, cuAddr, qpBase);
+ int qp = calcQpForCu(m_pic, cuAddr, cu->m_baseQp);
setLambda(qp, row);
qp = X265_MIN(qp, MAX_QP);
cu->setQPSubParts(char(qp), 0, 0);
- cu->m_baseQp = qpBase;
if (m_cfg->param->rc.aqMode)
m_pic->m_qpaAq[row] += qp;
}
+
+ TEncSbac *bufSbac = (m_cfg->param->bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param->bEnableWavefront && col == 1);
- if (isVbv)
+ curRow.m_completed++;
+
+ // Completed CU processing
+
+ if (bIsVbv)
{
// Update encoded bits, satdCost, baseQP for each CU
m_pic->m_rowDiagSatd[row] += m_pic->m_cuCostsForVbv[cuAddr];
+ m_pic->m_rowDiagIntraSatd[row] += m_pic->m_intraCuCostsForVbv[cuAddr];
m_pic->m_rowEncodedBits[row] += cu->m_totalBits;
m_pic->m_numEncodedCusPerRow[row] = cuAddr;
m_pic->m_qpaRc[row] += cu->m_baseQp;
- if ((uint32_t)row == col)
- m_pic->m_rowDiagQp[row] = qpBase;
-
- // If current block is at row diagonal checkpoint, call vbv ratecontrol.
+ // If current block is at row diagonal checkpoint, update vbv ratecontrol.
if ((uint32_t)row == col && row != 0)
{
- m_top->m_rateControl->rowDiagonalVbvRateControl(m_pic, row, &m_rce, qpBase);
+ qpBase = cu->m_baseQp;
+ int reEncode = m_top->m_rateControl->rowDiagonalVbvRateControl(m_pic, row, &m_rce, qpBase);
qpBase = Clip3((double)MIN_QP, (double)MAX_MAX_QP, qpBase);
+ m_pic->m_rowDiagQp[row] = qpBase;
+ m_pic->m_rowDiagQScale[row] = x265_qp2qScale(qpBase);
+
+ if (reEncode < 0)
+ {
+ x265_log(m_cfg->param, X265_LOG_INFO, "POC %d row %d - encode restart required for VBV\n", m_pic->getPOC(), row);
+
+ // prevent the WaveFront::findJob() method from providing new jobs
+ m_bAllRowsStop = true;
+ m_vbvResetTriggerRow = row;
+
+ curRow.m_completed = 0;
+
+ for (int r = m_numRows - 1; r >= row ; r--)
+ {
+ CTURow& stopRow = m_rows[r];
+
+ if (r != row)
+ {
+ bool bRowBusy = true;
+ while (bRowBusy)
+ {
+ // Determine idle, hold lock while examining busy flags
+ stopRow.m_lock.acquire();
+ bRowBusy = stopRow.m_active && stopRow.m_busy;
+ stopRow.m_lock.release();
+
+ if (bRowBusy)
+ {
+ GIVE_UP_TIME();
+ }
+ }
+ }
+
+ stopRow.m_completed = 0;
+ m_pic->m_qpaAq[r] = 0;
+ m_pic->m_rowEncodedBits[r] = 0;
+ m_pic->m_qpaRc[r] = 0;
+ m_pic->m_numEncodedCusPerRow[r] = 0;
+ }
+
+ m_bAllRowsStop = false;
+ }
}
}
- // Completed CU processing
- m_rows[row].m_completed++;
- if (m_rows[row].m_completed >= 2 && row < m_numRows - 1)
+
+ if (curRow.m_completed >= 2 && row < m_numRows - 1)
{
ScopedLock below(m_rows[row + 1].m_lock);
- if (m_rows[row + 1].m_active == false &&
- m_rows[row + 1].m_completed + 2 <= m_rows[row].m_completed)
+ if (m_rows[row + 1].m_active == false && !m_bAllRowsStop &&
+ m_rows[row + 1].m_completed + 2 <= curRow.m_completed)
{
m_rows[row + 1].m_active = true;
enqueueRowEncoder(row + 1);
@@ -1079,18 +1146,19 @@
}
ScopedLock self(curRow.m_lock);
- if (row > 0 && m_rows[row].m_completed < numCols - 1 && m_rows[row - 1].m_completed < m_rows[row].m_completed + 2)
+ if (m_bAllRowsStop ||
+ (row > 0 && curRow.m_completed < numCols - 1 && m_rows[row - 1].m_completed < m_rows[row].m_completed + 2))
{
curRow.m_active = false;
curRow.m_busy = false;
- m_totalTime = m_totalTime + (x265_mdate() - startTime);
+ m_totalTime += x265_mdate() - startTime;
return;
}
}
// this row of CTUs has been encoded
- // Run row-wise loop filters
+ // trigger row-wise loop filters
if (row >= m_filterRowDelay)
{
enableRowFilter(row - m_filterRowDelay);
@@ -1106,7 +1174,7 @@
enableRowFilter(i);
}
}
- m_totalTime = m_totalTime + (x265_mdate() - startTime);
+ m_totalTime += x265_mdate() - startTime;
curRow.m_busy = false;
}
diff -r 7b86d42683be -r 079c5454ed32 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Sun Mar 16 23:37:56 2014 +0900
+++ b/source/encoder/frameencoder.h Tue Mar 11 01:52:38 2014 +0530
@@ -165,6 +165,7 @@
TComPPS m_pps;
RateControlEntry m_rce;
SEIDecodedPictureHash m_seiReconPictureDigest;
+ int m_vbvResetTriggerRow;
protected:
diff -r 7b86d42683be -r 079c5454ed32 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Sun Mar 16 23:37:56 2014 +0900
+++ b/source/encoder/ratecontrol.cpp Tue Mar 11 01:52:38 2014 +0530
@@ -792,8 +792,8 @@
}
refRowSatdCost >>= X265_DEPTH - 8;
- refQScale = row == maxRows - 1 ? refPic->m_rowDiagQScale[row] : refPic->m_rowDiagQScale[row + 1];
- }
+ refQScale = refPic->m_rowDiagQScale[row];
+ }
if (picType == I_SLICE || qScale >= refQScale)
{
@@ -829,8 +829,6 @@
{
double qScaleVbv = x265_qp2qScale(qpVbv);
- pic->m_rowDiagQp[row] = qpVbv;
- pic->m_rowDiagQScale[row] = qScaleVbv;
uint64_t rowSatdCost = pic->m_rowDiagSatd[row];
double encodedBits = pic->m_rowEncodedBits[row];
if (row == 1)
More information about the x265-devel
mailing list