[x265] [PATCH] vbv: enable row restarts when mid-frame qp adjustments are inadequate
Steve Borho
steve at borho.org
Sat Mar 29 22:39:39 CET 2014
# HG changeset patch
# User Aarthi Thirumalai
# Date 1394482958 -19800
# Tue Mar 11 01:52:38 2014 +0530
# Node ID 25c068c2acc997c8fb7bb1e9889fddb5215b3df0
# Parent 7340feac3c9d637c12b58abb366f7287fb9342c1
vbv: enable row restarts when mid-frame qp adjustments are inadequate
diff -r 7340feac3c9d -r 25c068c2acc9 source/common/wavefront.cpp
--- a/source/common/wavefront.cpp Thu Mar 27 18:19:32 2014 +0900
+++ b/source/common/wavefront.cpp Tue Mar 11 01:52:38 2014 +0530
@@ -103,6 +103,14 @@
return false;
}
+// Clear the internal-dependency bit for 'row' using one compare-and-swap
+// attempt (no retry here). Returns true when ATOMIC_CAS reports the same word
+// value that was read just before it, false otherwise; on false the caller is
+// expected to try again.
+// NOTE(review): assumes ATOMIC_CAS returns the value it found in memory and
+// only stores newval when that value equals oldval - confirm against the
+// macro definition.
+bool WaveFront::dequeueRow(int row)
+{
+ // row >> 6 selects the 64-bit bitmap word, row & 63 the bit inside it
+ uint64_t oldval = m_internalDependencyBitmap[row >> 6];
+ // build the mask from an unsigned 64-bit one: shifting a signed 1LL by 63
+ // moves a bit into the sign position, which is not well defined
+ uint64_t newval = oldval & ~(1ULL << (row & 63));
+ return ATOMIC_CAS(&m_internalDependencyBitmap[row >> 6], oldval, newval) == oldval;
+}
+
bool WaveFront::findJob()
{
unsigned long id;
diff -r 7340feac3c9d -r 25c068c2acc9 source/common/wavefront.h
--- a/source/common/wavefront.h Thu Mar 27 18:19:32 2014 +0900
+++ b/source/common/wavefront.h Tue Mar 11 01:52:38 2014 +0530
@@ -69,6 +69,10 @@
// This provider must be enqueued in the pool before enqueuing a row
void enqueueRow(int row);
+ // Mark a row as no longer having internal dependencies resolved. Returns
+ // true if bit clear was successful, false otherwise.
+ bool dequeueRow(int row);
+
// Mark the row's external dependencies as being resolved
void enableRow(int row);
diff -r 7340feac3c9d -r 25c068c2acc9 source/encoder/cturow.h
--- a/source/encoder/cturow.h Thu Mar 27 18:19:32 2014 +0900
+++ b/source/encoder/cturow.h Tue Mar 11 01:52:38 2014 +0530
@@ -87,6 +87,8 @@
void processCU(TComDataCU *cu, TComSlice *slice, TEncSbac *bufferSBac, bool bSaveCabac);
/* Threading variables */
+
+ /* This lock must be acquired when reading or writing m_active or m_busy */
Lock m_lock;
/* row is ready to run, has no neighbor dependencies. The row may have
diff -r 7340feac3c9d -r 25c068c2acc9 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Thu Mar 27 18:19:32 2014 +0900
+++ b/source/encoder/frameencoder.cpp Tue Mar 11 01:52:38 2014 +0530
@@ -57,6 +57,8 @@
m_nalCount = 0;
m_totalTime = 0;
+ m_bAllRowsStop = false;
+ m_vbvResetTriggerRow = -1;
memset(&m_rce, 0, sizeof(RateControlEntry));
}
@@ -400,6 +402,8 @@
m_frameFilter.m_sao.lumaLambda = lambda;
m_frameFilter.m_sao.chromaLambda = chromaLambda;
+ m_bAllRowsStop = false;
+ m_vbvResetTriggerRow = -1;
switch (slice->getSliceType())
{
@@ -946,6 +950,7 @@
if (m_pool && m_cfg->param->bEnableWavefront)
{
+ m_rows[0].m_active = true;
WaveFront::clearEnabledRowMask();
WaveFront::enqueue();
@@ -1020,46 +1025,53 @@
m_totalTime = 0;
}
+// Called by worker threads
void FrameEncoder::processRowEncoder(int row)
{
PPAScopeEvent(Thread_ProcessRow);
- // Called by worker threads
+ CTURow& codeRow = m_rows[m_cfg->param->bEnableWavefront ? row : 0];
CTURow& curRow = m_rows[row];
- if (curRow.m_busy)
{
- /* On multi-socket Windows servers, we have seen problems with
- * ATOMIC_CAS which resulted in multiple worker threads processing
- * the same CU row, which often resulted in bad pointer accesses. We
- * believe the problem is fixed, but are leaving this check in place
- * to prevent crashes in case it is not. */
- x265_log(m_cfg->param, X265_LOG_WARNING,
- "internal error - simulaneous row access detected. Please report HW to x265-devel at videolan.org");
- return;
+ ScopedLock self(curRow.m_lock);
+ if (!curRow.m_active)
+ {
+ /* VBV restart is in progress, exit out */
+ return;
+ }
+ if (curRow.m_busy)
+ {
+ /* On multi-socket Windows servers, we have seen problems with
+ * ATOMIC_CAS which resulted in multiple worker threads processing
+ * the same CU row, which often resulted in bad pointer accesses. We
+ * believe the problem is fixed, but are leaving this check in place
+ * to prevent crashes in case it is not */
+ x265_log(m_cfg->param, X265_LOG_WARNING,
+ "internal error - simulaneous row access detected. Please report HW to x265-devel at videolan.org\n");
+ return;
+ }
+ curRow.m_busy = true;
}
- curRow.m_busy = true;
int64_t startTime = x265_mdate();
- CTURow& codeRow = m_rows[m_cfg->param->bEnableWavefront ? row : 0];
const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
const uint32_t lineStartCUAddr = row * numCols;
- bool isVbv = m_cfg->param->rc.vbvBufferSize > 0 && m_cfg->param->rc.vbvMaxBitrate > 0;
- for (uint32_t col = curRow.m_completed; col < numCols; col++)
+ bool bIsVbv = m_cfg->param->rc.vbvBufferSize > 0 && m_cfg->param->rc.vbvMaxBitrate > 0;
+
+ while (curRow.m_completed < numCols)
{
+ int col = curRow.m_completed;
const uint32_t cuAddr = lineStartCUAddr + col;
TComDataCU* cu = m_pic->getCU(cuAddr);
cu->initCU(m_pic, cuAddr);
cu->setQPSubParts(m_pic->getSlice()->getSliceQp(), 0, 0);
- codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
- codeRow.m_entropyCoder.resetEntropy();
- TEncSbac *bufSbac = (m_cfg->param->bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
- if (isVbv)
+ if (bIsVbv)
{
if (!row)
m_pic->m_rowDiagQp[row] = m_pic->m_avgQpRc;
- if ((uint32_t)row >= col && (row != 0))
+ if (row >= col && row && m_vbvResetTriggerRow != row)
cu->m_baseQp = m_pic->getCU(cuAddr - numCols + 1)->m_baseQp;
else
cu->m_baseQp = m_pic->m_rowDiagQp[row];
@@ -1067,7 +1079,7 @@
else
cu->m_baseQp = m_pic->m_avgQpRc;
- if (m_cfg->param->rc.aqMode || isVbv)
+ if (m_cfg->param->rc.aqMode || bIsVbv)
{
int qp = calcQpForCu(m_pic, cuAddr, cu->m_baseQp);
setLambda(qp, row);
@@ -1076,32 +1088,92 @@
if (m_cfg->param->rc.aqMode)
m_pic->m_qpaAq[row] += qp;
}
+
+ TEncSbac *bufSbac = (m_cfg->param->bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
+ codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
+ codeRow.m_entropyCoder.resetEntropy();
codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param->bEnableWavefront && col == 1);
- if (isVbv)
+ // Completed CU processing
+ curRow.m_completed++;
+
+ if (bIsVbv)
{
// Update encoded bits, satdCost, baseQP for each CU
m_pic->m_rowDiagSatd[row] += m_pic->m_cuCostsForVbv[cuAddr];
+ m_pic->m_rowDiagIntraSatd[row] += m_pic->m_intraCuCostsForVbv[cuAddr];
m_pic->m_rowEncodedBits[row] += cu->m_totalBits;
m_pic->m_numEncodedCusPerRow[row] = cuAddr;
m_pic->m_qpaRc[row] += cu->m_baseQp;
// If current block is at row diagonal checkpoint, call vbv ratecontrol.
- if ((uint32_t)row == col && row != 0)
+ if (row == col && row)
{
- m_pic->m_rowDiagQp[row] = cu->m_baseQp;
- m_top->m_rateControl->rowDiagonalVbvRateControl(m_pic, row, &m_rce, m_pic->m_rowDiagQp[row]);
- m_pic->m_rowDiagQScale[row] = Clip3((double)MIN_QP, (double)MAX_QP, m_pic->m_rowDiagQScale[row]);
- m_pic->m_rowDiagQScale[row] = x265_qp2qScale(m_pic->m_rowDiagQp[row]);
+ double qpBase = cu->m_baseQp;
+ int reEncode = m_top->m_rateControl->rowDiagonalVbvRateControl(m_pic, row, &m_rce, qpBase);
+ qpBase = Clip3((double)MIN_QP, (double)MAX_MAX_QP, qpBase);
+ m_pic->m_rowDiagQp[row] = qpBase;
+ m_pic->m_rowDiagQScale[row] = x265_qp2qScale(qpBase);
+
+ if (reEncode < 0)
+ {
+ x265_log(m_cfg->param, X265_LOG_INFO, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
+ m_pic->getPOC(), row, qpBase, cu->m_baseQp);
+
+ // prevent the WaveFront::findJob() method from providing new jobs
+ m_vbvResetTriggerRow = row;
+ m_bAllRowsStop = true;
+
+ for (int r = m_numRows - 1; r >= row ; r--)
+ {
+ CTURow& stopRow = m_rows[r];
+
+ if (r != row)
+ {
+ /* if row was active (ready to be run) clear active bit and bitmap bit for this row */
+ stopRow.m_lock.acquire();
+ while (stopRow.m_active)
+ {
+ if (dequeueRow(r * 2))
+ stopRow.m_active = false;
+ else
+ GIVE_UP_TIME();
+ }
+ stopRow.m_lock.release();
+
+ bool bRowBusy = true;
+ do
+ {
+ stopRow.m_lock.acquire();
+ bRowBusy = stopRow.m_busy;
+ stopRow.m_lock.release();
+
+ if (bRowBusy)
+ {
+ GIVE_UP_TIME();
+ }
+ }
+ while (bRowBusy);
+ }
+
+ stopRow.m_completed = 0;
+ if (m_pic->m_qpaAq)
+ m_pic->m_qpaAq[r] = 0;
+ m_pic->m_qpaRc[r] = 0;
+ m_pic->m_rowEncodedBits[r] = 0;
+ m_pic->m_numEncodedCusPerRow[r] = 0;
+ }
+
+ m_bAllRowsStop = false;
+ }
}
}
- // Completed CU processing
- m_rows[row].m_completed++;
- if (m_rows[row].m_completed >= 2 && row < m_numRows - 1)
+ if (curRow.m_completed >= 2 && row < m_numRows - 1)
{
ScopedLock below(m_rows[row + 1].m_lock);
if (m_rows[row + 1].m_active == false &&
- m_rows[row + 1].m_completed + 2 <= m_rows[row].m_completed)
+ m_rows[row + 1].m_completed + 2 <= curRow.m_completed &&
+ (!m_bAllRowsStop || row + 1 < m_vbvResetTriggerRow))
{
m_rows[row + 1].m_active = true;
enqueueRowEncoder(row + 1);
@@ -1109,18 +1181,19 @@
}
ScopedLock self(curRow.m_lock);
- if (row > 0 && m_rows[row].m_completed < numCols - 1 && m_rows[row - 1].m_completed < m_rows[row].m_completed + 2)
+ if ((m_bAllRowsStop && row > m_vbvResetTriggerRow) ||
+ (row > 0 && curRow.m_completed < numCols - 1 && m_rows[row - 1].m_completed < m_rows[row].m_completed + 2))
{
curRow.m_active = false;
curRow.m_busy = false;
- m_totalTime = m_totalTime + (x265_mdate() - startTime);
+ m_totalTime += x265_mdate() - startTime;
return;
}
}
// this row of CTUs has been encoded
- // Run row-wise loop filters
+ // trigger row-wise loop filters
if (row >= m_filterRowDelay)
{
enableRowFilter(row - m_filterRowDelay);
@@ -1136,7 +1209,7 @@
enableRowFilter(i);
}
}
- m_totalTime = m_totalTime + (x265_mdate() - startTime);
+ m_totalTime += x265_mdate() - startTime;
curRow.m_busy = false;
}
diff -r 7340feac3c9d -r 25c068c2acc9 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Thu Mar 27 18:19:32 2014 +0900
+++ b/source/encoder/frameencoder.h Tue Mar 11 01:52:38 2014 +0530
@@ -166,6 +166,9 @@
RateControlEntry m_rce;
SEIDecodedPictureHash m_seiReconPictureDigest;
+ volatile bool m_bAllRowsStop;
+ volatile int m_vbvResetTriggerRow;
+
protected:
void determineSliceBounds();
More information about the x265-devel
mailing list