[x265] [PATCH] vbv hanging issue; fix for multiple slices
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Thu Sep 28 12:26:35 CEST 2017
# HG changeset patch
# User Ashok Kumar Mishra <ashok at multicorewareinc.com>
# Date 1506091858 -19800
# Fri Sep 22 20:20:58 2017 +0530
# Node ID 667bbf65185e86c8e1b722d54f2ce2606d58ed4f
# Parent 0967d0add97e8176adbb8e0229fafe72c547bb6e
vbv hanging issue; fix for multiple slices
When multiple slices are enabled, VBV rate control must operate on the rows
belonging to each slice, because the slices are encoded simultaneously.
diff -r 0967d0add97e -r 667bbf65185e source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Sep 25 18:35:07 2017 +0530
+++ b/source/encoder/frameencoder.cpp Fri Sep 22 20:20:58 2017 +0530
@@ -88,6 +88,7 @@
delete[] m_outStreams;
delete[] m_backupStreams;
X265_FREE(m_sliceBaseRow);
+ X265_FREE(m_sliceMaxBlockRow);
X265_FREE(m_cuGeoms);
X265_FREE(m_ctuGeomMap);
X265_FREE(m_substreamSizes);
@@ -118,6 +119,40 @@
m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
ok &= !!m_sliceBaseRow;
+ m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;
+ uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
+ uint32_t rowSum = sliceGroupSizeAccu;
+ uint32_t sidx = 0;
+ for (uint32_t i = 0; i < m_numRows; i++)
+ {
+ const uint32_t rowRange = (rowSum >> 8);
+ if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
+ {
+ rowSum += sliceGroupSizeAccu;
+ m_sliceBaseRow[++sidx] = i;
+ }
+ }
+ X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
+ m_sliceBaseRow[0] = 0;
+ m_sliceBaseRow[m_param->maxSlices] = m_numRows;
+
+ m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
+ ok &= !!m_sliceMaxBlockRow;
+ uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
+ sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
+ rowSum = sliceGroupSizeAccu;
+ sidx = 0;
+ for (uint32_t i = 0; i < maxBlockRows; i++)
+ {
+ const uint32_t rowRange = (rowSum >> 8);
+ if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
+ {
+ rowSum += sliceGroupSizeAccu;
+ m_sliceMaxBlockRow[++sidx] = i;
+ }
+ }
+ m_sliceMaxBlockRow[0] = 0;
+ m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
/* determine full motion search range */
int range = m_param->searchRange; /* fpel search */
@@ -341,6 +376,8 @@
m_completionCount = 0;
m_bAllRowsStop = false;
m_vbvResetTriggerRow = -1;
+ m_rowSliceTotalBits[0] = 0;
+ m_rowSliceTotalBits[1] = 0;
m_SSDY = m_SSDU = m_SSDV = 0;
m_ssim = 0;
@@ -550,28 +587,13 @@
/* reset entropy coders and compute slice id */
m_entropyCoder.load(m_initSliceContext);
- const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;
- const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
- m_sliceGroupSize = (uint16_t)sliceGroupSize;
+
+ for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
+ for (uint32_t row = m_sliceBaseRow[sliceId]; row < m_sliceBaseRow[sliceId + 1]; row++)
+ m_rows[row].init(m_initSliceContext, sliceId);
- uint32_t rowSum = sliceGroupSizeAccu;
- uint32_t sidx = 0;
- for (uint32_t i = 0; i < m_numRows; i++)
- {
- const uint32_t rowRange = (rowSum >> 8);
-
- if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
- {
- rowSum += sliceGroupSizeAccu;
- m_sliceBaseRow[++sidx] = i;
- }
-
- m_rows[i].init(m_initSliceContext, sidx);
- }
- X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
-
- m_sliceBaseRow[0] = 0;
- m_sliceBaseRow[m_param->maxSlices] = m_numRows;
+ // reset slice counter for rate control update
+ m_sliceCnt = 0;
uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices;
X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!");
@@ -586,8 +608,10 @@
m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
}
else
+ {
for (uint32_t i = 0; i < numSubstreams; i++)
m_outStreams[i].resetBits();
+ }
int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
@@ -697,10 +721,9 @@
* compressed in a wave-front pattern if WPP is enabled. Row based loop
* filters runs behind the CTU compression and reconstruction */
- for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
- {
+ for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
m_rows[m_sliceBaseRow[sliceId]].active = true;
- }
+
if (m_param->bEnableWavefront)
{
int i = 0;
@@ -982,9 +1005,8 @@
// complete the slice header by writing WPP row-starts
m_entropyCoder.setBitstream(&m_bs);
if (slice->m_pps->bEntropyCodingSyncEnabled)
- {
m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow - 1), maxStreamSize);
- }
+
m_bs.writeByteAlignment();
m_nalList.serialize(slice->m_nalUnitType, m_bs);
@@ -1270,20 +1292,17 @@
const uint32_t lineStartCUAddr = row * numCols;
bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
+ const uint32_t sliceId = curRow.sliceId;
uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16;
- uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1)) / 16;
uint32_t noOfBlocks = m_param->maxCUSize / 16;
const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - 1].sliceId != curRow.sliceId)) ? 1 : 0;
const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
- const uint32_t sliceId = curRow.sliceId;
const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
- if (bFirstRowInSlice && !curRow.completed)
- {
- // Load SBAC coder context from previous row and initialize row state.
- rowCoder.load(m_initSliceContext);
- }
+ // Load SBAC coder context from previous row and initialize row state.
+ if (bFirstRowInSlice && !curRow.completed)
+ rowCoder.load(m_initSliceContext);
// calculate mean QP for consistent deltaQP signalling calculation
if (m_param->bOptCUDeltaQP)
@@ -1294,15 +1313,12 @@
if (m_param->bEnableWavefront || !row)
{
double meanQPOff = 0;
- uint32_t loopIncr, count = 0;
bool isReferenced = IS_REFERENCED(m_frame);
double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;
if (qpoffs)
{
- if (m_param->rc.qgSize == 8)
- loopIncr = 8;
- else
- loopIncr = 16;
+ uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
+
uint32_t cuYStart = 0, height = m_frame->m_fencPic->m_picHeight;
if (m_param->bEnableWavefront)
{
@@ -1312,6 +1328,7 @@
uint32_t qgSize = m_param->rc.qgSize, width = m_frame->m_fencPic->m_picWidth;
uint32_t maxOffsetCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
+ uint32_t count = 0;
for (uint32_t cuY = cuYStart; cuY < height && (cuY < m_frame->m_fencPic->m_picHeight); cuY += qgSize)
{
for (uint32_t cuX = 0; cuX < width; cuX += qgSize)
@@ -1372,16 +1389,16 @@
curRow.bufferedEntropy.copyState(rowCoder);
curRow.bufferedEntropy.loadContexts(rowCoder);
}
- if (!row && m_vbvResetTriggerRow != intRow)
+ if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
{
curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
}
FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
- if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow)
+ if (m_param->bEnableWavefront && rowInSlice >= col && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
- else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow)
+ else if (!m_param->bEnableWavefront && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
else
cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
@@ -1393,7 +1410,8 @@
{
cuStat.vbvCost = 0;
cuStat.intraVbvCost = 0;
- for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)
+
+ for (uint32_t h = 0; h < noOfBlocks && block_y < m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
{
uint32_t idx = block_x + (block_y * maxBlockCols);
@@ -1497,10 +1515,8 @@
int shift = 2 * (m_param->maxCUDepth - depth);
int cuSize = m_param->maxCUSize >> depth;
- if (cuSize == 8)
- curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
- else
- curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] << shift);
+ curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
+ (int)(frameLog.cntIntra[depth] << shift);
curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);
curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift);
@@ -1530,12 +1546,13 @@
if (bIsVbv)
{
// Update encoded bits, satdCost, baseQP for each CU if tune grain is disabled
- if ((m_param->bEnableWavefront && (!cuAddr || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
+ FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
+ if ((m_param->bEnableWavefront && ((cuAddr == m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
{
- curEncData.m_rowStat[row].rowSatd += curEncData.m_cuStat[cuAddr].vbvCost;
- curEncData.m_rowStat[row].rowIntraSatd += curEncData.m_cuStat[cuAddr].intraVbvCost;
- curEncData.m_rowStat[row].encodedBits += curEncData.m_cuStat[cuAddr].totalBits;
- curEncData.m_rowStat[row].sumQpRc += curEncData.m_cuStat[cuAddr].baseQp;
+ curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
+ curEncData.m_rowStat[row].rowIntraSatd += cuStat.intraVbvCost;
+ curEncData.m_rowStat[row].encodedBits += cuStat.totalBits;
+ curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
}
@@ -1543,7 +1560,7 @@
if (!m_param->bEnableWavefront && col == numCols - 1)
{
double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
- int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase);
+ int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
curEncData.m_rowStat[row].rowQp = qpBase;
curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
@@ -1569,15 +1586,16 @@
}
}
// If current block is at row diagonal checkpoint, call vbv ratecontrol.
- else if (m_param->bEnableWavefront && row == col && row)
+ else if (m_param->bEnableWavefront && rowInSlice == col && !bFirstRowInSlice)
{
if (m_param->rc.bEnableConstVbv)
{
- int32_t startCuAddr = numCols * row;
- int32_t EndCuAddr = startCuAddr + col;
- for (int32_t r = row; r >= 0; r--)
+ uint32_t startCuAddr = numCols * row;
+ uint32_t EndCuAddr = startCuAddr + col;
+
+ for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[sliceId]; r--)
{
- for (int32_t c = startCuAddr; c <= EndCuAddr && c <= (int32_t)numCols * (r + 1) - 1; c++)
+ for (uint32_t c = startCuAddr; c <= EndCuAddr && c <= numCols * (r + 1) - 1; c++)
{
curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
@@ -1590,10 +1608,10 @@
}
}
double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
- int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase);
+ int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
curEncData.m_rowStat[row].rowQp = qpBase;
- curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
+ curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
if (reEncode < 0)
{
@@ -1604,7 +1622,7 @@
m_vbvResetTriggerRow = row;
m_bAllRowsStop = true;
- for (uint32_t r = m_numRows - 1; r >= row; r--)
+ for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >= row; r--)
{
CTURow& stopRow = m_rows[r];
@@ -1686,11 +1704,11 @@
/* this row of CTUs has been compressed */
if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
{
- if (row == m_numRows - 1)
+ if (bLastRowInSlice)
{
- for (int32_t r = 0; r < (int32_t)m_numRows; r++)
+ for (uint32_t r = m_sliceBaseRow[sliceId]; r < m_sliceBaseRow[sliceId + 1]; r++)
{
- for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs + 1; c < (int32_t)numCols * (r + 1); c++)
+ for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs + 1; c < numCols * (r + 1); c++)
{
curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
@@ -1708,26 +1726,41 @@
* after half the frame is encoded, but after this initial period we update
* after refLagRows (the number of rows reference frames must have completed
* before referencees may begin encoding) */
- uint32_t rowCount = 0;
if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv)
{
+ uint32_t rowCount = 0;
+ uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - m_sliceBaseRow[sliceId];
if (!m_rce.encodeOrder)
- rowCount = m_numRows - 1;
+ rowCount = maxRows - 1;
else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom))
- rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1);
+ rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);
else
- rowCount = X265_MIN(m_refLagRows, m_numRows - 1);
- if (row == rowCount)
+ rowCount = X265_MIN(m_refLagRows / m_param->maxSlices, maxRows - 1);
+
+ if (rowInSlice == rowCount)
{
- m_rce.rowTotalBits = 0;
+ m_rowSliceTotalBits[sliceId] = 0;
if (bIsVbv)
- for (uint32_t i = 0; i < rowCount; i++)
- m_rce.rowTotalBits += curEncData.m_rowStat[i].encodedBits;
+ {
+ for (uint32_t i = m_sliceBaseRow[sliceId]; i < rowCount + m_sliceBaseRow[sliceId]; i++)
+ m_rowSliceTotalBits[sliceId] += curEncData.m_rowStat[i].encodedBits;
+ }
else
- for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols; cuAddr++)
- m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr].totalBits;
+ {
+ uint32_t startAddr = rowCount * numCols * sliceId;
+ uint32_t finishAddr = startAddr + rowCount * numCols;
+
+ for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; cuAddr++)
+ m_rowSliceTotalBits[sliceId] += curEncData.m_cuStat[cuAddr].totalBits;
+ }
- m_top->m_rateControl->rateControlUpdateStats(&m_rce);
+ if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)
+ {
+ m_rce.rowTotalBits = 0;
+ for (uint32_t i = 0; i < m_param->maxSlices; i++)
+ m_rce.rowTotalBits += m_rowSliceTotalBits[i];
+ m_top->m_rateControl->rateControlUpdateStats(&m_rce);
+ }
}
}
diff -r 0967d0add97e -r 667bbf65185e source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Mon Sep 25 18:35:07 2017 +0530
+++ b/source/encoder/frameencoder.h Fri Sep 22 20:20:58 2017 +0530
@@ -138,6 +138,7 @@
volatile bool m_bAllRowsStop;
volatile int m_completionCount;
volatile int m_vbvResetTriggerRow;
+ volatile int m_sliceCnt;
uint32_t m_numRows;
uint32_t m_numCols;
@@ -147,8 +148,10 @@
CTURow* m_rows;
uint16_t m_sliceAddrBits;
- uint16_t m_sliceGroupSize;
- uint32_t* m_sliceBaseRow;
+ uint32_t m_sliceGroupSize;
+ uint32_t* m_sliceBaseRow;
+ uint32_t* m_sliceMaxBlockRow;
+ int64_t m_rowSliceTotalBits[2];
RateControlEntry m_rce;
SEIDecodedPictureHash m_seiReconPictureDigest;
diff -r 0967d0add97e -r 667bbf65185e source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Mon Sep 25 18:35:07 2017 +0530
+++ b/source/encoder/ratecontrol.cpp Fri Sep 22 20:20:58 2017 +0530
@@ -732,7 +732,6 @@
m_bitrate = m_param->rc.bitrate * 1000;
}
-
void RateControl::initHRD(SPS& sps)
{
int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;
@@ -765,6 +764,7 @@
#undef MAX_DURATION
}
+
bool RateControl::analyseABR2Pass(uint64_t allAvailableBits)
{
double rateFactor, stepMult;
@@ -1473,6 +1473,7 @@
return q;
}
+
double RateControl::countExpectedBits(int startPos, int endPos)
{
double expectedBits = 0;
@@ -1484,6 +1485,7 @@
}
return expectedBits;
}
+
bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int over, int endPos)
{
/* find an interval ending on an overflow or underflow (depending on whether
@@ -1531,6 +1533,7 @@
}
return adjusted;
}
+
bool RateControl::cuTreeReadFor2Pass(Frame* frame)
{
int index = m_encOrder[frame->m_poc];
@@ -1579,24 +1582,24 @@
double RateControl::tuneAbrQScaleFromFeedback(double qScale)
{
double abrBuffer = 2 * m_rateTolerance * m_bitrate;
- /* use framesDone instead of POC as poc count is not serial with bframes enabled */
- double overflow = 1.0;
- double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
- double wantedBits = timeDone * m_bitrate;
- int64_t encodedBits = m_totalBits;
- if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
- {
- abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
- encodedBits = m_encodedBits;
- }
+ /* use framesDone instead of POC as poc count is not serial with bframes enabled */
+ double overflow = 1.0;
+ double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
+ double wantedBits = timeDone * m_bitrate;
+ int64_t encodedBits = m_totalBits;
+ if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
+ {
+ abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
+ encodedBits = m_encodedBits;
+ }
- if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames ||
- m_param->rc.bStrictCbr || m_isGrainEnabled))
- {
- abrBuffer *= X265_MAX(1, sqrt(timeDone));
- overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / abrBuffer);
- qScale *= overflow;
- }
+ if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames ||
+ m_param->rc.bStrictCbr || m_isGrainEnabled))
+ {
+ abrBuffer *= X265_MAX(1, sqrt(timeDone));
+ overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / abrBuffer);
+ qScale *= overflow;
+ }
return qScale;
}
@@ -2330,17 +2333,18 @@
return totalSatdBits + encodedBitsSoFar;
}
-int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)
+int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t sliceId)
{
FrameData& curEncData = *curFrame->m_encData;
double qScaleVbv = x265_qp2qScale(qpVbv);
uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd;
double encodedBits = curEncData.m_rowStat[row].encodedBits;
+ uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
- if (m_param->bEnableWavefront && row == 1)
+ if (m_param->bEnableWavefront && rowInSlice == 1)
{
- rowSatdCost += curEncData.m_rowStat[0].rowSatd;
- encodedBits += curEncData.m_rowStat[0].encodedBits;
+ rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd;
+ encodedBits += curEncData.m_rowStat[row - 1].encodedBits;
}
rowSatdCost >>= X265_DEPTH - 8;
updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost, encodedBits);
@@ -2350,8 +2354,8 @@
if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp)
{
uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowIntraSatd;
- if (m_param->bEnableWavefront && row == 1)
- intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd;
+ if (m_param->bEnableWavefront && rowInSlice == 1)
+ intraRowSatdCost += curEncData.m_rowStat[row - 1].rowIntraSatd;
intraRowSatdCost >>= X265_DEPTH - 8;
updatePredictor(rce->rowPred[1], qScaleVbv, (double)intraRowSatdCost, encodedBits);
}
@@ -2376,7 +2380,7 @@
const SPS& sps = *curEncData.m_slice->m_sps;
double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight);
- if (row < sps.numCuInHeight - 1)
+ if (row < m_sliceBaseRow[sliceId + 1] - 1)
{
/* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
double rcTol = bufferLeftPlanned / m_param->frameNumThreads * m_rateTolerance;
@@ -2693,8 +2697,8 @@
m_encodedBitsWindow[pos % s_slidingWindowFrames] = actualBits;
if(rce->sliceType != I_SLICE)
{
- int qp = int (rce->qpaRc + 0.5);
- m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
+ int qp = int (rce->qpaRc + 0.5);
+ m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
}
curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow;
curFrame->m_rcData->cplxrSum = m_cplxrSum;
@@ -2779,7 +2783,8 @@
curFrame->m_encData->m_frameStats.percent8x8Skip * m_ncu) < 0)
goto writeFailure;
}
- else{
+ else
+ {
RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps;
int i, num = rpsWriter->numberOfPictures;
char deltaPOC[128];
diff -r 0967d0add97e -r 667bbf65185e source/encoder/ratecontrol.h
--- a/source/encoder/ratecontrol.h Mon Sep 25 18:35:07 2017 +0530
+++ b/source/encoder/ratecontrol.h Fri Sep 22 20:20:58 2017 +0530
@@ -244,7 +244,7 @@
int rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);
void rateControlUpdateStats(RateControlEntry* rce);
int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce, int *filler);
- int rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);
+ int rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t sliceId);
int rateControlSliceType(int frameNum);
bool cuTreeReadFor2Pass(Frame* curFrame);
void hrdFullness(SEIBufferingPeriod* sei);
More information about the x265-devel
mailing list