<div dir="ltr">Kindly ignore this patch. I am sending the updated patch after review.</div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Sep 28, 2017 at 1:27 PM, Ashok Kumar Mishra <span dir="ltr">&lt;<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Both patches are the same; you can apply this one on top of my previous two patches.</div><div class="HOEnZb"><div class="h5"><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Sep 28, 2017 at 12:07 PM, Pradeep Ramachandran <span dir="ltr">&lt;<a href="mailto:pradeep@multicorewareinc.com" target="_blank">pradeep@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_extra"><span><div><div class="m_2184269437007544026m_640334328106076000gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div dir="ltr"><div dir="ltr"><div dir="ltr"><div dir="ltr"><div>On Tue, Sep 26, 2017 at 6:49 PM, Ashok Kumar Mishra <span dir="ltr">&lt;<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>&gt;</span> wrote:<br></div></div></div></div></div></div></div></div></div></div></div></div></div></div></span><div class="gmail_quote"><span><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Please find the attached patch.</div></blockquote><div><br></div></span><div>This patch is confusing - is this to be applied on top of the previous patch (which didn't work), or is this a replacement patch (which didn't work either as I can't find the parent)?
</div><div><div class="m_2184269437007544026h5"><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="m_2184269437007544026m_640334328106076000HOEnZb"><div class="m_2184269437007544026m_640334328106076000h5"><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Sep 21, 2017 at 8:21 PM, <span dir="ltr"><<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Ashok Kumar Mishra <<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>><br>
# Date 1506005452 -19800<br>
# Thu Sep 21 20:20:52 2017 +0530<br>
# Node ID 546387e0b983ac1d68cda73777b34a<wbr>122928cd32<br>
# Parent 71f700844b0b2a9120bfd8a2d1f13e<wbr>219aa20677<br>
vbv hanging issue; fix for multiple slices<br>
When multiple slices are enabled, VBV rate control must use the row range of<br>
each slice, since multiple slices are encoded simultaneously.<br>
<br>
diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.cp<wbr>p<br>
--- a/source/encoder/frameencoder.<wbr>cpp Tue Sep 12 18:13:03 2017 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>cpp Thu Sep 21 20:20:52 2017 +0530<br>
@@ -88,6 +88,7 @@<br>
delete[] m_outStreams;<br>
delete[] m_backupStreams;<br>
X265_FREE(m_sliceBaseRow);<br>
+ X265_FREE(m_sliceMaxBlockRow);<br>
X265_FREE(m_cuGeoms);<br>
X265_FREE(m_ctuGeomMap);<br>
X265_FREE(m_substreamSizes);<br>
@@ -118,6 +119,40 @@<br>
<br>
m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);<br>
ok &= !!m_sliceBaseRow;<br>
+ m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;<br>
+ uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;<br>
+ uint32_t rowSum = sliceGroupSizeAccu;<br>
+ uint32_t sidx = 0;<br>
+ for (uint32_t i = 0; i < m_numRows; i++)<br>
+ {<br>
+ const uint32_t rowRange = (rowSum >> 8);<br>
+ if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))<br>
+ {<br>
+ rowSum += sliceGroupSizeAccu;<br>
+ m_sliceBaseRow[++sidx] = i;<br>
+ }<br>
+ }<br>
+ X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");<br>
+ m_sliceBaseRow[0] = 0;<br>
+ m_sliceBaseRow[m_param->maxSli<wbr>ces] = m_numRows;<br>
+<br>
+ m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);<br>
+ ok &= !!m_sliceMaxBlockRow;<br>
+ uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;<br>
+ sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;<br>
+ rowSum = sliceGroupSizeAccu;<br>
+ sidx = 0;<br>
+ for (uint32_t i = 0; i < maxBlockRows; i++)<br>
+ {<br>
+ const uint32_t rowRange = (rowSum >> 8);<br>
+ if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))<br>
+ {<br>
+ rowSum += sliceGroupSizeAccu;<br>
+ m_sliceMaxBlockRow[++sidx] = i;<br>
+ }<br>
+ }<br>
+ m_sliceMaxBlockRow[0] = 0;<br>
+ m_sliceMaxBlockRow[m_param->ma<wbr>xSlices] = maxBlockRows;<br>
<br>
/* determine full motion search range */<br>
int range = m_param->searchRange; /* fpel search */<br>
@@ -341,6 +376,8 @@<br>
m_completionCount = 0;<br>
m_bAllRowsStop = false;<br>
m_vbvResetTriggerRow = -1;<br>
+ m_rowSliceTotalBits[0] = 0;<br>
+ m_rowSliceTotalBits[1] = 0;<br>
<br>
m_SSDY = m_SSDU = m_SSDV = 0;<br>
m_ssim = 0;<br>
@@ -550,28 +587,13 @@<br>
<br>
/* reset entropy coders and compute slice id */<br>
m_entropyCoder.load(m_initSli<wbr>ceContext);<br>
- const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;<br>
- const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;<br>
- m_sliceGroupSize = (uint16_t)sliceGroupSize;<br>
+<br>
+ for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)<br>
+ for (uint32_t row = m_sliceBaseRow[sliceId]; row < m_sliceBaseRow[sliceId + 1]; row++)<br>
+ m_rows[row].init(m_initSliceCo<wbr>ntext, sliceId);<br>
<br>
- uint32_t rowSum = sliceGroupSizeAccu;<br>
- uint32_t sidx = 0;<br>
- for (uint32_t i = 0; i < m_numRows; i++)<br>
- {<br>
- const uint32_t rowRange = (rowSum >> 8);<br>
-<br>
- if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))<br>
- {<br>
- rowSum += sliceGroupSizeAccu;<br>
- m_sliceBaseRow[++sidx] = i;<br>
- }<br>
-<br>
- m_rows[i].init(m_initSliceCont<wbr>ext, sidx);<br>
- }<br>
- X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");<br>
-<br>
- m_sliceBaseRow[0] = 0;<br>
- m_sliceBaseRow[m_param->maxSli<wbr>ces] = m_numRows;<br>
+ // reset slice counter for rate control update<br>
+ m_sliceCnt = 0;<br>
<br>
uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices;<br>
X265_CHECK(m_param->bEnableWa<wbr>vefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!");<br>
@@ -586,8 +608,10 @@<br>
m_rows[i].rowGoOnCoder.setBit<wbr>stream(&m_outStreams[i]);<br>
}<br>
else<br>
+ {<br>
for (uint32_t i = 0; i < numSubstreams; i++)<br>
m_outStreams[i].resetBits();<br>
+ }<br>
<br>
int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;<br>
<br>
@@ -697,10 +721,9 @@<br>
* compressed in a wave-front pattern if WPP is enabled. Row based loop<br>
* filters runs behind the CTU compression and reconstruction */<br>
<br>
- for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)<br>
- {<br>
+ for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)<br>
m_rows[m_sliceBaseRow[sliceId<wbr>]].active = true;<br>
- }<br>
+<br>
if (m_param->bEnableWavefront)<br>
{<br>
int i = 0;<br>
@@ -982,9 +1005,8 @@<br>
// complete the slice header by writing WPP row-starts<br>
m_entropyCoder.setBitstream(&<wbr>m_bs);<br>
if (slice->m_pps->bEntropyCodingS<wbr>yncEnabled)<br>
- {<br>
m_entropyCoder.codeSliceHeade<wbr>rWPPEntryPoints(&m_substreamSi<wbr>zes[prevSliceRow], (nextSliceRow - prevSliceRow - 1), maxStreamSize);<br>
- }<br>
+<br>
m_bs.writeByteAlignment();<br>
<br>
m_nalList.serialize(slice->m_<wbr>nalUnitType, m_bs);<br>
@@ -1270,20 +1292,17 @@<br>
const uint32_t lineStartCUAddr = row * numCols;<br>
bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;<br>
<br>
+ const uint32_t sliceId = curRow.sliceId;<br>
uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidt<wbr>h + (16 - 1)) / 16;<br>
- uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeig<wbr>ht + (16 - 1)) / 16;<br>
uint32_t noOfBlocks = m_param->maxCUSize / 16;<br>
const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - 1].sliceId != curRow.sliceId)) ? 1 : 0;<br>
const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;<br>
- const uint32_t sliceId = curRow.sliceId;<br>
const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];<br>
const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];<br>
<br>
- if (bFirstRowInSlice && !curRow.completed)<br>
- {<br>
- // Load SBAC coder context from previous row and initialize row state.<br>
- rowCoder.load(m_initSliceConte<wbr>xt);<br>
- }<br>
+ // Load SBAC coder context from previous row and initialize row state.<br>
+ if (bFirstRowInSlice && !curRow.completed)<br>
+ rowCoder.load(m_initSliceConte<wbr>xt);<br>
<br>
// calculate mean QP for consistent deltaQP signalling calculation<br>
if (m_param->bOptCUDeltaQP)<br>
@@ -1294,15 +1313,12 @@<br>
if (m_param->bEnableWavefront || !row)<br>
{<br>
double meanQPOff = 0;<br>
- uint32_t loopIncr, count = 0;<br>
bool isReferenced = IS_REFERENCED(m_frame);<br>
double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame->m_lowres.qpCuTreeOffs<wbr>et : m_frame->m_lowres.qpAqOffset;<br>
if (qpoffs)<br>
{<br>
- if (m_param->rc.qgSize == 8)<br>
- loopIncr = 8;<br>
- else<br>
- loopIncr = 16;<br>
+ uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;<br>
+<br>
uint32_t cuYStart = 0, height = m_frame->m_fencPic->m_picHeigh<wbr>t;<br>
if (m_param->bEnableWavefront)<br>
{<br>
@@ -1312,6 +1328,7 @@<br>
<br>
uint32_t qgSize = m_param->rc.qgSize, width = m_frame->m_fencPic->m_picWidth<wbr>;<br>
uint32_t maxOffsetCols = (m_frame->m_fencPic->m_picWidt<wbr>h + (loopIncr - 1)) / loopIncr;<br>
+ uint32_t count = 0;<br>
for (uint32_t cuY = cuYStart; cuY < height && (cuY < m_frame->m_fencPic->m_picHeigh<wbr>t); cuY += qgSize)<br>
{<br>
for (uint32_t cuX = 0; cuX < width; cuX += qgSize)<br>
@@ -1372,16 +1389,16 @@<br>
curRow.bufferedEntropy.copySt<wbr>ate(rowCoder);<br>
curRow.bufferedEntropy.loadCo<wbr>ntexts(rowCoder);<br>
}<br>
- if (!row && m_vbvResetTriggerRow != intRow)<br>
+ if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow)<br>
{<br>
curEncData.m_rowStat[row].row<wbr>Qp = curEncData.m_avgQpRc;<br>
curEncData.m_rowStat[row].row<wbr>QpScale = x265_qp2qScale(curEncData.m_av<wbr>gQpRc);<br>
}<br>
<br>
FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];<br>
- if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow)<br>
+ if (m_param->bEnableWavefront && rowInSlice >= col && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)<br>
cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;<br>
- else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow)<br>
+ else if (!m_param->bEnableWavefront && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)<br>
cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;<br>
else<br>
cuStat.baseQp = curEncData.m_rowStat[row].rowQ<wbr>p;<br>
@@ -1393,7 +1410,8 @@<br>
{<br>
cuStat.vbvCost = 0;<br>
cuStat.intraVbvCost = 0;<br>
- for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)<br>
+<br>
+ for (uint32_t h = 0; h < noOfBlocks && block_y < m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)<br>
{<br>
uint32_t idx = block_x + (block_y * maxBlockCols);<br>
<br>
@@ -1497,10 +1515,8 @@<br>
int shift = 2 * (m_param->maxCUDepth - depth);<br>
int cuSize = m_param->maxCUSize >> depth;<br>
<br>
- if (cuSize == 8)<br>
- curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);<br>
- else<br>
- curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] << shift);<br>
+ curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :<br>
+ (int)(frameLog.cntIntra[depth<wbr>] << shift);<br>
<br>
curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);<br>
curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[dept<wbr>h] + frameLog.cntMergeCu[depth]) << shift);<br>
@@ -1530,12 +1546,13 @@<br>
if (bIsVbv)<br>
{<br>
// Update encoded bits, satdCost, baseQP for each CU if tune grain is disabled<br>
- if ((m_param->bEnableWavefront && (!cuAddr || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)<br>
+ FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];<br>
+ if ((m_param->bEnableWavefront && ((cuAddr == m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)<br>
{<br>
- curEncData.m_rowStat[row].rowS<wbr>atd += curEncData.m_cuStat[cuAddr].vb<wbr>vCost;<br>
- curEncData.m_rowStat[row].rowI<wbr>ntraSatd += curEncData.m_cuStat[cuAddr].in<wbr>traVbvCost;<br>
- curEncData.m_rowStat[row].enco<wbr>dedBits += curEncData.m_cuStat[cuAddr].to<wbr>talBits;<br>
- curEncData.m_rowStat[row].sumQ<wbr>pRc += curEncData.m_cuStat[cuAddr].ba<wbr>seQp;<br>
+ curEncData.m_rowStat[row].rowS<wbr>atd += cuStat.vbvCost;<br>
+ curEncData.m_rowStat[row].rowI<wbr>ntraSatd += cuStat.intraVbvCost;<br>
+ curEncData.m_rowStat[row].enco<wbr>dedBits += cuStat.totalBits;<br>
+ curEncData.m_rowStat[row].sumQ<wbr>pRc += cuStat.baseQp;<br>
curEncData.m_rowStat[row].num<wbr>EncodedCUs = cuAddr;<br>
}<br>
<br>
@@ -1543,7 +1560,7 @@<br>
if (!m_param->bEnableWavefront && col == numCols - 1)<br>
{<br>
double qpBase = curEncData.m_cuStat[cuAddr].ba<wbr>seQp;<br>
- int reEncode = m_top->m_rateControl->rowVbvRa<wbr>teControl(m_frame, row, &m_rce, qpBase);<br>
+ int reEncode = m_top->m_rateControl->rowVbvRa<wbr>teControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId);<br>
qpBase = x265_clip3((double)m_param->rc<wbr>.qpMin, (double)m_param->rc.qpMax, qpBase);<br>
curEncData.m_rowStat[row].row<wbr>Qp = qpBase;<br>
curEncData.m_rowStat[row].row<wbr>QpScale = x265_qp2qScale(qpBase);<br>
@@ -1569,15 +1586,16 @@<br>
}<br>
}<br>
// If current block is at row diagonal checkpoint, call vbv ratecontrol.<br>
- else if (m_param->bEnableWavefront && row == col && row)<br>
+ else if (m_param->bEnableWavefront && rowInSlice == col && !bFirstRowInSlice)<br>
{<br>
if (m_param->rc.bEnableConstVbv)<br>
{<br>
- int32_t startCuAddr = numCols * row;<br>
- int32_t EndCuAddr = startCuAddr + col;<br>
- for (int32_t r = row; r >= 0; r--)<br>
+ uint32_t startCuAddr = numCols * row;<br>
+ uint32_t EndCuAddr = startCuAddr + col;<br>
+<br>
+ for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[sliceI<wbr>d]; r--)<br>
{<br>
- for (int32_t c = startCuAddr; c <= EndCuAddr && c <= (int32_t)numCols * (r + 1) - 1; c++)<br>
+ for (uint32_t c = startCuAddr; c <= EndCuAddr && c <= numCols * (r + 1) - 1; c++)<br>
{<br>
curEncData.m_rowStat[r].rowSa<wbr>td += curEncData.m_cuStat[c].vbvCost<wbr>;<br>
curEncData.m_rowStat[r].rowIn<wbr>traSatd += curEncData.m_cuStat[c].intraVb<wbr>vCost;<br>
@@ -1590,10 +1608,10 @@<br>
}<br>
}<br>
double qpBase = curEncData.m_cuStat[cuAddr].ba<wbr>seQp;<br>
- int reEncode = m_top->m_rateControl->rowVbvRa<wbr>teControl(m_frame, row, &m_rce, qpBase);<br>
+ int reEncode = m_top->m_rateControl->rowVbvRa<wbr>teControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId);<br>
qpBase = x265_clip3((double)m_param->rc<wbr>.qpMin, (double)m_param->rc.qpMax, qpBase);<br>
curEncData.m_rowStat[row].row<wbr>Qp = qpBase;<br>
- curEncData.m_rowStat[row].rowQ<wbr>pScale = x265_qp2qScale(qpBase);<br>
+ curEncData.m_rowStat[row].rowQ<wbr>pScale = x265_qp2qScale(qpBase);<br>
<br>
if (reEncode < 0)<br>
{<br>
@@ -1604,7 +1622,7 @@<br>
m_vbvResetTriggerRow = row;<br>
m_bAllRowsStop = true;<br>
<br>
- for (uint32_t r = m_numRows - 1; r >= row; r--)<br>
+ for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >= row; r--)<br>
{<br>
CTURow& stopRow = m_rows[r];<br>
<br>
@@ -1686,11 +1704,11 @@<br>
/* this row of CTUs has been compressed */<br>
if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)<br>
{<br>
- if (row == m_numRows - 1)<br>
+ if (bLastRowInSlice)<br>
{<br>
- for (int32_t r = 0; r < (int32_t)m_numRows; r++)<br>
+ for (uint32_t r = m_sliceBaseRow[sliceId]; r < m_sliceBaseRow[sliceId + 1]; r++)<br>
{<br>
- for (int32_t c = curEncData.m_rowStat[r].numEnc<wbr>odedCUs + 1; c < (int32_t)numCols * (r + 1); c++)<br>
+ for (uint32_t c = curEncData.m_rowStat[r].numEnc<wbr>odedCUs + 1; c < numCols * (r + 1); c++)<br>
{<br>
curEncData.m_rowStat[r].rowSa<wbr>td += curEncData.m_cuStat[c].vbvCost<wbr>;<br>
curEncData.m_rowStat[r].rowIn<wbr>traSatd += curEncData.m_cuStat[c].intraVb<wbr>vCost;<br>
@@ -1708,26 +1726,41 @@<br>
* after half the frame is encoded, but after this initial period we update<br>
* after refLagRows (the number of rows reference frames must have completed<br>
* before referencees may begin encoding) */<br>
- uint32_t rowCount = 0;<br>
if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv)<br>
{<br>
+ uint32_t rowCount = 0;<br>
+ uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - m_sliceBaseRow[sliceId];<br>
if (!m_rce.encodeOrder)<br>
- rowCount = m_numRows - 1;<br>
+ rowCount = maxRows - 1;<br>
else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom))<br>
- rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1);<br>
+ rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);<br>
else<br>
- rowCount = X265_MIN(m_refLagRows, m_numRows - 1);<br>
- if (row == rowCount)<br>
+ rowCount = X265_MIN(m_refLagRows, maxRows - 1);<br>
+<br>
+ if (rowInSlice == rowCount / m_param->maxSlices)<br>
{<br>
- m_rce.rowTotalBits = 0;<br>
+ m_rowSliceTotalBits[sliceId] = 0;<br>
if (bIsVbv)<br>
- for (uint32_t i = 0; i < rowCount; i++)<br>
- m_rce.rowTotalBits += curEncData.m_rowStat[i].encode<wbr>dBits;<br>
+ {<br>
+ for (uint32_t i = m_sliceBaseRow[sliceId]; i < (rowCount / m_param->maxSlices) + m_sliceBaseRow[sliceId]; i++)<br>
+ m_rowSliceTotalBits[sliceId] += curEncData.m_rowStat[i].encode<wbr>dBits;<br>
+ }<br>
else<br>
- for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols; cuAddr++)<br>
- m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr].to<wbr>talBits;<br>
+ {<br>
+ uint32_t startAddr = rowCount * numCols * sliceId;<br>
+ uint32_t finishAddr = startAddr + rowCount * numCols;<br>
+<br>
+ for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; cuAddr++)<br>
+ m_rowSliceTotalBits[sliceId] += curEncData.m_cuStat[cuAddr].to<wbr>talBits;<br>
+ }<br>
<br>
- m_top->m_rateControl->rateCont<wbr>rolUpdateStats(&m_rce);<br>
+ if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)<br>
+ {<br>
+ m_rce.rowTotalBits = 0;<br>
+ for (uint32_t i = 0; i < m_param->maxSlices; i++)<br>
+ m_rce.rowTotalBits += m_rowSliceTotalBits[i];<br>
+ m_top->m_rateControl->rateCont<wbr>rolUpdateStats(&m_rce);<br>
+ }<br>
}<br>
}<br>
<br>
diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.h<br>
--- a/source/encoder/frameencoder.<wbr>h Tue Sep 12 18:13:03 2017 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>h Thu Sep 21 20:20:52 2017 +0530<br>
@@ -138,6 +138,7 @@<br>
volatile bool m_bAllRowsStop;<br>
volatile int m_completionCount;<br>
volatile int m_vbvResetTriggerRow;<br>
+ volatile int m_sliceCnt;<br>
<br>
uint32_t m_numRows;<br>
uint32_t m_numCols;<br>
@@ -147,8 +148,10 @@<br>
<br>
CTURow* m_rows;<br>
uint16_t m_sliceAddrBits;<br>
- uint16_t m_sliceGroupSize;<br>
- uint32_t* m_sliceBaseRow;<br>
+ uint32_t m_sliceGroupSize;<br>
+ uint32_t* m_sliceBaseRow;<br>
+ uint32_t* m_sliceMaxBlockRow;<br>
+ int64_t m_rowSliceTotalBits[2];<br>
RateControlEntry m_rce;<br>
SEIDecodedPictureHash m_seiReconPictureDigest;<br>
<br>
diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.cpp<br>
--- a/source/encoder/ratecontrol.c<wbr>pp Tue Sep 12 18:13:03 2017 +0530<br>
+++ b/source/encoder/ratecontrol.c<wbr>pp Thu Sep 21 20:20:52 2017 +0530<br>
@@ -732,7 +732,6 @@<br>
m_bitrate = m_param->rc.bitrate * 1000;<br>
}<br>
<br>
-<br>
void RateControl::initHRD(SPS& sps)<br>
{<br>
int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;<br>
@@ -765,6 +764,7 @@<br>
<br>
#undef MAX_DURATION<br>
}<br>
+<br>
bool RateControl::analyseABR2Pass(u<wbr>int64_t allAvailableBits)<br>
{<br>
double rateFactor, stepMult;<br>
@@ -1473,6 +1473,7 @@<br>
<br>
return q;<br>
}<br>
+<br>
double RateControl::countExpectedBits<wbr>(int startPos, int endPos)<br>
{<br>
double expectedBits = 0;<br>
@@ -1484,6 +1485,7 @@<br>
}<br>
return expectedBits;<br>
}<br>
+<br>
bool RateControl::findUnderflow(dou<wbr>ble *fills, int *t0, int *t1, int over, int endPos)<br>
{<br>
/* find an interval ending on an overflow or underflow (depending on whether<br>
@@ -1531,6 +1533,7 @@<br>
}<br>
return adjusted;<br>
}<br>
+<br>
bool RateControl::cuTreeReadFor2Pas<wbr>s(Frame* frame)<br>
{<br>
int index = m_encOrder[frame->m_poc];<br>
@@ -1579,24 +1582,24 @@<br>
double RateControl::tuneAbrQScaleFrom<wbr>Feedback(double qScale)<br>
{<br>
double abrBuffer = 2 * m_rateTolerance * m_bitrate;<br>
- /* use framesDone instead of POC as poc count is not serial with bframes enabled */<br>
- double overflow = 1.0;<br>
- double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;<br>
- double wantedBits = timeDone * m_bitrate;<br>
- int64_t encodedBits = m_totalBits;<br>
- if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)<br>
- {<br>
- abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);<br>
- encodedBits = m_encodedBits;<br>
- }<br>
+ /* use framesDone instead of POC as poc count is not serial with bframes enabled */<br>
+ double overflow = 1.0;<br>
+ double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;<br>
+ double wantedBits = timeDone * m_bitrate;<br>
+ int64_t encodedBits = m_totalBits;<br>
+ if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)<br>
+ {<br>
+ abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);<br>
+ encodedBits = m_encodedBits;<br>
+ }<br>
<br>
- if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames ||<br>
- m_param->rc.bStrictCbr || m_isGrainEnabled))<br>
- {<br>
- abrBuffer *= X265_MAX(1, sqrt(timeDone));<br>
- overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / abrBuffer);<br>
- qScale *= overflow;<br>
- }<br>
+ if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames ||<br>
+ m_param->rc.bStrictCbr || m_isGrainEnabled))<br>
+ {<br>
+ abrBuffer *= X265_MAX(1, sqrt(timeDone));<br>
+ overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / abrBuffer);<br>
+ qScale *= overflow;<br>
+ }<br>
return qScale;<br>
}<br>
<br>
@@ -2330,17 +2333,18 @@<br>
return totalSatdBits + encodedBitsSoFar;<br>
}<br>
<br>
-int RateControl::rowVbvRateControl<wbr>(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)<br>
+int RateControl::rowVbvRateControl<wbr>(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t sliceId)<br>
{<br>
FrameData& curEncData = *curFrame->m_encData;<br>
double qScaleVbv = x265_qp2qScale(qpVbv);<br>
uint64_t rowSatdCost = curEncData.m_rowStat[row].rowS<wbr>atd;<br>
double encodedBits = curEncData.m_rowStat[row].enco<wbr>dedBits;<br>
+ uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];<br>
<br>
- if (m_param->bEnableWavefront && row == 1)<br>
+ if (m_param->bEnableWavefront && rowInSlice == 1)<br>
{<br>
- rowSatdCost += curEncData.m_rowStat[0].rowSat<wbr>d;<br>
- encodedBits += curEncData.m_rowStat[0].encode<wbr>dBits;<br>
+ rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd;<br>
+ encodedBits += curEncData.m_rowStat[row - 1].encodedBits;</blockquote></div></div></div></div></blockquote></div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div><div class="m_2184269437007544026h5"><div class="m_2184269437007544026m_640334328106076000HOEnZb"><div class="m_2184269437007544026m_640334328106076000h5"><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
rowSatdCost >>= X265_DEPTH - 8;<br>
updatePredictor(rce->rowPred[<wbr>0], qScaleVbv, (double)rowSatdCost, encodedBits);<br>
@@ -2350,8 +2354,8 @@<br>
if (qpVbv < refFrame->m_encData->m_rowStat<wbr>[row].rowQp)<br>
{<br>
uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowI<wbr>ntraSatd;<br>
- if (m_param->bEnableWavefront && row == 1)<br>
- intraRowSatdCost += curEncData.m_rowStat[0].rowInt<wbr>raSatd;<br>
+ if (m_param->bEnableWavefront && rowInSlice == 1)<br>
+ intraRowSatdCost += curEncData.m_rowStat[row - 1].rowIntraSatd;<br>
intraRowSatdCost >>= X265_DEPTH - 8;<br>
updatePredictor(rce->rowPred[<wbr>1], qScaleVbv, (double)intraRowSatdCost, encodedBits);<br>
}<br>
@@ -2376,7 +2380,7 @@<br>
const SPS& sps = *curEncData.m_slice->m_sps;<br>
double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight);<br>
<br>
- if (row < sps.numCuInHeight - 1)<br>
+ if (row < m_sliceBaseRow[sliceId + 1] - 1)<br>
{<br>
/* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */<br>
double rcTol = bufferLeftPlanned / m_param->frameNumThreads * m_rateTolerance;<br>
@@ -2693,8 +2697,8 @@<br>
m_encodedBitsWindow[pos % s_slidingWindowFrames] = actualBits;<br>
if(rce->sliceType != I_SLICE)<br>
{<br>
- int qp = int (rce->qpaRc + 0.5);<br>
- m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;<br>
+ int qp = int (rce->qpaRc + 0.5);<br>
+ m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;<br>
}<br>
curFrame->m_rcData->wantedBit<wbr>sWindow = m_wantedBitsWindow;<br>
curFrame->m_rcData->cplxrSum = m_cplxrSum;<br>
@@ -2779,7 +2783,8 @@<br>
curFrame->m_encData->m_frameS<wbr>tats.percent8x8Skip * m_ncu) < 0)<br>
goto writeFailure;<br>
}<br>
- else{<br>
+ else<br>
+ {<br>
RPS* rpsWriter = &curFrame->m_encData->m_slice-<wbr>>m_rps;<br>
int i, num = rpsWriter->numberOfPictures;<br>
char deltaPOC[128];<br>
diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.h<br>
--- a/source/encoder/ratecontrol.h<wbr> Tue Sep 12 18:13:03 2017 +0530<br>
+++ b/source/encoder/ratecontrol.h<wbr> Thu Sep 21 20:20:52 2017 +0530<br>
@@ -244,7 +244,7 @@<br>
int rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);<br>
void rateControlUpdateStats(RateCon<wbr>trolEntry* rce);<br>
int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce, int *filler);<br>
- int rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);<br>
+ int rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t sliceId);<br>
int rateControlSliceType(int frameNum);<br>
bool cuTreeReadFor2Pass(Frame* curFrame);<br>
void hrdFullness(SEIBufferingPeriod<wbr>* sei);<br>
</blockquote></div><br></div>
</div></div><br></div></div>______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/l<wbr>istinfo/x265-devel</a><br>
<br></blockquote></div><br></div></div>
<br>______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/l<wbr>istinfo/x265-devel</a><br>
<br></blockquote></div><br></div>
</div></div></blockquote></div><br></div>