[x265] [PATCH] vbv hanging issue; fix for multiple slices
Ashok Kumar Mishra
ashok at multicorewareinc.com
Thu Sep 28 09:57:24 CEST 2017
Both patches are the same; you can apply it on top of my previous two patches.
On Thu, Sep 28, 2017 at 12:07 PM, Pradeep Ramachandran <
pradeep at multicorewareinc.com> wrote:
> On Tue, Sep 26, 2017 at 6:49 PM, Ashok Kumar Mishra <
> ashok at multicorewareinc.com> wrote:
>
>> Please find the attached patch.
>>
>
> This patch is confusing - is it meant to be applied on top of the previous
> patch (which didn't work), or is it a replacement patch (which didn't
> work either, as I can't find the parent)?
>
>
>>
>> On Thu, Sep 21, 2017 at 8:21 PM, <ashok at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Ashok Kumar Mishra <ashok at multicorewareinc.com>
>>> # Date 1506005452 -19800
>>> # Thu Sep 21 20:20:52 2017 +0530
>>> # Node ID 546387e0b983ac1d68cda73777b34a122928cd32
>>> # Parent 71f700844b0b2a9120bfd8a2d1f13e219aa20677
>>> vbv hanging issue; fix for multiple slices
>>> When multiple slices are enabled, vbv rate control must operate on the
>>> correct rows within each slice, since multiple slices are encoded
>>> simultaneously.
>>>
>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.cpp
>>> --- a/source/encoder/frameencoder.cpp Tue Sep 12 18:13:03 2017 +0530
>>> +++ b/source/encoder/frameencoder.cpp Thu Sep 21 20:20:52 2017 +0530
>>> @@ -88,6 +88,7 @@
>>> delete[] m_outStreams;
>>> delete[] m_backupStreams;
>>> X265_FREE(m_sliceBaseRow);
>>> + X265_FREE(m_sliceMaxBlockRow);
>>> X265_FREE(m_cuGeoms);
>>> X265_FREE(m_ctuGeomMap);
>>> X265_FREE(m_substreamSizes);
>>> @@ -118,6 +119,40 @@
>>>
>>> m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
>>> ok &= !!m_sliceBaseRow;
>>> + m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) /
>>> m_param->maxSlices;
>>> + uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
>>> + uint32_t rowSum = sliceGroupSizeAccu;
>>> + uint32_t sidx = 0;
>>> + for (uint32_t i = 0; i < m_numRows; i++)
>>> + {
>>> + const uint32_t rowRange = (rowSum >> 8);
>>> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>>> + {
>>> + rowSum += sliceGroupSizeAccu;
>>> + m_sliceBaseRow[++sidx] = i;
>>> + }
>>> + }
>>> + X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
>>> + m_sliceBaseRow[0] = 0;
>>> + m_sliceBaseRow[m_param->maxSlices] = m_numRows;
>>> +
>>> + m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
>>> + ok &= !!m_sliceMaxBlockRow;
>>> + uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
>>> + sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
>>> + rowSum = sliceGroupSizeAccu;
>>> + sidx = 0;
>>> + for (uint32_t i = 0; i < maxBlockRows; i++)
>>> + {
>>> + const uint32_t rowRange = (rowSum >> 8);
>>> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>>> + {
>>> + rowSum += sliceGroupSizeAccu;
>>> + m_sliceMaxBlockRow[++sidx] = i;
>>> + }
>>> + }
>>> + m_sliceMaxBlockRow[0] = 0;
>>> + m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
>>>
>>> /* determine full motion search range */
>>> int range = m_param->searchRange; /* fpel search */
>>> @@ -341,6 +376,8 @@
>>> m_completionCount = 0;
>>> m_bAllRowsStop = false;
>>> m_vbvResetTriggerRow = -1;
>>> + m_rowSliceTotalBits[0] = 0;
>>> + m_rowSliceTotalBits[1] = 0;
>>>
>>> m_SSDY = m_SSDU = m_SSDV = 0;
>>> m_ssim = 0;
>>> @@ -550,28 +587,13 @@
>>>
>>> /* reset entropy coders and compute slice id */
>>> m_entropyCoder.load(m_initSliceContext);
>>> - const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices -
>>> 1) / m_param->maxSlices;
>>> - const uint32_t sliceGroupSizeAccu = (m_numRows << 8) /
>>> m_param->maxSlices;
>>> - m_sliceGroupSize = (uint16_t)sliceGroupSize;
>>> +
>>> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>>> + for (uint32_t row = m_sliceBaseRow[sliceId]; row <
>>> m_sliceBaseRow[sliceId + 1]; row++)
>>> + m_rows[row].init(m_initSliceContext, sliceId);
>>>
>>> - uint32_t rowSum = sliceGroupSizeAccu;
>>> - uint32_t sidx = 0;
>>> - for (uint32_t i = 0; i < m_numRows; i++)
>>> - {
>>> - const uint32_t rowRange = (rowSum >> 8);
>>> -
>>> - if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>>> - {
>>> - rowSum += sliceGroupSizeAccu;
>>> - m_sliceBaseRow[++sidx] = i;
>>> - }
>>> -
>>> - m_rows[i].init(m_initSliceContext, sidx);
>>> - }
>>> - X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
>>> -
>>> - m_sliceBaseRow[0] = 0;
>>> - m_sliceBaseRow[m_param->maxSlices] = m_numRows;
>>> + // reset slice counter for rate control update
>>> + m_sliceCnt = 0;
>>>
>>> uint32_t numSubstreams = m_param->bEnableWavefront ?
>>> slice->m_sps->numCuInHeight : m_param->maxSlices;
>>> X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1),
>>> "Multiple slices without WPP unsupport now!");
>>> @@ -586,8 +608,10 @@
>>> m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
>>> }
>>> else
>>> + {
>>> for (uint32_t i = 0; i < numSubstreams; i++)
>>> m_outStreams[i].resetBits();
>>> + }
>>>
>>> int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
>>>
>>> @@ -697,10 +721,9 @@
>>> * compressed in a wave-front pattern if WPP is enabled. Row based
>>> loop
>>> * filters runs behind the CTU compression and reconstruction */
>>>
>>> - for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>>> - {
>>> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>>> m_rows[m_sliceBaseRow[sliceId]].active = true;
>>> - }
>>> +
>>> if (m_param->bEnableWavefront)
>>> {
>>> int i = 0;
>>> @@ -982,9 +1005,8 @@
>>> // complete the slice header by writing WPP row-starts
>>> m_entropyCoder.setBitstream(&m_bs);
>>> if (slice->m_pps->bEntropyCodingSyncEnabled)
>>> - {
>>> m_entropyCoder.codeSliceHeade
>>> rWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow -
>>> prevSliceRow - 1), maxStreamSize);
>>> - }
>>> +
>>> m_bs.writeByteAlignment();
>>>
>>> m_nalList.serialize(slice->m_nalUnitType, m_bs);
>>> @@ -1270,20 +1292,17 @@
>>> const uint32_t lineStartCUAddr = row * numCols;
>>> bool bIsVbv = m_param->rc.vbvBufferSize > 0 &&
>>> m_param->rc.vbvMaxBitrate > 0;
>>>
>>> + const uint32_t sliceId = curRow.sliceId;
>>> uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 -
>>> 1)) / 16;
>>> - uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 -
>>> 1)) / 16;
>>> uint32_t noOfBlocks = m_param->maxCUSize / 16;
>>> const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row -
>>> 1].sliceId != curRow.sliceId)) ? 1 : 0;
>>> const uint32_t bLastRowInSlice = ((row == m_numRows - 1) ||
>>> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
>>> - const uint32_t sliceId = curRow.sliceId;
>>> const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
>>> const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
>>>
>>> - if (bFirstRowInSlice && !curRow.completed)
>>> - {
>>> - // Load SBAC coder context from previous row and initialize row
>>> state.
>>> - rowCoder.load(m_initSliceContext);
>>> - }
>>> + // Load SBAC coder context from previous row and initialize row
>>> state.
>>> + if (bFirstRowInSlice && !curRow.completed)
>>> + rowCoder.load(m_initSliceContext);
>>>
>>> // calculate mean QP for consistent deltaQP signalling calculation
>>> if (m_param->bOptCUDeltaQP)
>>> @@ -1294,15 +1313,12 @@
>>> if (m_param->bEnableWavefront || !row)
>>> {
>>> double meanQPOff = 0;
>>> - uint32_t loopIncr, count = 0;
>>> bool isReferenced = IS_REFERENCED(m_frame);
>>> double *qpoffs = (isReferenced && m_param->rc.cuTree) ?
>>> m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;
>>> if (qpoffs)
>>> {
>>> - if (m_param->rc.qgSize == 8)
>>> - loopIncr = 8;
>>> - else
>>> - loopIncr = 16;
>>> + uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 :
>>> 16;
>>> +
>>> uint32_t cuYStart = 0, height =
>>> m_frame->m_fencPic->m_picHeight;
>>> if (m_param->bEnableWavefront)
>>> {
>>> @@ -1312,6 +1328,7 @@
>>>
>>> uint32_t qgSize = m_param->rc.qgSize, width =
>>> m_frame->m_fencPic->m_picWidth;
>>> uint32_t maxOffsetCols =
>>> (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
>>> + uint32_t count = 0;
>>> for (uint32_t cuY = cuYStart; cuY < height && (cuY
>>> < m_frame->m_fencPic->m_picHeight); cuY += qgSize)
>>> {
>>> for (uint32_t cuX = 0; cuX < width; cuX +=
>>> qgSize)
>>> @@ -1372,16 +1389,16 @@
>>> curRow.bufferedEntropy.copyState(rowCoder);
>>> curRow.bufferedEntropy.loadContexts(rowCoder);
>>> }
>>> - if (!row && m_vbvResetTriggerRow != intRow)
>>> + if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
>>> {
>>> curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
>>> curEncData.m_rowStat[row].rowQpScale =
>>> x265_qp2qScale(curEncData.m_avgQpRc);
>>> }
>>>
>>> FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
>>> - if (m_param->bEnableWavefront && row >= col && row &&
>>> m_vbvResetTriggerRow != intRow)
>>> + if (m_param->bEnableWavefront && rowInSlice >= col &&
>>> !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
>>> cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols +
>>> 1].baseQp;
>>> - else if (!m_param->bEnableWavefront && row &&
>>> m_vbvResetTriggerRow != intRow)
>>> + else if (!m_param->bEnableWavefront && !bFirstRowInSlice &&
>>> m_vbvResetTriggerRow != intRow)
>>> cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
>>> else
>>> cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
>>> @@ -1393,7 +1410,8 @@
>>> {
>>> cuStat.vbvCost = 0;
>>> cuStat.intraVbvCost = 0;
>>> - for (uint32_t h = 0; h < noOfBlocks && block_y <
>>> maxBlockRows; h++, block_y++)
>>> +
>>> + for (uint32_t h = 0; h < noOfBlocks && block_y <
>>> m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
>>> {
>>> uint32_t idx = block_x + (block_y * maxBlockCols);
>>>
>>> @@ -1497,10 +1515,8 @@
>>> int shift = 2 * (m_param->maxCUDepth - depth);
>>> int cuSize = m_param->maxCUSize >> depth;
>>>
>>> - if (cuSize == 8)
>>> - curRow.rowStats.intra8x8Cnt +=
>>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
>>> - else
>>> - curRow.rowStats.intra8x8Cnt +=
>>> (int)(frameLog.cntIntra[depth] << shift);
>>> + curRow.rowStats.intra8x8Cnt += (cuSize == 8) ?
>>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
>>> +
>>> (int)(frameLog.cntIntra[depth] << shift);
>>>
>>> curRow.rowStats.inter8x8Cnt +=
>>> (int)(frameLog.cntInter[depth] << shift);
>>> curRow.rowStats.skip8x8Cnt +=
>>> (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) <<
>>> shift);
>>> @@ -1530,12 +1546,13 @@
>>> if (bIsVbv)
>>> {
>>> // Update encoded bits, satdCost, baseQP for each CU if
>>> tune grain is disabled
>>> - if ((m_param->bEnableWavefront && (!cuAddr ||
>>> !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
>>> + FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
>>> + if ((m_param->bEnableWavefront && ((cuAddr ==
>>> m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) ||
>>> !m_param->bEnableWavefront)
>>> {
>>> - curEncData.m_rowStat[row].rowSatd +=
>>> curEncData.m_cuStat[cuAddr].vbvCost;
>>> - curEncData.m_rowStat[row].rowIntraSatd +=
>>> curEncData.m_cuStat[cuAddr].intraVbvCost;
>>> - curEncData.m_rowStat[row].encodedBits +=
>>> curEncData.m_cuStat[cuAddr].totalBits;
>>> - curEncData.m_rowStat[row].sumQpRc +=
>>> curEncData.m_cuStat[cuAddr].baseQp;
>>> + curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
>>> + curEncData.m_rowStat[row].rowIntraSatd +=
>>> cuStat.intraVbvCost;
>>> + curEncData.m_rowStat[row].encodedBits +=
>>> cuStat.totalBits;
>>> + curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
>>> curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
>>> }
>>>
>>> @@ -1543,7 +1560,7 @@
>>> if (!m_param->bEnableWavefront && col == numCols - 1)
>>> {
>>> double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
>>> - int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>>> row, &m_rce, qpBase);
>>> + int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
>>> qpBase = x265_clip3((double)m_param->rc.qpMin,
>>> (double)m_param->rc.qpMax, qpBase);
>>> curEncData.m_rowStat[row].rowQp = qpBase;
>>> curEncData.m_rowStat[row].rowQpScale =
>>> x265_qp2qScale(qpBase);
>>> @@ -1569,15 +1586,16 @@
>>> }
>>> }
>>> // If current block is at row diagonal checkpoint, call vbv
>>> ratecontrol.
>>> - else if (m_param->bEnableWavefront && row == col && row)
>>> + else if (m_param->bEnableWavefront && rowInSlice == col &&
>>> !bFirstRowInSlice)
>>> {
>>> if (m_param->rc.bEnableConstVbv)
>>> {
>>> - int32_t startCuAddr = numCols * row;
>>> - int32_t EndCuAddr = startCuAddr + col;
>>> - for (int32_t r = row; r >= 0; r--)
>>> + uint32_t startCuAddr = numCols * row;
>>> + uint32_t EndCuAddr = startCuAddr + col;
>>> +
>>> + for (int32_t r = row; r >=
>>> (int32_t)m_sliceBaseRow[sliceId]; r--)
>>> {
>>> - for (int32_t c = startCuAddr; c <= EndCuAddr &&
>>> c <= (int32_t)numCols * (r + 1) - 1; c++)
>>> + for (uint32_t c = startCuAddr; c <= EndCuAddr
>>> && c <= numCols * (r + 1) - 1; c++)
>>> {
>>> curEncData.m_rowStat[r].rowSatd +=
>>> curEncData.m_cuStat[c].vbvCost;
>>> curEncData.m_rowStat[r].rowIntraSatd +=
>>> curEncData.m_cuStat[c].intraVbvCost;
>>> @@ -1590,10 +1608,10 @@
>>> }
>>> }
>>> double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
>>> - int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>>> row, &m_rce, qpBase);
>>> + int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
>>> qpBase = x265_clip3((double)m_param->rc.qpMin,
>>> (double)m_param->rc.qpMax, qpBase);
>>> curEncData.m_rowStat[row].rowQp = qpBase;
>>> - curEncData.m_rowStat[row].rowQpScale =
>>> x265_qp2qScale(qpBase);
>>> + curEncData.m_rowStat[row].rowQpScale =
>>> x265_qp2qScale(qpBase);
>>>
>>> if (reEncode < 0)
>>> {
>>> @@ -1604,7 +1622,7 @@
>>> m_vbvResetTriggerRow = row;
>>> m_bAllRowsStop = true;
>>>
>>> - for (uint32_t r = m_numRows - 1; r >= row; r--)
>>> + for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1;
>>> r >= row; r--)
>>> {
>>> CTURow& stopRow = m_rows[r];
>>>
>>> @@ -1686,11 +1704,11 @@
>>> /* this row of CTUs has been compressed */
>>> if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
>>> {
>>> - if (row == m_numRows - 1)
>>> + if (bLastRowInSlice)
>>> {
>>> - for (int32_t r = 0; r < (int32_t)m_numRows; r++)
>>> + for (uint32_t r = m_sliceBaseRow[sliceId]; r <
>>> m_sliceBaseRow[sliceId + 1]; r++)
>>> {
>>> - for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs
>>> + 1; c < (int32_t)numCols * (r + 1); c++)
>>> + for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs
>>> + 1; c < numCols * (r + 1); c++)
>>> {
>>> curEncData.m_rowStat[r].rowSatd +=
>>> curEncData.m_cuStat[c].vbvCost;
>>> curEncData.m_rowStat[r].rowIntraSatd +=
>>> curEncData.m_cuStat[c].intraVbvCost;
>>> @@ -1708,26 +1726,41 @@
>>> * after half the frame is encoded, but after this initial period
>>> we update
>>> * after refLagRows (the number of rows reference frames must have
>>> completed
>>> * before referencees may begin encoding) */
>>> - uint32_t rowCount = 0;
>>> if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv)
>>> {
>>> + uint32_t rowCount = 0;
>>> + uint32_t maxRows = m_sliceBaseRow[sliceId + 1] -
>>> m_sliceBaseRow[sliceId];
>>> if (!m_rce.encodeOrder)
>>> - rowCount = m_numRows - 1;
>>> + rowCount = maxRows - 1;
>>> else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum /
>>> m_param->fpsDenom))
>>> - rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1);
>>> + rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);
>>> else
>>> - rowCount = X265_MIN(m_refLagRows, m_numRows - 1);
>>> - if (row == rowCount)
>>> + rowCount = X265_MIN(m_refLagRows, maxRows - 1);
>>> +
>>> + if (rowInSlice == rowCount / m_param->maxSlices)
>>> {
>>> - m_rce.rowTotalBits = 0;
>>> + m_rowSliceTotalBits[sliceId] = 0;
>>> if (bIsVbv)
>>> - for (uint32_t i = 0; i < rowCount; i++)
>>> - m_rce.rowTotalBits += curEncData.m_rowStat[i].encode
>>> dBits;
>>> + {
>>> + for (uint32_t i = m_sliceBaseRow[sliceId]; i <
>>> (rowCount / m_param->maxSlices) + m_sliceBaseRow[sliceId]; i++)
>>> + m_rowSliceTotalBits[sliceId] +=
>>> curEncData.m_rowStat[i].encodedBits;
>>> + }
>>> else
>>> - for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols;
>>> cuAddr++)
>>> - m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr].to
>>> talBits;
>>> + {
>>> + uint32_t startAddr = rowCount * numCols * sliceId;
>>> + uint32_t finishAddr = startAddr + rowCount * numCols;
>>> +
>>> + for (uint32_t cuAddr = startAddr; cuAddr < finishAddr;
>>> cuAddr++)
>>> + m_rowSliceTotalBits[sliceId] +=
>>> curEncData.m_cuStat[cuAddr].totalBits;
>>> + }
>>>
>>> - m_top->m_rateControl->rateControlUpdateStats(&m_rce);
>>> + if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)
>>> + {
>>> + m_rce.rowTotalBits = 0;
>>> + for (uint32_t i = 0; i < m_param->maxSlices; i++)
>>> + m_rce.rowTotalBits += m_rowSliceTotalBits[i];
>>> + m_top->m_rateControl->rateControlUpdateStats(&m_rce);
>>> + }
>>> }
>>> }
>>>
>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.h
>>> --- a/source/encoder/frameencoder.h Tue Sep 12 18:13:03 2017 +0530
>>> +++ b/source/encoder/frameencoder.h Thu Sep 21 20:20:52 2017 +0530
>>> @@ -138,6 +138,7 @@
>>> volatile bool m_bAllRowsStop;
>>> volatile int m_completionCount;
>>> volatile int m_vbvResetTriggerRow;
>>> + volatile int m_sliceCnt;
>>>
>>> uint32_t m_numRows;
>>> uint32_t m_numCols;
>>> @@ -147,8 +148,10 @@
>>>
>>> CTURow* m_rows;
>>> uint16_t m_sliceAddrBits;
>>> - uint16_t m_sliceGroupSize;
>>> - uint32_t* m_sliceBaseRow;
>>> + uint32_t m_sliceGroupSize;
>>> + uint32_t* m_sliceBaseRow;
>>> + uint32_t* m_sliceMaxBlockRow;
>>> + int64_t m_rowSliceTotalBits[2];
>>> RateControlEntry m_rce;
>>> SEIDecodedPictureHash m_seiReconPictureDigest;
>>>
>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.cpp
>>> --- a/source/encoder/ratecontrol.cpp Tue Sep 12 18:13:03 2017 +0530
>>> +++ b/source/encoder/ratecontrol.cpp Thu Sep 21 20:20:52 2017 +0530
>>> @@ -732,7 +732,6 @@
>>> m_bitrate = m_param->rc.bitrate * 1000;
>>> }
>>>
>>> -
>>> void RateControl::initHRD(SPS& sps)
>>> {
>>> int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;
>>> @@ -765,6 +764,7 @@
>>>
>>> #undef MAX_DURATION
>>> }
>>> +
>>> bool RateControl::analyseABR2Pass(uint64_t allAvailableBits)
>>> {
>>> double rateFactor, stepMult;
>>> @@ -1473,6 +1473,7 @@
>>>
>>> return q;
>>> }
>>> +
>>> double RateControl::countExpectedBits(int startPos, int endPos)
>>> {
>>> double expectedBits = 0;
>>> @@ -1484,6 +1485,7 @@
>>> }
>>> return expectedBits;
>>> }
>>> +
>>> bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int
>>> over, int endPos)
>>> {
>>> /* find an interval ending on an overflow or underflow (depending
>>> on whether
>>> @@ -1531,6 +1533,7 @@
>>> }
>>> return adjusted;
>>> }
>>> +
>>> bool RateControl::cuTreeReadFor2Pass(Frame* frame)
>>> {
>>> int index = m_encOrder[frame->m_poc];
>>> @@ -1579,24 +1582,24 @@
>>> double RateControl::tuneAbrQScaleFromFeedback(double qScale)
>>> {
>>> double abrBuffer = 2 * m_rateTolerance * m_bitrate;
>>> - /* use framesDone instead of POC as poc count is not serial
>>> with bframes enabled */
>>> - double overflow = 1.0;
>>> - double timeDone = (double)(m_framesDone -
>>> m_param->frameNumThreads + 1) * m_frameDuration;
>>> - double wantedBits = timeDone * m_bitrate;
>>> - int64_t encodedBits = m_totalBits;
>>> - if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
>>> - {
>>> - abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
>>> - encodedBits = m_encodedBits;
>>> - }
>>> + /* use framesDone instead of POC as poc count is not serial with
>>> bframes enabled */
>>> + double overflow = 1.0;
>>> + double timeDone = (double)(m_framesDone - m_param->frameNumThreads
>>> + 1) * m_frameDuration;
>>> + double wantedBits = timeDone * m_bitrate;
>>> + int64_t encodedBits = m_totalBits;
>>> + if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
>>> + {
>>> + abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
>>> + encodedBits = m_encodedBits;
>>> + }
>>>
>>> - if (wantedBits > 0 && encodedBits > 0 &&
>>> (!m_partialResidualFrames ||
>>> - m_param->rc.bStrictCbr || m_isGrainEnabled))
>>> - {
>>> - abrBuffer *= X265_MAX(1, sqrt(timeDone));
>>> - overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits -
>>> wantedBits) / abrBuffer);
>>> - qScale *= overflow;
>>> - }
>>> + if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames
>>> ||
>>> + m_param->rc.bStrictCbr || m_isGrainEnabled))
>>> + {
>>> + abrBuffer *= X265_MAX(1, sqrt(timeDone));
>>> + overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits)
>>> / abrBuffer);
>>> + qScale *= overflow;
>>> + }
>>> return qScale;
>>> }
>>>
>>> @@ -2330,17 +2333,18 @@
>>> return totalSatdBits + encodedBitsSoFar;
>>> }
>>>
>>> -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
>>> RateControlEntry* rce, double& qpVbv)
>>> +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
>>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t
>>> sliceId)
>>> {
>>> FrameData& curEncData = *curFrame->m_encData;
>>> double qScaleVbv = x265_qp2qScale(qpVbv);
>>> uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd;
>>> double encodedBits = curEncData.m_rowStat[row].encodedBits;
>>> + uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
>>>
>>> - if (m_param->bEnableWavefront && row == 1)
>>> + if (m_param->bEnableWavefront && rowInSlice == 1)
>>> {
>>> - rowSatdCost += curEncData.m_rowStat[0].rowSatd;
>>> - encodedBits += curEncData.m_rowStat[0].encodedBits;
>>> + rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd;
>>> + encodedBits += curEncData.m_rowStat[row - 1].encodedBits;
>>
>> }
>>> rowSatdCost >>= X265_DEPTH - 8;
>>> updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost,
>>> encodedBits);
>>> @@ -2350,8 +2354,8 @@
>>> if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp)
>>> {
>>> uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowI
>>> ntraSatd;
>>> - if (m_param->bEnableWavefront && row == 1)
>>> - intraRowSatdCost += curEncData.m_rowStat[0].rowInt
>>> raSatd;
>>> + if (m_param->bEnableWavefront && rowInSlice == 1)
>>> + intraRowSatdCost += curEncData.m_rowStat[row -
>>> 1].rowIntraSatd;
>>> intraRowSatdCost >>= X265_DEPTH - 8;
>>> updatePredictor(rce->rowPred[1], qScaleVbv,
>>> (double)intraRowSatdCost, encodedBits);
>>> }
>>> @@ -2376,7 +2380,7 @@
>>> const SPS& sps = *curEncData.m_slice->m_sps;
>>> double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight);
>>>
>>> - if (row < sps.numCuInHeight - 1)
>>> + if (row < m_sliceBaseRow[sliceId + 1] - 1)
>>> {
>>> /* More threads means we have to be more cautious in letting
>>> ratecontrol use up extra bits. */
>>> double rcTol = bufferLeftPlanned / m_param->frameNumThreads *
>>> m_rateTolerance;
>>> @@ -2693,8 +2697,8 @@
>>> m_encodedBitsWindow[pos % s_slidingWindowFrames] =
>>> actualBits;
>>> if(rce->sliceType != I_SLICE)
>>> {
>>> - int qp = int (rce->qpaRc + 0.5);
>>> - m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ?
>>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
>>> + int qp = int (rce->qpaRc + 0.5);
>>> + m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ?
>>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
>>> }
>>> curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow;
>>> curFrame->m_rcData->cplxrSum = m_cplxrSum;
>>> @@ -2779,7 +2783,8 @@
>>> curFrame->m_encData->m_frameStats.percent8x8Skip * m_ncu)
>>> < 0)
>>> goto writeFailure;
>>> }
>>> - else{
>>> + else
>>> + {
>>> RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps;
>>> int i, num = rpsWriter->numberOfPictures;
>>> char deltaPOC[128];
>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.h
>>> --- a/source/encoder/ratecontrol.h Tue Sep 12 18:13:03 2017 +0530
>>> +++ b/source/encoder/ratecontrol.h Thu Sep 21 20:20:52 2017 +0530
>>> @@ -244,7 +244,7 @@
>>> int rateControlStart(Frame* curFrame, RateControlEntry* rce,
>>> Encoder* enc);
>>> void rateControlUpdateStats(RateControlEntry* rce);
>>> int rateControlEnd(Frame* curFrame, int64_t bits,
>>> RateControlEntry* rce, int *filler);
>>> - int rowVbvRateControl(Frame* curFrame, uint32_t row,
>>> RateControlEntry* rce, double& qpVbv);
>>> + int rowVbvRateControl(Frame* curFrame, uint32_t row,
>>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t
>>> sliceId);
>>> int rateControlSliceType(int frameNum);
>>> bool cuTreeReadFor2Pass(Frame* curFrame);
>>> void hrdFullness(SEIBufferingPeriod* sei);
>>>
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170928/ed8a81ee/attachment-0001.html>
More information about the x265-devel
mailing list