[x265] [PATCH] vbv hanging issue; fix for multiple slices
Pradeep Ramachandran
pradeep at multicorewareinc.com
Thu Sep 28 11:29:21 CEST 2017
On Thu, Sep 28, 2017 at 2:16 PM, <ashok at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra <ashok at multicorewareinc.com>
> # Date 1506091858 -19800
> # Fri Sep 22 20:20:58 2017 +0530
> # Node ID c838e60c7c6ba0ab07e2d4130a5c2ba22e0b1eea
> # Parent e62b12bd8b4573b15290ebf110e01c8fafce55be
> vbv hanging issue; fix for multiple slices
> When multiple slices are enabled, VBV rate control must operate on the
> correct rows within each slice, since multiple slices are encoded simultaneously.
>
>
This patch doesn't apply cleanly to the current tip of the default branch.
Please fix and resend.
> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Thu Jun 29 13:13:56 2017 +0530
> +++ b/source/encoder/frameencoder.cpp Fri Sep 22 20:20:58 2017 +0530
> @@ -88,6 +88,7 @@
> delete[] m_outStreams;
> delete[] m_backupStreams;
> X265_FREE(m_sliceBaseRow);
> + X265_FREE(m_sliceMaxBlockRow);
> X265_FREE(m_cuGeoms);
> X265_FREE(m_ctuGeomMap);
> X265_FREE(m_substreamSizes);
> @@ -118,6 +119,40 @@
>
> m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
> ok &= !!m_sliceBaseRow;
> + m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) /
> m_param->maxSlices;
> + uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
> + uint32_t rowSum = sliceGroupSizeAccu;
> + uint32_t sidx = 0;
> + for (uint32_t i = 0; i < m_numRows; i++)
> + {
> + const uint32_t rowRange = (rowSum >> 8);
> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
> + {
> + rowSum += sliceGroupSizeAccu;
> + m_sliceBaseRow[++sidx] = i;
> + }
> + }
> + X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
> + m_sliceBaseRow[0] = 0;
> + m_sliceBaseRow[m_param->maxSlices] = m_numRows;
> +
> + m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
> + ok &= !!m_sliceMaxBlockRow;
> + uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
> + sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
> + rowSum = sliceGroupSizeAccu;
> + sidx = 0;
> + for (uint32_t i = 0; i < maxBlockRows; i++)
> + {
> + const uint32_t rowRange = (rowSum >> 8);
> + if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
> + {
> + rowSum += sliceGroupSizeAccu;
> + m_sliceMaxBlockRow[++sidx] = i;
> + }
> + }
> + m_sliceMaxBlockRow[0] = 0;
> + m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
>
> /* determine full motion search range */
> int range = m_param->searchRange; /* fpel search */
> @@ -341,6 +376,8 @@
> m_completionCount = 0;
> m_bAllRowsStop = false;
> m_vbvResetTriggerRow = -1;
> + m_rowSliceTotalBits[0] = 0;
> + m_rowSliceTotalBits[1] = 0;
>
> m_SSDY = m_SSDU = m_SSDV = 0;
> m_ssim = 0;
> @@ -550,28 +587,13 @@
>
> /* reset entropy coders and compute slice id */
> m_entropyCoder.load(m_initSliceContext);
> - const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1)
> / m_param->maxSlices;
> - const uint32_t sliceGroupSizeAccu = (m_numRows << 8) /
> m_param->maxSlices;
> - m_sliceGroupSize = (uint16_t)sliceGroupSize;
> +
> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
> + for (uint32_t row = m_sliceBaseRow[sliceId]; row <
> m_sliceBaseRow[sliceId + 1]; row++)
> + m_rows[row].init(m_initSliceContext, sliceId);
>
> - uint32_t rowSum = sliceGroupSizeAccu;
> - uint32_t sidx = 0;
> - for (uint32_t i = 0; i < m_numRows; i++)
> - {
> - const uint32_t rowRange = (rowSum >> 8);
> -
> - if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
> - {
> - rowSum += sliceGroupSizeAccu;
> - m_sliceBaseRow[++sidx] = i;
> - }
> -
> - m_rows[i].init(m_initSliceContext, sidx);
> - }
> - X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
> -
> - m_sliceBaseRow[0] = 0;
> - m_sliceBaseRow[m_param->maxSlices] = m_numRows;
> + // reset slice counter for rate control update
> + m_sliceCnt = 0;
>
> uint32_t numSubstreams = m_param->bEnableWavefront ?
> slice->m_sps->numCuInHeight : m_param->maxSlices;
> X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1),
> "Multiple slices without WPP unsupport now!");
> @@ -586,8 +608,10 @@
> m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
> }
> else
> + {
> for (uint32_t i = 0; i < numSubstreams; i++)
> m_outStreams[i].resetBits();
> + }
>
> int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
>
> @@ -697,10 +721,9 @@
> * compressed in a wave-front pattern if WPP is enabled. Row based
> loop
> * filters runs behind the CTU compression and reconstruction */
>
> - for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
> - {
> + for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
> m_rows[m_sliceBaseRow[sliceId]].active = true;
> - }
> +
> if (m_param->bEnableWavefront)
> {
> int i = 0;
> @@ -719,6 +742,7 @@
> }
> }
> }
> +
> if (m_param->bEnableWavefront)
> {
> for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize;
> rowInSlice++)
> @@ -751,6 +775,7 @@
> m_mref[l][ref].applyWeight(rowIdx,
> m_numRows, sliceEndRow, sliceId);
> }
> }
> +
> enableRowEncoder(m_row_to_idx[row]); /* clear external
> dependency for this row */
> if (!rowInSlice)
> {
> @@ -980,9 +1005,8 @@
> // complete the slice header by writing WPP row-starts
> m_entropyCoder.setBitstream(&m_bs);
> if (slice->m_pps->bEntropyCodingSyncEnabled)
> - {
> m_entropyCoder.codeSliceHeaderWPPEntryPoints(
> &m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow - 1),
> maxStreamSize);
> - }
> +
> m_bs.writeByteAlignment();
>
> m_nalList.serialize(slice->m_nalUnitType, m_bs);
> @@ -1211,17 +1235,21 @@
> int64_t startTime = x265_mdate();
> if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime)
> m_totalNoWorkerTime += x265_mdate() - m_stallStartTime;
> +
> const uint32_t realRow = m_idx_to_row[row >> 1];
> const uint32_t typeNum = m_idx_to_row[row & 1];
> +
> if (!typeNum)
> processRowEncoder(realRow, m_tld[threadId]);
> else
> {
> m_frameFilter.processRow(realRow);
> +
> // NOTE: Active next row
> if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1)
> enqueueRowFilter(m_row_to_idx[realRow + 1]);
> }
> +
> if (ATOMIC_DEC(&m_activeWorkerCount) == 0)
> m_stallStartTime = x265_mdate();
>
> @@ -1264,20 +1292,18 @@
> const uint32_t lineStartCUAddr = row * numCols;
> bool bIsVbv = m_param->rc.vbvBufferSize > 0 &&
> m_param->rc.vbvMaxBitrate > 0;
>
> + const uint32_t sliceId = curRow.sliceId;
> uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1))
> / 16;
> - uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1))
> / 16;
> uint32_t noOfBlocks = m_param->maxCUSize / 16;
> const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row -
> 1].sliceId != curRow.sliceId)) ? 1 : 0;
> const uint32_t bLastRowInSlice = ((row == m_numRows - 1) ||
> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
> - const uint32_t sliceId = curRow.sliceId;
> const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
> const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
>
> - if (bFirstRowInSlice && !curRow.completed)
> - {
> - // Load SBAC coder context from previous row and initialize row
> state.
> - rowCoder.load(m_initSliceContext);
> - }
> + // Load SBAC coder context from previous row and initialize row state.
> + if (bFirstRowInSlice && !curRow.completed)
> + rowCoder.load(m_initSliceContext);
> +
> // calculate mean QP for consistent deltaQP signalling calculation
> if (m_param->bOptCUDeltaQP)
> {
> @@ -1287,15 +1313,12 @@
> if (m_param->bEnableWavefront || !row)
> {
> double meanQPOff = 0;
> - uint32_t loopIncr, count = 0;
> bool isReferenced = IS_REFERENCED(m_frame);
> double *qpoffs = (isReferenced && m_param->rc.cuTree) ?
> m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;
> if (qpoffs)
> {
> - if (m_param->rc.qgSize == 8)
> - loopIncr = 8;
> - else
> - loopIncr = 16;
> + uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 :
> 16;
> +
> uint32_t cuYStart = 0, height = m_frame->m_fencPic->m_
> picHeight;
> if (m_param->bEnableWavefront)
> {
> @@ -1305,6 +1328,7 @@
>
> uint32_t qgSize = m_param->rc.qgSize, width =
> m_frame->m_fencPic->m_picWidth;
> uint32_t maxOffsetCols = (m_frame->m_fencPic->m_picWidth
> + (loopIncr - 1)) / loopIncr;
> + uint32_t count = 0;
> for (uint32_t cuY = cuYStart; cuY < height && (cuY <
> m_frame->m_fencPic->m_picHeight); cuY += qgSize)
> {
> for (uint32_t cuX = 0; cuX < width; cuX += qgSize)
> @@ -1336,7 +1360,8 @@
> }
> curRow.avgQPComputed = 1;
> }
> - }
> + }
> +
> // Initialize restrict on MV range in slices
> tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize
> * 4) + 3 * 4;
> tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) *
> (m_param->maxCUSize * 4) - 4 * 4);
> @@ -1364,16 +1389,16 @@
> curRow.bufferedEntropy.copyState(rowCoder);
> curRow.bufferedEntropy.loadContexts(rowCoder);
> }
> - if (!row && m_vbvResetTriggerRow != intRow)
> + if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
> {
> curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
> curEncData.m_rowStat[row].rowQpScale =
> x265_qp2qScale(curEncData.m_avgQpRc);
> }
>
> FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
> - if (m_param->bEnableWavefront && row >= col && row &&
> m_vbvResetTriggerRow != intRow)
> + if (m_param->bEnableWavefront && rowInSlice >= col &&
> !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
> cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols +
> 1].baseQp;
> - else if (!m_param->bEnableWavefront && row &&
> m_vbvResetTriggerRow != intRow)
> + else if (!m_param->bEnableWavefront && !bFirstRowInSlice &&
> m_vbvResetTriggerRow != intRow)
> cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
> else
> cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
> @@ -1385,7 +1410,8 @@
> {
> cuStat.vbvCost = 0;
> cuStat.intraVbvCost = 0;
> - for (uint32_t h = 0; h < noOfBlocks && block_y <
> maxBlockRows; h++, block_y++)
> +
> + for (uint32_t h = 0; h < noOfBlocks && block_y <
> m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
> {
> uint32_t idx = block_x + (block_y * maxBlockCols);
>
> @@ -1433,11 +1459,12 @@
> {
> // NOTE: in VBV mode, we may reencode anytime, so we can't do
> Deblock stage-Horizon and SAO
> if (!bIsVbv)
> - {
> + {
> // Delay one row to avoid intra prediction conflict
> if (m_pool && !bFirstRowInSlice)
> - {
> + {
> int allowCol = col;
> +
> // avoid race condition on last column
> if (rowInSlice >= 2)
> {
> @@ -1446,11 +1473,13 @@
> }
> m_frameFilter.m_parallelFilter[row -
> 1].m_allowedCol.set(allowCol);
> }
> +
> // Last Row may start early
> if (m_pool && bLastRowInSlice)
> {
> // Deblocking last row
> int allowCol = col;
> +
> // avoid race condition on last column
> if (rowInSlice >= 2)
> {
> @@ -1472,6 +1501,7 @@
>
> FrameStats frameLog;
> curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu,
> &frameLog);
> +
> // copy number of intra, inter cu per row into frame stats for 2
> pass
> if (m_param->rc.bStatWrite)
> {
> @@ -1485,10 +1515,8 @@
> int shift = 2 * (m_param->maxCUDepth - depth);
> int cuSize = m_param->maxCUSize >> depth;
>
> - if (cuSize == 8)
> - curRow.rowStats.intra8x8Cnt +=
> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
> - else
> - curRow.rowStats.intra8x8Cnt +=
> (int)(frameLog.cntIntra[depth] << shift);
> + curRow.rowStats.intra8x8Cnt += (cuSize == 8) ?
> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
> +
> (int)(frameLog.cntIntra[depth] << shift);
>
> curRow.rowStats.inter8x8Cnt +=
> (int)(frameLog.cntInter[depth] << shift);
> curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[depth]
> + frameLog.cntMergeCu[depth]) << shift);
> @@ -1518,12 +1546,13 @@
> if (bIsVbv)
> {
> // Update encoded bits, satdCost, baseQP for each CU if tune
> grain is disabled
> - if ((m_param->bEnableWavefront && (!cuAddr ||
> !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
> + FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
> + if ((m_param->bEnableWavefront && ((cuAddr ==
> m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) ||
> !m_param->bEnableWavefront)
> {
> - curEncData.m_rowStat[row].rowSatd +=
> curEncData.m_cuStat[cuAddr].vbvCost;
> - curEncData.m_rowStat[row].rowIntraSatd +=
> curEncData.m_cuStat[cuAddr].intraVbvCost;
> - curEncData.m_rowStat[row].encodedBits +=
> curEncData.m_cuStat[cuAddr].totalBits;
> - curEncData.m_rowStat[row].sumQpRc +=
> curEncData.m_cuStat[cuAddr].baseQp;
> + curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
> + curEncData.m_rowStat[row].rowIntraSatd +=
> cuStat.intraVbvCost;
> + curEncData.m_rowStat[row].encodedBits +=
> cuStat.totalBits;
> + curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
> curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
> }
>
> @@ -1531,7 +1560,7 @@
> if (!m_param->bEnableWavefront && col == numCols - 1)
> {
> double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
> - int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
> row, &m_rce, qpBase);
> + int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
> row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
> qpBase = x265_clip3((double)m_param->rc.qpMin,
> (double)m_param->rc.qpMax, qpBase);
> curEncData.m_rowStat[row].rowQp = qpBase;
> curEncData.m_rowStat[row].rowQpScale =
> x265_qp2qScale(qpBase);
> @@ -1557,15 +1586,16 @@
> }
> }
> // If current block is at row diagonal checkpoint, call vbv
> ratecontrol.
> - else if (m_param->bEnableWavefront && row == col && row)
> + else if (m_param->bEnableWavefront && rowInSlice == col &&
> !bFirstRowInSlice)
> {
> if (m_param->rc.bEnableConstVbv)
> {
> - int32_t startCuAddr = numCols * row;
> - int32_t EndCuAddr = startCuAddr + col;
> - for (int32_t r = row; r >= 0; r--)
> + uint32_t startCuAddr = numCols * row;
> + uint32_t EndCuAddr = startCuAddr + col;
> +
> + for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[sliceId];
> r--)
> {
> - for (int32_t c = startCuAddr; c <= EndCuAddr && c
> <= (int32_t)numCols * (r + 1) - 1; c++)
> + for (uint32_t c = startCuAddr; c <= EndCuAddr &&
> c <= numCols * (r + 1) - 1; c++)
> {
> curEncData.m_rowStat[r].rowSatd +=
> curEncData.m_cuStat[c].vbvCost;
> curEncData.m_rowStat[r].rowIntraSatd +=
> curEncData.m_cuStat[c].intraVbvCost;
> @@ -1578,10 +1608,10 @@
> }
> }
> double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
> - int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
> row, &m_rce, qpBase);
> + int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
> row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
> qpBase = x265_clip3((double)m_param->rc.qpMin,
> (double)m_param->rc.qpMax, qpBase);
> curEncData.m_rowStat[row].rowQp = qpBase;
> - curEncData.m_rowStat[row].rowQpScale =
> x265_qp2qScale(qpBase);
> + curEncData.m_rowStat[row].rowQpScale =
> x265_qp2qScale(qpBase);
>
> if (reEncode < 0)
> {
> @@ -1592,7 +1622,7 @@
> m_vbvResetTriggerRow = row;
> m_bAllRowsStop = true;
>
> - for (uint32_t r = m_numRows - 1; r >= row; r--)
> + for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r
> >= row; r--)
> {
> CTURow& stopRow = m_rows[r];
>
> @@ -1670,14 +1700,15 @@
> return;
> }
> }
> +
> /* this row of CTUs has been compressed */
> if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
> {
> - if (row == m_numRows - 1)
> + if (bLastRowInSlice)
> {
> - for (int32_t r = 0; r < (int32_t)m_numRows; r++)
> + for (uint32_t r = m_sliceBaseRow[sliceId]; r <
> m_sliceBaseRow[sliceId + 1]; r++)
> {
> - for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs +
> 1; c < (int32_t)numCols * (r + 1); c++)
> + for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs
> + 1; c < numCols * (r + 1); c++)
> {
> curEncData.m_rowStat[r].rowSatd +=
> curEncData.m_cuStat[c].vbvCost;
> curEncData.m_rowStat[r].rowIntraSatd +=
> curEncData.m_cuStat[c].intraVbvCost;
> @@ -1695,26 +1726,41 @@
> * after half the frame is encoded, but after this initial period we
> update
> * after refLagRows (the number of rows reference frames must have
> completed
> * before referencees may begin encoding) */
> - uint32_t rowCount = 0;
> if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv)
> {
> + uint32_t rowCount = 0;
> + uint32_t maxRows = m_sliceBaseRow[sliceId + 1] -
> m_sliceBaseRow[sliceId];
> if (!m_rce.encodeOrder)
> - rowCount = m_numRows - 1;
> + rowCount = maxRows - 1;
> else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum /
> m_param->fpsDenom))
> - rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1);
> + rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);
> else
> - rowCount = X265_MIN(m_refLagRows, m_numRows - 1);
> - if (row == rowCount)
> + rowCount = X265_MIN(m_refLagRows /
> m_param->maxSlices, maxRows - 1);
> +
> + if (rowInSlice == rowCount)
> {
> - m_rce.rowTotalBits = 0;
> + m_rowSliceTotalBits[sliceId] = 0;
> if (bIsVbv)
> - for (uint32_t i = 0; i < rowCount; i++)
> - m_rce.rowTotalBits += curEncData.m_rowStat[i].
> encodedBits;
> + {
> + for (uint32_t i = m_sliceBaseRow[sliceId]; i < rowCount +
> m_sliceBaseRow[sliceId]; i++)
> + m_rowSliceTotalBits[sliceId] +=
> curEncData.m_rowStat[i].encodedBits;
> + }
> else
> - for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols;
> cuAddr++)
> - m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr].
> totalBits;
> + {
> + uint32_t startAddr = rowCount * numCols * sliceId;
> + uint32_t finishAddr = startAddr + rowCount * numCols;
> +
> + for (uint32_t cuAddr = startAddr; cuAddr < finishAddr;
> cuAddr++)
> + m_rowSliceTotalBits[sliceId] +=
> curEncData.m_cuStat[cuAddr].totalBits;
> + }
>
> - m_top->m_rateControl->rateControlUpdateStats(&m_rce);
> + if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)
> + {
> + m_rce.rowTotalBits = 0;
> + for (uint32_t i = 0; i < m_param->maxSlices; i++)
> + m_rce.rowTotalBits += m_rowSliceTotalBits[i];
> + m_top->m_rateControl->rateControlUpdateStats(&m_rce);
> + }
> }
> }
>
> @@ -1742,11 +1788,13 @@
> if (rowInSlice >= m_filterRowDelay)
> {
> enableRowFilter(m_row_to_idx[row - m_filterRowDelay]);
> +
> /* NOTE: Activate filter if first row (row 0) */
> if (rowInSlice == m_filterRowDelay)
> enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]);
> tryWakeOne();
> }
> +
> if (bLastRowInSlice)
> {
> for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i <
> endRowInSlicePlus1; i++)
> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Thu Jun 29 13:13:56 2017 +0530
> +++ b/source/encoder/frameencoder.h Fri Sep 22 20:20:58 2017 +0530
> @@ -138,6 +138,7 @@
> volatile bool m_bAllRowsStop;
> volatile int m_completionCount;
> volatile int m_vbvResetTriggerRow;
> + volatile int m_sliceCnt;
>
> uint32_t m_numRows;
> uint32_t m_numCols;
> @@ -147,8 +148,10 @@
>
> CTURow* m_rows;
> uint16_t m_sliceAddrBits;
> - uint16_t m_sliceGroupSize;
> - uint32_t* m_sliceBaseRow;
> + uint32_t m_sliceGroupSize;
> + uint32_t* m_sliceBaseRow;
> + uint32_t* m_sliceMaxBlockRow;
> + int64_t m_rowSliceTotalBits[2];
> RateControlEntry m_rce;
> SEIDecodedPictureHash m_seiReconPictureDigest;
>
> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Thu Jun 29 13:13:56 2017 +0530
> +++ b/source/encoder/ratecontrol.cpp Fri Sep 22 20:20:58 2017 +0530
> @@ -732,7 +732,6 @@
> m_bitrate = m_param->rc.bitrate * 1000;
> }
>
> -
> void RateControl::initHRD(SPS& sps)
> {
> int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;
> @@ -765,6 +764,7 @@
>
> #undef MAX_DURATION
> }
> +
> bool RateControl::analyseABR2Pass(uint64_t allAvailableBits)
> {
> double rateFactor, stepMult;
> @@ -1473,6 +1473,7 @@
>
> return q;
> }
> +
> double RateControl::countExpectedBits(int startPos, int endPos)
> {
> double expectedBits = 0;
> @@ -1484,6 +1485,7 @@
> }
> return expectedBits;
> }
> +
> bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int
> over, int endPos)
> {
> /* find an interval ending on an overflow or underflow (depending on
> whether
> @@ -1531,6 +1533,7 @@
> }
> return adjusted;
> }
> +
> bool RateControl::cuTreeReadFor2Pass(Frame* frame)
> {
> int index = m_encOrder[frame->m_poc];
> @@ -1579,24 +1582,24 @@
> double RateControl::tuneAbrQScaleFromFeedback(double qScale)
> {
> double abrBuffer = 2 * m_rateTolerance * m_bitrate;
> - /* use framesDone instead of POC as poc count is not serial with
> bframes enabled */
> - double overflow = 1.0;
> - double timeDone = (double)(m_framesDone -
> m_param->frameNumThreads + 1) * m_frameDuration;
> - double wantedBits = timeDone * m_bitrate;
> - int64_t encodedBits = m_totalBits;
> - if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
> - {
> - abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
> - encodedBits = m_encodedBits;
> - }
> + /* use framesDone instead of POC as poc count is not serial with
> bframes enabled */
> + double overflow = 1.0;
> + double timeDone = (double)(m_framesDone - m_param->frameNumThreads +
> 1) * m_frameDuration;
> + double wantedBits = timeDone * m_bitrate;
> + int64_t encodedBits = m_totalBits;
> + if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
> + {
> + abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
> + encodedBits = m_encodedBits;
> + }
>
> - if (wantedBits > 0 && encodedBits > 0 &&
> (!m_partialResidualFrames ||
> - m_param->rc.bStrictCbr || m_isGrainEnabled))
> - {
> - abrBuffer *= X265_MAX(1, sqrt(timeDone));
> - overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits -
> wantedBits) / abrBuffer);
> - qScale *= overflow;
> - }
> + if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames ||
> + m_param->rc.bStrictCbr || m_isGrainEnabled))
> + {
> + abrBuffer *= X265_MAX(1, sqrt(timeDone));
> + overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) /
> abrBuffer);
> + qScale *= overflow;
> + }
> return qScale;
> }
>
> @@ -2330,17 +2333,18 @@
> return totalSatdBits + encodedBitsSoFar;
> }
>
> -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
> RateControlEntry* rce, double& qpVbv)
> +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t
> sliceId)
> {
> FrameData& curEncData = *curFrame->m_encData;
> double qScaleVbv = x265_qp2qScale(qpVbv);
> uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd;
> double encodedBits = curEncData.m_rowStat[row].encodedBits;
> + uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
>
> - if (m_param->bEnableWavefront && row == 1)
> + if (m_param->bEnableWavefront && rowInSlice == 1)
> {
> - rowSatdCost += curEncData.m_rowStat[0].rowSatd;
> - encodedBits += curEncData.m_rowStat[0].encodedBits;
> + rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd;
> + encodedBits += curEncData.m_rowStat[row - 1].encodedBits;
> }
> rowSatdCost >>= X265_DEPTH - 8;
> updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost,
> encodedBits);
> @@ -2350,8 +2354,8 @@
> if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp)
> {
> uint64_t intraRowSatdCost = curEncData.m_rowStat[row].
> rowIntraSatd;
> - if (m_param->bEnableWavefront && row == 1)
> - intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd;
> + if (m_param->bEnableWavefront && rowInSlice == 1)
> + intraRowSatdCost += curEncData.m_rowStat[row -
> 1].rowIntraSatd;
> intraRowSatdCost >>= X265_DEPTH - 8;
> updatePredictor(rce->rowPred[1], qScaleVbv,
> (double)intraRowSatdCost, encodedBits);
> }
> @@ -2376,7 +2380,7 @@
> const SPS& sps = *curEncData.m_slice->m_sps;
> double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight);
>
> - if (row < sps.numCuInHeight - 1)
> + if (row < m_sliceBaseRow[sliceId + 1] - 1)
> {
> /* More threads means we have to be more cautious in letting
> ratecontrol use up extra bits. */
> double rcTol = bufferLeftPlanned / m_param->frameNumThreads *
> m_rateTolerance;
> @@ -2693,8 +2697,8 @@
> m_encodedBitsWindow[pos % s_slidingWindowFrames] = actualBits;
> if(rce->sliceType != I_SLICE)
> {
> - int qp = int (rce->qpaRc + 0.5);
> - m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ? actualBits
> : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
> + int qp = int (rce->qpaRc + 0.5);
> + m_qpToEncodedBits[qp] = m_qpToEncodedBits[qp] == 0 ?
> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
> }
> curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow;
> curFrame->m_rcData->cplxrSum = m_cplxrSum;
> @@ -2779,7 +2783,8 @@
> curFrame->m_encData->m_frameStats.percent8x8Skip * m_ncu) <
> 0)
> goto writeFailure;
> }
> - else{
> + else
> + {
> RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps;
> int i, num = rpsWriter->numberOfPictures;
> char deltaPOC[128];
> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h Thu Jun 29 13:13:56 2017 +0530
> +++ b/source/encoder/ratecontrol.h Fri Sep 22 20:20:58 2017 +0530
> @@ -244,7 +244,7 @@
> int rateControlStart(Frame* curFrame, RateControlEntry* rce,
> Encoder* enc);
> void rateControlUpdateStats(RateControlEntry* rce);
> int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry*
> rce, int *filler);
> - int rowVbvRateControl(Frame* curFrame, uint32_t row,
> RateControlEntry* rce, double& qpVbv);
> + int rowVbvRateControl(Frame* curFrame, uint32_t row,
> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t
> sliceId);
> int rateControlSliceType(int frameNum);
> bool cuTreeReadFor2Pass(Frame* curFrame);
> void hrdFullness(SEIBufferingPeriod* sei);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170928/3f016a83/attachment-0001.html>
More information about the x265-devel
mailing list