[x265] [PATCH] vbv hanging issue; fix for multiple slices

Ashok Kumar Mishra ashok at multicorewareinc.com
Thu Sep 28 10:45:58 CEST 2017


Kindly ignore this patch. I am sending the updated patch after review.

On Thu, Sep 28, 2017 at 1:27 PM, Ashok Kumar Mishra <
ashok at multicorewareinc.com> wrote:

> Both patches are the same; you can apply it on top of my previous two patches.
>
> On Thu, Sep 28, 2017 at 12:07 PM, Pradeep Ramachandran <
> pradeep at multicorewareinc.com> wrote:
>
>> On Tue, Sep 26, 2017 at 6:49 PM, Ashok Kumar Mishra <
>> ashok at multicorewareinc.com> wrote:
>>
>>> Please find the attached patch.
>>>
>>
>> This patch is confusing - is it meant to be applied on top of the previous
>> patch (which didn't work), or is it a replacement patch (which didn't
>> work either, as I can't find the parent)?
>>
>>
>>>
>>> On Thu, Sep 21, 2017 at 8:21 PM, <ashok at multicorewareinc.com> wrote:
>>>
>>>> # HG changeset patch
>>>> # User Ashok Kumar Mishra <ashok at multicorewareinc.com>
>>>> # Date 1506005452 -19800
>>>> #      Thu Sep 21 20:20:52 2017 +0530
>>>> # Node ID 546387e0b983ac1d68cda73777b34a122928cd32
>>>> # Parent  71f700844b0b2a9120bfd8a2d1f13e219aa20677
>>>> vbv hanging issue; fix for multiple slices
>>>> When multiple slices are enabled, vbv rate control must take care of
>>>> correct rows in slices, since multiple slices are encoding
>>>> simultaneously.
>>>>
>>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.cpp
>>>> --- a/source/encoder/frameencoder.cpp   Tue Sep 12 18:13:03 2017 +0530
>>>> +++ b/source/encoder/frameencoder.cpp   Thu Sep 21 20:20:52 2017 +0530
>>>> @@ -88,6 +88,7 @@
>>>>      delete[] m_outStreams;
>>>>      delete[] m_backupStreams;
>>>>      X265_FREE(m_sliceBaseRow);
>>>> +    X265_FREE(m_sliceMaxBlockRow);
>>>>      X265_FREE(m_cuGeoms);
>>>>      X265_FREE(m_ctuGeomMap);
>>>>      X265_FREE(m_substreamSizes);
>>>> @@ -118,6 +119,40 @@
>>>>
>>>>      m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
>>>>      ok &= !!m_sliceBaseRow;
>>>> +    m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1)
>>>> / m_param->maxSlices;
>>>> +    uint32_t sliceGroupSizeAccu = (m_numRows << 8) /
>>>> m_param->maxSlices;
>>>> +    uint32_t rowSum = sliceGroupSizeAccu;
>>>> +    uint32_t sidx = 0;
>>>> +    for (uint32_t i = 0; i < m_numRows; i++)
>>>> +    {
>>>> +        const uint32_t rowRange = (rowSum >> 8);
>>>> +        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>>>> +        {
>>>> +            rowSum += sliceGroupSizeAccu;
>>>> +            m_sliceBaseRow[++sidx] = i;
>>>> +        }
>>>> +    }
>>>> +    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
>>>> +    m_sliceBaseRow[0] = 0;
>>>> +    m_sliceBaseRow[m_param->maxSlices] = m_numRows;
>>>> +
>>>> +    m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
>>>> +    ok &= !!m_sliceMaxBlockRow;
>>>> +    uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
>>>> +    sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
>>>> +    rowSum = sliceGroupSizeAccu;
>>>> +    sidx = 0;
>>>> +    for (uint32_t i = 0; i < maxBlockRows; i++)
>>>> +    {
>>>> +        const uint32_t rowRange = (rowSum >> 8);
>>>> +        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>>>> +        {
>>>> +            rowSum += sliceGroupSizeAccu;
>>>> +            m_sliceMaxBlockRow[++sidx] = i;
>>>> +        }
>>>> +    }
>>>> +    m_sliceMaxBlockRow[0] = 0;
>>>> +    m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
>>>>
>>>>      /* determine full motion search range */
>>>>      int range  = m_param->searchRange;       /* fpel search */
>>>> @@ -341,6 +376,8 @@
>>>>      m_completionCount = 0;
>>>>      m_bAllRowsStop = false;
>>>>      m_vbvResetTriggerRow = -1;
>>>> +    m_rowSliceTotalBits[0] = 0;
>>>> +    m_rowSliceTotalBits[1] = 0;
>>>>
>>>>      m_SSDY = m_SSDU = m_SSDV = 0;
>>>>      m_ssim = 0;
>>>> @@ -550,28 +587,13 @@
>>>>
>>>>      /* reset entropy coders and compute slice id */
>>>>      m_entropyCoder.load(m_initSliceContext);
>>>> -    const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices -
>>>> 1) / m_param->maxSlices;
>>>> -    const uint32_t sliceGroupSizeAccu = (m_numRows << 8) /
>>>> m_param->maxSlices;
>>>> -    m_sliceGroupSize = (uint16_t)sliceGroupSize;
>>>> +
>>>> +    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>>>> +        for (uint32_t row = m_sliceBaseRow[sliceId]; row <
>>>> m_sliceBaseRow[sliceId + 1]; row++)
>>>> +            m_rows[row].init(m_initSliceContext, sliceId);
>>>>
>>>> -    uint32_t rowSum = sliceGroupSizeAccu;
>>>> -    uint32_t sidx = 0;
>>>> -    for (uint32_t i = 0; i < m_numRows; i++)
>>>> -    {
>>>> -        const uint32_t rowRange = (rowSum >> 8);
>>>> -
>>>> -        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>>>> -        {
>>>> -            rowSum += sliceGroupSizeAccu;
>>>> -            m_sliceBaseRow[++sidx] = i;
>>>> -        }
>>>> -
>>>> -        m_rows[i].init(m_initSliceContext, sidx);
>>>> -    }
>>>> -    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
>>>> -
>>>> -    m_sliceBaseRow[0] = 0;
>>>> -    m_sliceBaseRow[m_param->maxSlices] = m_numRows;
>>>> +    // reset slice counter for rate control update
>>>> +    m_sliceCnt = 0;
>>>>
>>>>      uint32_t numSubstreams = m_param->bEnableWavefront ?
>>>> slice->m_sps->numCuInHeight : m_param->maxSlices;
>>>>      X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices ==
>>>> 1), "Multiple slices without WPP unsupport now!");
>>>> @@ -586,8 +608,10 @@
>>>>                  m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
>>>>      }
>>>>      else
>>>> +    {
>>>>          for (uint32_t i = 0; i < numSubstreams; i++)
>>>>              m_outStreams[i].resetBits();
>>>> +    }
>>>>
>>>>      int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
>>>>
>>>> @@ -697,10 +721,9 @@
>>>>       * compressed in a wave-front pattern if WPP is enabled. Row based
>>>> loop
>>>>       * filters runs behind the CTU compression and reconstruction */
>>>>
>>>> -    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>>>> -    {
>>>> +    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>>>>          m_rows[m_sliceBaseRow[sliceId]].active = true;
>>>> -    }
>>>> +
>>>>      if (m_param->bEnableWavefront)
>>>>      {
>>>>          int i = 0;
>>>> @@ -982,9 +1005,8 @@
>>>>              // complete the slice header by writing WPP row-starts
>>>>              m_entropyCoder.setBitstream(&m_bs);
>>>>              if (slice->m_pps->bEntropyCodingSyncEnabled)
>>>> -            {
>>>>                  m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow -
>>>> prevSliceRow - 1), maxStreamSize);
>>>> -            }
>>>> +
>>>>              m_bs.writeByteAlignment();
>>>>
>>>>              m_nalList.serialize(slice->m_nalUnitType, m_bs);
>>>> @@ -1270,20 +1292,17 @@
>>>>      const uint32_t lineStartCUAddr = row * numCols;
>>>>      bool bIsVbv = m_param->rc.vbvBufferSize > 0 &&
>>>> m_param->rc.vbvMaxBitrate > 0;
>>>>
>>>> +    const uint32_t sliceId = curRow.sliceId;
>>>>      uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 -
>>>> 1)) / 16;
>>>> -    uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 -
>>>> 1)) / 16;
>>>>      uint32_t noOfBlocks = m_param->maxCUSize / 16;
>>>>      const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row -
>>>> 1].sliceId != curRow.sliceId)) ? 1 : 0;
>>>>      const uint32_t bLastRowInSlice = ((row == m_numRows - 1) ||
>>>> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
>>>> -    const uint32_t sliceId = curRow.sliceId;
>>>>      const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
>>>>      const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
>>>>
>>>> -    if (bFirstRowInSlice && !curRow.completed)
>>>> -    {
>>>> -        // Load SBAC coder context from previous row and initialize
>>>> row state.
>>>> -        rowCoder.load(m_initSliceContext);
>>>> -    }
>>>> +    // Load SBAC coder context from previous row and initialize row
>>>> state.
>>>> +    if (bFirstRowInSlice && !curRow.completed)
>>>> +        rowCoder.load(m_initSliceContext);
>>>>
>>>>      // calculate mean QP for consistent deltaQP signalling calculation
>>>>      if (m_param->bOptCUDeltaQP)
>>>> @@ -1294,15 +1313,12 @@
>>>>              if (m_param->bEnableWavefront || !row)
>>>>              {
>>>>                  double meanQPOff = 0;
>>>> -                uint32_t loopIncr, count = 0;
>>>>                  bool isReferenced = IS_REFERENCED(m_frame);
>>>>                  double *qpoffs = (isReferenced && m_param->rc.cuTree)
>>>> ? m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;
>>>>                  if (qpoffs)
>>>>                  {
>>>> -                    if (m_param->rc.qgSize == 8)
>>>> -                        loopIncr = 8;
>>>> -                    else
>>>> -                        loopIncr = 16;
>>>> +                    uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8
>>>> : 16;
>>>> +
>>>>                      uint32_t cuYStart = 0, height =
>>>> m_frame->m_fencPic->m_picHeight;
>>>>                      if (m_param->bEnableWavefront)
>>>>                      {
>>>> @@ -1312,6 +1328,7 @@
>>>>
>>>>                      uint32_t qgSize = m_param->rc.qgSize, width =
>>>> m_frame->m_fencPic->m_picWidth;
>>>>                      uint32_t maxOffsetCols =
>>>> (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
>>>> +                    uint32_t count = 0;
>>>>                      for (uint32_t cuY = cuYStart; cuY < height && (cuY
>>>> < m_frame->m_fencPic->m_picHeight); cuY += qgSize)
>>>>                      {
>>>>                          for (uint32_t cuX = 0; cuX < width; cuX +=
>>>> qgSize)
>>>> @@ -1372,16 +1389,16 @@
>>>>                  curRow.bufferedEntropy.copyState(rowCoder);
>>>>                  curRow.bufferedEntropy.loadContexts(rowCoder);
>>>>              }
>>>> -            if (!row && m_vbvResetTriggerRow != intRow)
>>>> +            if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
>>>>              {
>>>>                  curEncData.m_rowStat[row].rowQp =
>>>> curEncData.m_avgQpRc;
>>>>                  curEncData.m_rowStat[row].rowQpScale =
>>>> x265_qp2qScale(curEncData.m_avgQpRc);
>>>>              }
>>>>
>>>>              FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
>>>> -            if (m_param->bEnableWavefront && row >= col && row &&
>>>> m_vbvResetTriggerRow != intRow)
>>>> +            if (m_param->bEnableWavefront && rowInSlice >= col &&
>>>> !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
>>>>                  cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols +
>>>> 1].baseQp;
>>>> -            else if (!m_param->bEnableWavefront && row &&
>>>> m_vbvResetTriggerRow != intRow)
>>>> +            else if (!m_param->bEnableWavefront && !bFirstRowInSlice
>>>> && m_vbvResetTriggerRow != intRow)
>>>>                  cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
>>>>              else
>>>>                  cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
>>>> @@ -1393,7 +1410,8 @@
>>>>              {
>>>>                  cuStat.vbvCost = 0;
>>>>                  cuStat.intraVbvCost = 0;
>>>> -                for (uint32_t h = 0; h < noOfBlocks && block_y <
>>>> maxBlockRows; h++, block_y++)
>>>> +
>>>> +                for (uint32_t h = 0; h < noOfBlocks && block_y <
>>>> m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
>>>>                  {
>>>>                      uint32_t idx = block_x + (block_y * maxBlockCols);
>>>>
>>>> @@ -1497,10 +1515,8 @@
>>>>                  int shift = 2 * (m_param->maxCUDepth - depth);
>>>>                  int cuSize = m_param->maxCUSize >> depth;
>>>>
>>>> -                if (cuSize == 8)
>>>> -                    curRow.rowStats.intra8x8Cnt +=
>>>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
>>>> -                else
>>>> -                    curRow.rowStats.intra8x8Cnt +=
>>>> (int)(frameLog.cntIntra[depth] << shift);
>>>> +                curRow.rowStats.intra8x8Cnt += (cuSize == 8) ?
>>>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
>>>> +
>>>>  (int)(frameLog.cntIntra[depth] << shift);
>>>>
>>>>                  curRow.rowStats.inter8x8Cnt +=
>>>> (int)(frameLog.cntInter[depth] << shift);
>>>>                  curRow.rowStats.skip8x8Cnt +=
>>>> (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) <<
>>>> shift);
>>>> @@ -1530,12 +1546,13 @@
>>>>          if (bIsVbv)
>>>>          {
>>>>              // Update encoded bits, satdCost, baseQP for each CU if
>>>> tune grain is disabled
>>>> -            if ((m_param->bEnableWavefront && (!cuAddr ||
>>>> !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
>>>> +            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
>>>> +            if ((m_param->bEnableWavefront && ((cuAddr ==
>>>> m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) ||
>>>> !m_param->bEnableWavefront)
>>>>              {
>>>> -                curEncData.m_rowStat[row].rowSatd +=
>>>> curEncData.m_cuStat[cuAddr].vbvCost;
>>>> -                curEncData.m_rowStat[row].rowIntraSatd +=
>>>> curEncData.m_cuStat[cuAddr].intraVbvCost;
>>>> -                curEncData.m_rowStat[row].encodedBits +=
>>>> curEncData.m_cuStat[cuAddr].totalBits;
>>>> -                curEncData.m_rowStat[row].sumQpRc +=
>>>> curEncData.m_cuStat[cuAddr].baseQp;
>>>> +                curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
>>>> +                curEncData.m_rowStat[row].rowIntraSatd +=
>>>> cuStat.intraVbvCost;
>>>> +                curEncData.m_rowStat[row].encodedBits +=
>>>> cuStat.totalBits;
>>>> +                curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
>>>>                  curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
>>>>              }
>>>>
>>>> @@ -1543,7 +1560,7 @@
>>>>              if (!m_param->bEnableWavefront && col == numCols - 1)
>>>>              {
>>>>                  double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
>>>> -                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>>>> row, &m_rce, qpBase);
>>>> +                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>>>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
>>>>                  qpBase = x265_clip3((double)m_param->rc.qpMin,
>>>> (double)m_param->rc.qpMax, qpBase);
>>>>                  curEncData.m_rowStat[row].rowQp = qpBase;
>>>>                  curEncData.m_rowStat[row].rowQpScale =
>>>> x265_qp2qScale(qpBase);
>>>> @@ -1569,15 +1586,16 @@
>>>>                  }
>>>>              }
>>>>              // If current block is at row diagonal checkpoint, call
>>>> vbv ratecontrol.
>>>> -            else if (m_param->bEnableWavefront && row == col && row)
>>>> +            else if (m_param->bEnableWavefront && rowInSlice == col &&
>>>> !bFirstRowInSlice)
>>>>              {
>>>>                  if (m_param->rc.bEnableConstVbv)
>>>>                  {
>>>> -                    int32_t startCuAddr = numCols * row;
>>>> -                    int32_t EndCuAddr = startCuAddr + col;
>>>> -                    for (int32_t r = row; r >= 0; r--)
>>>> +                    uint32_t startCuAddr = numCols * row;
>>>> +                    uint32_t EndCuAddr = startCuAddr + col;
>>>> +
>>>> +                    for (int32_t r = row; r >=
>>>> (int32_t)m_sliceBaseRow[sliceId]; r--)
>>>>                      {
>>>> -                        for (int32_t c = startCuAddr; c <= EndCuAddr
>>>> && c <= (int32_t)numCols * (r + 1) - 1; c++)
>>>> +                        for (uint32_t c = startCuAddr; c <= EndCuAddr
>>>> && c <= numCols * (r + 1) - 1; c++)
>>>>                          {
>>>>                              curEncData.m_rowStat[r].rowSatd +=
>>>> curEncData.m_cuStat[c].vbvCost;
>>>>                              curEncData.m_rowStat[r].rowIntraSatd +=
>>>> curEncData.m_cuStat[c].intraVbvCost;
>>>> @@ -1590,10 +1608,10 @@
>>>>                      }
>>>>                  }
>>>>                  double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
>>>> -                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>>>> row, &m_rce, qpBase);
>>>> +                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>>>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
>>>>                  qpBase = x265_clip3((double)m_param->rc.qpMin,
>>>> (double)m_param->rc.qpMax, qpBase);
>>>>                  curEncData.m_rowStat[row].rowQp = qpBase;
>>>> -                curEncData.m_rowStat[row].rowQpScale =
>>>> x265_qp2qScale(qpBase);
>>>> +                curEncData.m_rowStat[row].rowQpScale =
>>>> x265_qp2qScale(qpBase);
>>>>
>>>>                  if (reEncode < 0)
>>>>                  {
>>>> @@ -1604,7 +1622,7 @@
>>>>                      m_vbvResetTriggerRow = row;
>>>>                      m_bAllRowsStop = true;
>>>>
>>>> -                    for (uint32_t r = m_numRows - 1; r >= row; r--)
>>>> +                    for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1;
>>>> r >= row; r--)
>>>>                      {
>>>>                          CTURow& stopRow = m_rows[r];
>>>>
>>>> @@ -1686,11 +1704,11 @@
>>>>      /* this row of CTUs has been compressed */
>>>>      if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
>>>>      {
>>>> -        if (row == m_numRows - 1)
>>>> +        if (bLastRowInSlice)
>>>>          {
>>>> -            for (int32_t r = 0; r < (int32_t)m_numRows; r++)
>>>> +            for (uint32_t r = m_sliceBaseRow[sliceId]; r <
>>>> m_sliceBaseRow[sliceId + 1]; r++)
>>>>              {
>>>> -                for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs
>>>> + 1; c < (int32_t)numCols * (r + 1); c++)
>>>> +                for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs
>>>> + 1; c < numCols * (r + 1); c++)
>>>>                  {
>>>>                      curEncData.m_rowStat[r].rowSatd +=
>>>> curEncData.m_cuStat[c].vbvCost;
>>>>                      curEncData.m_rowStat[r].rowIntraSatd +=
>>>> curEncData.m_cuStat[c].intraVbvCost;
>>>> @@ -1708,26 +1726,41 @@
>>>>       * after half the frame is encoded, but after this initial period
>>>> we update
>>>>       * after refLagRows (the number of rows reference frames must have
>>>> completed
>>>>       * before referencees may begin encoding) */
>>>> -    uint32_t rowCount = 0;
>>>>      if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv)
>>>>      {
>>>> +        uint32_t rowCount = 0;
>>>> +        uint32_t maxRows = m_sliceBaseRow[sliceId + 1] -
>>>> m_sliceBaseRow[sliceId];
>>>>          if (!m_rce.encodeOrder)
>>>> -            rowCount = m_numRows - 1;
>>>> +            rowCount = maxRows - 1;
>>>>          else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum /
>>>> m_param->fpsDenom))
>>>> -            rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1);
>>>> +            rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);
>>>>          else
>>>> -            rowCount = X265_MIN(m_refLagRows, m_numRows - 1);
>>>> -        if (row == rowCount)
>>>> +            rowCount = X265_MIN(m_refLagRows, maxRows - 1);
>>>> +
>>>> +        if (rowInSlice == rowCount / m_param->maxSlices)
>>>>          {
>>>> -            m_rce.rowTotalBits = 0;
>>>> +            m_rowSliceTotalBits[sliceId] = 0;
>>>>              if (bIsVbv)
>>>> -                for (uint32_t i = 0; i < rowCount; i++)
>>>> -                    m_rce.rowTotalBits +=
>>>> curEncData.m_rowStat[i].encodedBits;
>>>> +            {
>>>> +                for (uint32_t i = m_sliceBaseRow[sliceId]; i <
>>>> (rowCount / m_param->maxSlices) + m_sliceBaseRow[sliceId]; i++)
>>>> +                    m_rowSliceTotalBits[sliceId] +=
>>>> curEncData.m_rowStat[i].encodedBits;
>>>> +            }
>>>>              else
>>>> -                for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols;
>>>> cuAddr++)
>>>> -                    m_rce.rowTotalBits +=
>>>> curEncData.m_cuStat[cuAddr].totalBits;
>>>> +            {
>>>> +                uint32_t startAddr = rowCount * numCols * sliceId;
>>>> +                uint32_t finishAddr = startAddr + rowCount * numCols;
>>>> +
>>>> +                for (uint32_t cuAddr = startAddr; cuAddr < finishAddr;
>>>> cuAddr++)
>>>> +                    m_rowSliceTotalBits[sliceId] +=
>>>> curEncData.m_cuStat[cuAddr].totalBits;
>>>> +            }
>>>>
>>>> -            m_top->m_rateControl->rateControlUpdateStats(&m_rce);
>>>> +            if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)
>>>> +            {
>>>> +                m_rce.rowTotalBits = 0;
>>>> +                for (uint32_t i = 0; i < m_param->maxSlices; i++)
>>>> +                    m_rce.rowTotalBits += m_rowSliceTotalBits[i];
>>>> +                m_top->m_rateControl->rateControlUpdateStats(&m_rce);
>>>> +            }
>>>>          }
>>>>      }
>>>>
>>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/frameencoder.h
>>>> --- a/source/encoder/frameencoder.h     Tue Sep 12 18:13:03 2017 +0530
>>>> +++ b/source/encoder/frameencoder.h     Thu Sep 21 20:20:52 2017 +0530
>>>> @@ -138,6 +138,7 @@
>>>>      volatile bool            m_bAllRowsStop;
>>>>      volatile int             m_completionCount;
>>>>      volatile int             m_vbvResetTriggerRow;
>>>> +    volatile int             m_sliceCnt;
>>>>
>>>>      uint32_t                 m_numRows;
>>>>      uint32_t                 m_numCols;
>>>> @@ -147,8 +148,10 @@
>>>>
>>>>      CTURow*                  m_rows;
>>>>      uint16_t                 m_sliceAddrBits;
>>>> -    uint16_t                 m_sliceGroupSize;
>>>> -    uint32_t*                m_sliceBaseRow;
>>>> +    uint32_t                 m_sliceGroupSize;
>>>> +    uint32_t*                m_sliceBaseRow;
>>>> +    uint32_t*                m_sliceMaxBlockRow;
>>>> +    int64_t                  m_rowSliceTotalBits[2];
>>>>      RateControlEntry         m_rce;
>>>>      SEIDecodedPictureHash    m_seiReconPictureDigest;
>>>>
>>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.cpp
>>>> --- a/source/encoder/ratecontrol.cpp    Tue Sep 12 18:13:03 2017 +0530
>>>> +++ b/source/encoder/ratecontrol.cpp    Thu Sep 21 20:20:52 2017 +0530
>>>> @@ -732,7 +732,6 @@
>>>>      m_bitrate = m_param->rc.bitrate * 1000;
>>>>  }
>>>>
>>>> -
>>>>  void RateControl::initHRD(SPS& sps)
>>>>  {
>>>>      int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;
>>>> @@ -765,6 +764,7 @@
>>>>
>>>>      #undef MAX_DURATION
>>>>  }
>>>> +
>>>>  bool RateControl::analyseABR2Pass(uint64_t allAvailableBits)
>>>>  {
>>>>      double rateFactor, stepMult;
>>>> @@ -1473,6 +1473,7 @@
>>>>
>>>>      return q;
>>>>  }
>>>> +
>>>>  double RateControl::countExpectedBits(int startPos, int endPos)
>>>>  {
>>>>      double expectedBits = 0;
>>>> @@ -1484,6 +1485,7 @@
>>>>      }
>>>>      return expectedBits;
>>>>  }
>>>> +
>>>>  bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int
>>>> over, int endPos)
>>>>  {
>>>>      /* find an interval ending on an overflow or underflow (depending
>>>> on whether
>>>> @@ -1531,6 +1533,7 @@
>>>>      }
>>>>      return adjusted;
>>>>  }
>>>> +
>>>>  bool RateControl::cuTreeReadFor2Pass(Frame* frame)
>>>>  {
>>>>      int index = m_encOrder[frame->m_poc];
>>>> @@ -1579,24 +1582,24 @@
>>>>  double RateControl::tuneAbrQScaleFromFeedback(double qScale)
>>>>  {
>>>>      double abrBuffer = 2 * m_rateTolerance * m_bitrate;
>>>> -        /* use framesDone instead of POC as poc count is not serial
>>>> with bframes enabled */
>>>> -        double overflow = 1.0;
>>>> -        double timeDone = (double)(m_framesDone -
>>>> m_param->frameNumThreads + 1) * m_frameDuration;
>>>> -        double wantedBits = timeDone * m_bitrate;
>>>> -        int64_t encodedBits = m_totalBits;
>>>> -        if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
>>>> -        {
>>>> -            abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
>>>> -            encodedBits = m_encodedBits;
>>>> -        }
>>>> +    /* use framesDone instead of POC as poc count is not serial with
>>>> bframes enabled */
>>>> +    double overflow = 1.0;
>>>> +    double timeDone = (double)(m_framesDone - m_param->frameNumThreads
>>>> + 1) * m_frameDuration;
>>>> +    double wantedBits = timeDone * m_bitrate;
>>>> +    int64_t encodedBits = m_totalBits;
>>>> +    if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
>>>> +    {
>>>> +        abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
>>>> +        encodedBits = m_encodedBits;
>>>> +    }
>>>>
>>>> -        if (wantedBits > 0 && encodedBits > 0 &&
>>>> (!m_partialResidualFrames ||
>>>> -            m_param->rc.bStrictCbr || m_isGrainEnabled))
>>>> -        {
>>>> -            abrBuffer *= X265_MAX(1, sqrt(timeDone));
>>>> -            overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits -
>>>> wantedBits) / abrBuffer);
>>>> -            qScale *= overflow;
>>>> -        }
>>>> +    if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames
>>>> ||
>>>> +        m_param->rc.bStrictCbr || m_isGrainEnabled))
>>>> +    {
>>>> +        abrBuffer *= X265_MAX(1, sqrt(timeDone));
>>>> +        overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits -
>>>> wantedBits) / abrBuffer);
>>>> +        qScale *= overflow;
>>>> +    }
>>>>      return qScale;
>>>>  }
>>>>
>>>> @@ -2330,17 +2333,18 @@
>>>>      return totalSatdBits + encodedBitsSoFar;
>>>>  }
>>>>
>>>> -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
>>>> RateControlEntry* rce, double& qpVbv)
>>>> +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
>>>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t
>>>> sliceId)
>>>>  {
>>>>      FrameData& curEncData = *curFrame->m_encData;
>>>>      double qScaleVbv = x265_qp2qScale(qpVbv);
>>>>      uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd;
>>>>      double encodedBits = curEncData.m_rowStat[row].encodedBits;
>>>> +    uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
>>>>
>>>> -    if (m_param->bEnableWavefront && row == 1)
>>>> +    if (m_param->bEnableWavefront && rowInSlice == 1)
>>>>      {
>>>> -        rowSatdCost += curEncData.m_rowStat[0].rowSatd;
>>>> -        encodedBits += curEncData.m_rowStat[0].encodedBits;
>>>> +        rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd;
>>>> +        encodedBits += curEncData.m_rowStat[row - 1].encodedBits;
>>>
>>>      }
>>>>      rowSatdCost >>= X265_DEPTH - 8;
>>>>      updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost,
>>>> encodedBits);
>>>> @@ -2350,8 +2354,8 @@
>>>>          if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp)
>>>>          {
>>>>              uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowIntraSatd;
>>>> -            if (m_param->bEnableWavefront && row == 1)
>>>> -                intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd;
>>>> +            if (m_param->bEnableWavefront && rowInSlice == 1)
>>>> +                intraRowSatdCost += curEncData.m_rowStat[row -
>>>> 1].rowIntraSatd;
>>>>              intraRowSatdCost >>= X265_DEPTH - 8;
>>>>              updatePredictor(rce->rowPred[1], qScaleVbv,
>>>> (double)intraRowSatdCost, encodedBits);
>>>>          }
>>>> @@ -2376,7 +2380,7 @@
>>>>      const SPS& sps = *curEncData.m_slice->m_sps;
>>>>      double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight);
>>>>
>>>> -    if (row < sps.numCuInHeight - 1)
>>>> +    if (row < m_sliceBaseRow[sliceId + 1] - 1)
>>>>      {
>>>>          /* More threads means we have to be more cautious in letting
>>>> ratecontrol use up extra bits. */
>>>>          double rcTol = bufferLeftPlanned / m_param->frameNumThreads *
>>>> m_rateTolerance;
>>>> @@ -2693,8 +2697,8 @@
>>>>              m_encodedBitsWindow[pos % s_slidingWindowFrames] =
>>>> actualBits;
>>>>          if(rce->sliceType != I_SLICE)
>>>>          {
>>>> -        int qp = int (rce->qpaRc + 0.5);
>>>> -        m_qpToEncodedBits[qp] =  m_qpToEncodedBits[qp] == 0 ?
>>>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
>>>> +            int qp = int (rce->qpaRc + 0.5);
>>>> +            m_qpToEncodedBits[qp] =  m_qpToEncodedBits[qp] == 0 ?
>>>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
>>>>          }
>>>>          curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow;
>>>>          curFrame->m_rcData->cplxrSum = m_cplxrSum;
>>>> @@ -2779,7 +2783,8 @@
>>>>              curFrame->m_encData->m_frameStats.percent8x8Skip  *
>>>> m_ncu) < 0)
>>>>              goto writeFailure;
>>>>      }
>>>> -    else{
>>>> +    else
>>>> +    {
>>>>          RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps;
>>>>          int i, num = rpsWriter->numberOfPictures;
>>>>          char deltaPOC[128];
>>>> diff -r 71f700844b0b -r 546387e0b983 source/encoder/ratecontrol.h
>>>> --- a/source/encoder/ratecontrol.h      Tue Sep 12 18:13:03 2017 +0530
>>>> +++ b/source/encoder/ratecontrol.h      Thu Sep 21 20:20:52 2017 +0530
>>>> @@ -244,7 +244,7 @@
>>>>      int  rateControlStart(Frame* curFrame, RateControlEntry* rce,
>>>> Encoder* enc);
>>>>      void rateControlUpdateStats(RateControlEntry* rce);
>>>>      int  rateControlEnd(Frame* curFrame, int64_t bits,
>>>> RateControlEntry* rce, int *filler);
>>>> -    int  rowVbvRateControl(Frame* curFrame, uint32_t row,
>>>> RateControlEntry* rce, double& qpVbv);
>>>> +    int  rowVbvRateControl(Frame* curFrame, uint32_t row,
>>>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t
>>>> sliceId);
>>>>      int  rateControlSliceType(int frameNum);
>>>>      bool cuTreeReadFor2Pass(Frame* curFrame);
>>>>      void hrdFullness(SEIBufferingPeriod* sei);
>>>>
>>>
>>>
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170928/3bd7d807/attachment-0001.html>


More information about the x265-devel mailing list