[x265] [PATCH] vbv hanging issue; fix for multiple slices

Ashok Kumar Mishra ashok at multicorewareinc.com
Thu Sep 28 12:26:14 CEST 2017


Yes, sending once again.

On Thu, Sep 28, 2017 at 2:59 PM, Pradeep Ramachandran <
pradeep at multicorewareinc.com> wrote:

>
> On Thu, Sep 28, 2017 at 2:16 PM, <ashok at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Ashok Kumar Mishra <ashok at multicorewareinc.com>
>> # Date 1506091858 -19800
>> #      Fri Sep 22 20:20:58 2017 +0530
>> # Node ID c838e60c7c6ba0ab07e2d4130a5c2ba22e0b1eea
>> # Parent  e62b12bd8b4573b15290ebf110e01c8fafce55be
>> vbv hanging issue; fix for multiple slices
>> When multiple slices are enabled, VBV rate control must operate on the rows
>> belonging to each slice, since multiple slices are encoded simultaneously.
>>
>>
> This patch doesn't apply on the current tip of the default branch. Please
> fix and resend.
>
>
>> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/frameencoder.cpp
>> --- a/source/encoder/frameencoder.cpp   Thu Jun 29 13:13:56 2017 +0530
>> +++ b/source/encoder/frameencoder.cpp   Fri Sep 22 20:20:58 2017 +0530
>> @@ -88,6 +88,7 @@
>>      delete[] m_outStreams;
>>      delete[] m_backupStreams;
>>      X265_FREE(m_sliceBaseRow);
>> +    X265_FREE(m_sliceMaxBlockRow);
>>      X265_FREE(m_cuGeoms);
>>      X265_FREE(m_ctuGeomMap);
>>      X265_FREE(m_substreamSizes);
>> @@ -118,6 +119,40 @@
>>
>>      m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
>>      ok &= !!m_sliceBaseRow;
>> +    m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) /
>> m_param->maxSlices;
>> +    uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
>> +    uint32_t rowSum = sliceGroupSizeAccu;
>> +    uint32_t sidx = 0;
>> +    for (uint32_t i = 0; i < m_numRows; i++)
>> +    {
>> +        const uint32_t rowRange = (rowSum >> 8);
>> +        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>> +        {
>> +            rowSum += sliceGroupSizeAccu;
>> +            m_sliceBaseRow[++sidx] = i;
>> +        }
>> +    }
>> +    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
>> +    m_sliceBaseRow[0] = 0;
>> +    m_sliceBaseRow[m_param->maxSlices] = m_numRows;
>> +
>> +    m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
>> +    ok &= !!m_sliceMaxBlockRow;
>> +    uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
>> +    sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
>> +    rowSum = sliceGroupSizeAccu;
>> +    sidx = 0;
>> +    for (uint32_t i = 0; i < maxBlockRows; i++)
>> +    {
>> +        const uint32_t rowRange = (rowSum >> 8);
>> +        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>> +        {
>> +            rowSum += sliceGroupSizeAccu;
>> +            m_sliceMaxBlockRow[++sidx] = i;
>> +        }
>> +    }
>> +    m_sliceMaxBlockRow[0] = 0;
>> +    m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
>>
>>      /* determine full motion search range */
>>      int range  = m_param->searchRange;       /* fpel search */
>> @@ -341,6 +376,8 @@
>>      m_completionCount = 0;
>>      m_bAllRowsStop = false;
>>      m_vbvResetTriggerRow = -1;
>> +    m_rowSliceTotalBits[0] = 0;
>> +    m_rowSliceTotalBits[1] = 0;
>>
>>      m_SSDY = m_SSDU = m_SSDV = 0;
>>      m_ssim = 0;
>> @@ -550,28 +587,13 @@
>>
>>      /* reset entropy coders and compute slice id */
>>      m_entropyCoder.load(m_initSliceContext);
>> -    const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1)
>> / m_param->maxSlices;
>> -    const uint32_t sliceGroupSizeAccu = (m_numRows << 8) /
>> m_param->maxSlices;
>> -    m_sliceGroupSize = (uint16_t)sliceGroupSize;
>> +
>> +    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>> +        for (uint32_t row = m_sliceBaseRow[sliceId]; row <
>> m_sliceBaseRow[sliceId + 1]; row++)
>> +            m_rows[row].init(m_initSliceContext, sliceId);
>>
>> -    uint32_t rowSum = sliceGroupSizeAccu;
>> -    uint32_t sidx = 0;
>> -    for (uint32_t i = 0; i < m_numRows; i++)
>> -    {
>> -        const uint32_t rowRange = (rowSum >> 8);
>> -
>> -        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
>> -        {
>> -            rowSum += sliceGroupSizeAccu;
>> -            m_sliceBaseRow[++sidx] = i;
>> -        }
>> -
>> -        m_rows[i].init(m_initSliceContext, sidx);
>> -    }
>> -    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
>> -
>> -    m_sliceBaseRow[0] = 0;
>> -    m_sliceBaseRow[m_param->maxSlices] = m_numRows;
>> +    // reset slice counter for rate control update
>> +    m_sliceCnt = 0;
>>
>>      uint32_t numSubstreams = m_param->bEnableWavefront ?
>> slice->m_sps->numCuInHeight : m_param->maxSlices;
>>      X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1),
>> "Multiple slices without WPP unsupport now!");
>> @@ -586,8 +608,10 @@
>>                  m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
>>      }
>>      else
>> +    {
>>          for (uint32_t i = 0; i < numSubstreams; i++)
>>              m_outStreams[i].resetBits();
>> +    }
>>
>>      int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
>>
>> @@ -697,10 +721,9 @@
>>       * compressed in a wave-front pattern if WPP is enabled. Row based
>> loop
>>       * filters runs behind the CTU compression and reconstruction */
>>
>> -    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>> -    {
>> +    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
>>          m_rows[m_sliceBaseRow[sliceId]].active = true;
>> -    }
>> +
>>      if (m_param->bEnableWavefront)
>>      {
>>          int i = 0;
>> @@ -719,6 +742,7 @@
>>              }
>>          }
>>      }
>> +
>>      if (m_param->bEnableWavefront)
>>      {
>>          for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize;
>> rowInSlice++)
>> @@ -751,6 +775,7 @@
>>                              m_mref[l][ref].applyWeight(rowIdx,
>> m_numRows, sliceEndRow, sliceId);
>>                      }
>>                  }
>> +
>>                  enableRowEncoder(m_row_to_idx[row]); /* clear external
>> dependency for this row */
>>                  if (!rowInSlice)
>>                  {
>> @@ -980,9 +1005,8 @@
>>              // complete the slice header by writing WPP row-starts
>>              m_entropyCoder.setBitstream(&m_bs);
>>              if (slice->m_pps->bEntropyCodingSyncEnabled)
>> -            {
>>                  m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow],
>> (nextSliceRow - prevSliceRow - 1), maxStreamSize);
>> -            }
>> +
>>              m_bs.writeByteAlignment();
>>
>>              m_nalList.serialize(slice->m_nalUnitType, m_bs);
>> @@ -1211,17 +1235,21 @@
>>      int64_t startTime = x265_mdate();
>>      if (ATOMIC_INC(&m_activeWorkerCount) == 1 && m_stallStartTime)
>>          m_totalNoWorkerTime += x265_mdate() - m_stallStartTime;
>> +
>>      const uint32_t realRow = m_idx_to_row[row >> 1];
>>      const uint32_t typeNum = m_idx_to_row[row & 1];
>> +
>>      if (!typeNum)
>>          processRowEncoder(realRow, m_tld[threadId]);
>>      else
>>      {
>>          m_frameFilter.processRow(realRow);
>> +
>>          // NOTE: Active next row
>>          if (realRow != m_sliceBaseRow[m_rows[realRow].sliceId + 1] - 1)
>>              enqueueRowFilter(m_row_to_idx[realRow + 1]);
>>      }
>> +
>>      if (ATOMIC_DEC(&m_activeWorkerCount) == 0)
>>          m_stallStartTime = x265_mdate();
>>
>> @@ -1264,20 +1292,18 @@
>>      const uint32_t lineStartCUAddr = row * numCols;
>>      bool bIsVbv = m_param->rc.vbvBufferSize > 0 &&
>> m_param->rc.vbvMaxBitrate > 0;
>>
>> +    const uint32_t sliceId = curRow.sliceId;
>>      uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1))
>> / 16;
>> -    uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 -
>> 1)) / 16;
>>      uint32_t noOfBlocks = m_param->maxCUSize / 16;
>>      const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row -
>> 1].sliceId != curRow.sliceId)) ? 1 : 0;
>>      const uint32_t bLastRowInSlice = ((row == m_numRows - 1) ||
>> (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
>> -    const uint32_t sliceId = curRow.sliceId;
>>      const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
>>      const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
>>
>> -    if (bFirstRowInSlice && !curRow.completed)
>> -    {
>> -        // Load SBAC coder context from previous row and initialize row
>> state.
>> -        rowCoder.load(m_initSliceContext);
>> -    }
>> +    // Load SBAC coder context from previous row and initialize row
>> state.
>> +    if (bFirstRowInSlice && !curRow.completed)
>> +        rowCoder.load(m_initSliceContext);
>> +
>>      // calculate mean QP for consistent deltaQP signalling calculation
>>      if (m_param->bOptCUDeltaQP)
>>      {
>> @@ -1287,15 +1313,12 @@
>>              if (m_param->bEnableWavefront || !row)
>>              {
>>                  double meanQPOff = 0;
>> -                uint32_t loopIncr, count = 0;
>>                  bool isReferenced = IS_REFERENCED(m_frame);
>>                  double *qpoffs = (isReferenced && m_param->rc.cuTree) ?
>> m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;
>>                  if (qpoffs)
>>                  {
>> -                    if (m_param->rc.qgSize == 8)
>> -                        loopIncr = 8;
>> -                    else
>> -                        loopIncr = 16;
>> +                    uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 :
>> 16;
>> +
>>                      uint32_t cuYStart = 0, height =
>> m_frame->m_fencPic->m_picHeight;
>>                      if (m_param->bEnableWavefront)
>>                      {
>> @@ -1305,6 +1328,7 @@
>>
>>                      uint32_t qgSize = m_param->rc.qgSize, width =
>> m_frame->m_fencPic->m_picWidth;
>>                      uint32_t maxOffsetCols =
>> (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
>> +                    uint32_t count = 0;
>>                      for (uint32_t cuY = cuYStart; cuY < height && (cuY <
>> m_frame->m_fencPic->m_picHeight); cuY += qgSize)
>>                      {
>>                          for (uint32_t cuX = 0; cuX < width; cuX +=
>> qgSize)
>> @@ -1336,7 +1360,8 @@
>>              }
>>              curRow.avgQPComputed = 1;
>>          }
>> -    }
>> +    }
>> +
>>      // Initialize restrict on MV range in slices
>>      tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice *
>> m_param->maxCUSize * 4) + 3 * 4;
>>      tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row)
>> * (m_param->maxCUSize * 4) - 4 * 4);
>> @@ -1364,16 +1389,16 @@
>>                  curRow.bufferedEntropy.copyState(rowCoder);
>>                  curRow.bufferedEntropy.loadContexts(rowCoder);
>>              }
>> -            if (!row && m_vbvResetTriggerRow != intRow)
>> +            if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
>>              {
>>                  curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
>>                  curEncData.m_rowStat[row].rowQpScale =
>> x265_qp2qScale(curEncData.m_avgQpRc);
>>              }
>>
>>              FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
>> -            if (m_param->bEnableWavefront && row >= col && row &&
>> m_vbvResetTriggerRow != intRow)
>> +            if (m_param->bEnableWavefront && rowInSlice >= col &&
>> !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
>>                  cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols +
>> 1].baseQp;
>> -            else if (!m_param->bEnableWavefront && row &&
>> m_vbvResetTriggerRow != intRow)
>> +            else if (!m_param->bEnableWavefront && !bFirstRowInSlice &&
>> m_vbvResetTriggerRow != intRow)
>>                  cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
>>              else
>>                  cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
>> @@ -1385,7 +1410,8 @@
>>              {
>>                  cuStat.vbvCost = 0;
>>                  cuStat.intraVbvCost = 0;
>> -                for (uint32_t h = 0; h < noOfBlocks && block_y <
>> maxBlockRows; h++, block_y++)
>> +
>> +                for (uint32_t h = 0; h < noOfBlocks && block_y <
>> m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
>>                  {
>>                      uint32_t idx = block_x + (block_y * maxBlockCols);
>>
>> @@ -1433,11 +1459,12 @@
>>          {
>>              // NOTE: in VBV mode, we may reencode anytime, so we can't
>> do Deblock stage-Horizon and SAO
>>              if (!bIsVbv)
>> -            {
>> +            {
>>                  // Delay one row to avoid intra prediction conflict
>>                  if (m_pool && !bFirstRowInSlice)
>> -                {
>> +                {
>>                      int allowCol = col;
>> +
>>                      // avoid race condition on last column
>>                      if (rowInSlice >= 2)
>>                      {
>> @@ -1446,11 +1473,13 @@
>>                      }
>>                      m_frameFilter.m_parallelFilter[row -
>> 1].m_allowedCol.set(allowCol);
>>                  }
>> +
>>                  // Last Row may start early
>>                  if (m_pool && bLastRowInSlice)
>>                  {
>>                      // Deblocking last row
>>                      int allowCol = col;
>> +
>>                      // avoid race condition on last column
>>                      if (rowInSlice >= 2)
>>                      {
>> @@ -1472,6 +1501,7 @@
>>
>>          FrameStats frameLog;
>>          curEncData.m_rowStat[row].sumQpAq += collectCTUStatistics(*ctu,
>> &frameLog);
>> +
>>          // copy number of intra, inter cu per row into frame stats for 2
>> pass
>>          if (m_param->rc.bStatWrite)
>>          {
>> @@ -1485,10 +1515,8 @@
>>                  int shift = 2 * (m_param->maxCUDepth - depth);
>>                  int cuSize = m_param->maxCUSize >> depth;
>>
>> -                if (cuSize == 8)
>> -                    curRow.rowStats.intra8x8Cnt +=
>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
>> -                else
>> -                    curRow.rowStats.intra8x8Cnt +=
>> (int)(frameLog.cntIntra[depth] << shift);
>> +                curRow.rowStats.intra8x8Cnt += (cuSize == 8) ?
>> (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
>> +
>>  (int)(frameLog.cntIntra[depth] << shift);
>>
>>                  curRow.rowStats.inter8x8Cnt +=
>> (int)(frameLog.cntInter[depth] << shift);
>>                  curRow.rowStats.skip8x8Cnt +=
>> (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift);
>> @@ -1518,12 +1546,13 @@
>>          if (bIsVbv)
>>          {
>>              // Update encoded bits, satdCost, baseQP for each CU if tune
>> grain is disabled
>> -            if ((m_param->bEnableWavefront && (!cuAddr ||
>> !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
>> +            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
>> +            if ((m_param->bEnableWavefront && ((cuAddr ==
>> m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) ||
>> !m_param->bEnableWavefront)
>>              {
>> -                curEncData.m_rowStat[row].rowSatd +=
>> curEncData.m_cuStat[cuAddr].vbvCost;
>> -                curEncData.m_rowStat[row].rowIntraSatd +=
>> curEncData.m_cuStat[cuAddr].intraVbvCost;
>> -                curEncData.m_rowStat[row].encodedBits +=
>> curEncData.m_cuStat[cuAddr].totalBits;
>> -                curEncData.m_rowStat[row].sumQpRc +=
>> curEncData.m_cuStat[cuAddr].baseQp;
>> +                curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
>> +                curEncData.m_rowStat[row].rowIntraSatd +=
>> cuStat.intraVbvCost;
>> +                curEncData.m_rowStat[row].encodedBits +=
>> cuStat.totalBits;
>> +                curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
>>                  curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
>>              }
>>
>> @@ -1531,7 +1560,7 @@
>>              if (!m_param->bEnableWavefront && col == numCols - 1)
>>              {
>>                  double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
>> -                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>> row, &m_rce, qpBase);
>> +                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
>>                  qpBase = x265_clip3((double)m_param->rc.qpMin,
>> (double)m_param->rc.qpMax, qpBase);
>>                  curEncData.m_rowStat[row].rowQp = qpBase;
>>                  curEncData.m_rowStat[row].rowQpScale =
>> x265_qp2qScale(qpBase);
>> @@ -1557,15 +1586,16 @@
>>                  }
>>              }
>>              // If current block is at row diagonal checkpoint, call vbv
>> ratecontrol.
>> -            else if (m_param->bEnableWavefront && row == col && row)
>> +            else if (m_param->bEnableWavefront && rowInSlice == col &&
>> !bFirstRowInSlice)
>>              {
>>                  if (m_param->rc.bEnableConstVbv)
>>                  {
>> -                    int32_t startCuAddr = numCols * row;
>> -                    int32_t EndCuAddr = startCuAddr + col;
>> -                    for (int32_t r = row; r >= 0; r--)
>> +                    uint32_t startCuAddr = numCols * row;
>> +                    uint32_t EndCuAddr = startCuAddr + col;
>> +
>> +                    for (int32_t r = row; r >=
>> (int32_t)m_sliceBaseRow[sliceId]; r--)
>>                      {
>> -                        for (int32_t c = startCuAddr; c <= EndCuAddr &&
>> c <= (int32_t)numCols * (r + 1) - 1; c++)
>> +                        for (uint32_t c = startCuAddr; c <= EndCuAddr &&
>> c <= numCols * (r + 1) - 1; c++)
>>                          {
>>                              curEncData.m_rowStat[r].rowSatd +=
>> curEncData.m_cuStat[c].vbvCost;
>>                              curEncData.m_rowStat[r].rowIntraSatd +=
>> curEncData.m_cuStat[c].intraVbvCost;
>> @@ -1578,10 +1608,10 @@
>>                      }
>>                  }
>>                  double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
>> -                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>> row, &m_rce, qpBase);
>> +                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
>> row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
>>                  qpBase = x265_clip3((double)m_param->rc.qpMin,
>> (double)m_param->rc.qpMax, qpBase);
>>                  curEncData.m_rowStat[row].rowQp = qpBase;
>> -                curEncData.m_rowStat[row].rowQpScale =
>> x265_qp2qScale(qpBase);
>> +                curEncData.m_rowStat[row].rowQpScale =
>> x265_qp2qScale(qpBase);
>>
>>                  if (reEncode < 0)
>>                  {
>> @@ -1592,7 +1622,7 @@
>>                      m_vbvResetTriggerRow = row;
>>                      m_bAllRowsStop = true;
>>
>> -                    for (uint32_t r = m_numRows - 1; r >= row; r--)
>> +                    for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r
>> >= row; r--)
>>                      {
>>                          CTURow& stopRow = m_rows[r];
>>
>> @@ -1670,14 +1700,15 @@
>>              return;
>>          }
>>      }
>> +
>>      /* this row of CTUs has been compressed */
>>      if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)
>>      {
>> -        if (row == m_numRows - 1)
>> +        if (bLastRowInSlice)
>>          {
>> -            for (int32_t r = 0; r < (int32_t)m_numRows; r++)
>> +            for (uint32_t r = m_sliceBaseRow[sliceId]; r <
>> m_sliceBaseRow[sliceId + 1]; r++)
>>              {
>> -                for (int32_t c = curEncData.m_rowStat[r].numEncodedCUs
>> + 1; c < (int32_t)numCols * (r + 1); c++)
>> +                for (uint32_t c = curEncData.m_rowStat[r].numEncodedCUs
>> + 1; c < numCols * (r + 1); c++)
>>                  {
>>                      curEncData.m_rowStat[r].rowSatd +=
>> curEncData.m_cuStat[c].vbvCost;
>>                      curEncData.m_rowStat[r].rowIntraSatd +=
>> curEncData.m_cuStat[c].intraVbvCost;
>> @@ -1695,26 +1726,41 @@
>>       * after half the frame is encoded, but after this initial period we
>> update
>>       * after refLagRows (the number of rows reference frames must have
>> completed
>>       * before referencees may begin encoding) */
>> -    uint32_t rowCount = 0;
>>      if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv)
>>      {
>> +        uint32_t rowCount = 0;
>> +        uint32_t maxRows = m_sliceBaseRow[sliceId + 1] -
>> m_sliceBaseRow[sliceId];
>>          if (!m_rce.encodeOrder)
>> -            rowCount = m_numRows - 1;
>> +            rowCount = maxRows - 1;
>>          else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum /
>> m_param->fpsDenom))
>> -            rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1);
>> +            rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);
>>          else
>> -            rowCount = X265_MIN(m_refLagRows, m_numRows - 1);
>> -        if (row == rowCount)
>> +                       rowCount = X265_MIN(m_refLagRows /
>> m_param->maxSlices, maxRows - 1);
>> +
>> +        if (rowInSlice == rowCount)
>>          {
>> -            m_rce.rowTotalBits = 0;
>> +            m_rowSliceTotalBits[sliceId] = 0;
>>              if (bIsVbv)
>> -                for (uint32_t i = 0; i < rowCount; i++)
>> -                    m_rce.rowTotalBits += curEncData.m_rowStat[i].encodedBits;
>> +            {
>> +                for (uint32_t i = m_sliceBaseRow[sliceId]; i < rowCount
>> + m_sliceBaseRow[sliceId]; i++)
>> +                    m_rowSliceTotalBits[sliceId] +=
>> curEncData.m_rowStat[i].encodedBits;
>> +            }
>>              else
>> -                for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols;
>> cuAddr++)
>> -                    m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr].totalBits;
>> +            {
>> +                uint32_t startAddr = rowCount * numCols * sliceId;
>> +                uint32_t finishAddr = startAddr + rowCount * numCols;
>> +
>> +                for (uint32_t cuAddr = startAddr; cuAddr < finishAddr;
>> cuAddr++)
>> +                    m_rowSliceTotalBits[sliceId] +=
>> curEncData.m_cuStat[cuAddr].totalBits;
>> +            }
>>
>> -            m_top->m_rateControl->rateControlUpdateStats(&m_rce);
>> +            if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)
>> +            {
>> +                m_rce.rowTotalBits = 0;
>> +                for (uint32_t i = 0; i < m_param->maxSlices; i++)
>> +                    m_rce.rowTotalBits += m_rowSliceTotalBits[i];
>> +                m_top->m_rateControl->rateControlUpdateStats(&m_rce);
>> +            }
>>          }
>>      }
>>
>> @@ -1742,11 +1788,13 @@
>>          if (rowInSlice >= m_filterRowDelay)
>>          {
>>              enableRowFilter(m_row_to_idx[row - m_filterRowDelay]);
>> +
>>              /* NOTE: Activate filter if first row (row 0) */
>>              if (rowInSlice == m_filterRowDelay)
>>                  enqueueRowFilter(m_row_to_idx[row - m_filterRowDelay]);
>>              tryWakeOne();
>>          }
>> +
>>          if (bLastRowInSlice)
>>          {
>>              for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i <
>> endRowInSlicePlus1; i++)
>> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/frameencoder.h
>> --- a/source/encoder/frameencoder.h     Thu Jun 29 13:13:56 2017 +0530
>> +++ b/source/encoder/frameencoder.h     Fri Sep 22 20:20:58 2017 +0530
>> @@ -138,6 +138,7 @@
>>      volatile bool            m_bAllRowsStop;
>>      volatile int             m_completionCount;
>>      volatile int             m_vbvResetTriggerRow;
>> +    volatile int             m_sliceCnt;
>>
>>      uint32_t                 m_numRows;
>>      uint32_t                 m_numCols;
>> @@ -147,8 +148,10 @@
>>
>>      CTURow*                  m_rows;
>>      uint16_t                 m_sliceAddrBits;
>> -    uint16_t                 m_sliceGroupSize;
>> -    uint32_t*                m_sliceBaseRow;
>> +    uint32_t                 m_sliceGroupSize;
>> +    uint32_t*                m_sliceBaseRow;
>> +    uint32_t*                m_sliceMaxBlockRow;
>> +    int64_t                  m_rowSliceTotalBits[2];
>>      RateControlEntry         m_rce;
>>      SEIDecodedPictureHash    m_seiReconPictureDigest;
>>
>> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/ratecontrol.cpp
>> --- a/source/encoder/ratecontrol.cpp    Thu Jun 29 13:13:56 2017 +0530
>> +++ b/source/encoder/ratecontrol.cpp    Fri Sep 22 20:20:58 2017 +0530
>> @@ -732,7 +732,6 @@
>>      m_bitrate = m_param->rc.bitrate * 1000;
>>  }
>>
>> -
>>  void RateControl::initHRD(SPS& sps)
>>  {
>>      int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;
>> @@ -765,6 +764,7 @@
>>
>>      #undef MAX_DURATION
>>  }
>> +
>>  bool RateControl::analyseABR2Pass(uint64_t allAvailableBits)
>>  {
>>      double rateFactor, stepMult;
>> @@ -1473,6 +1473,7 @@
>>
>>      return q;
>>  }
>> +
>>  double RateControl::countExpectedBits(int startPos, int endPos)
>>  {
>>      double expectedBits = 0;
>> @@ -1484,6 +1485,7 @@
>>      }
>>      return expectedBits;
>>  }
>> +
>>  bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int
>> over, int endPos)
>>  {
>>      /* find an interval ending on an overflow or underflow (depending on
>> whether
>> @@ -1531,6 +1533,7 @@
>>      }
>>      return adjusted;
>>  }
>> +
>>  bool RateControl::cuTreeReadFor2Pass(Frame* frame)
>>  {
>>      int index = m_encOrder[frame->m_poc];
>> @@ -1579,24 +1582,24 @@
>>  double RateControl::tuneAbrQScaleFromFeedback(double qScale)
>>  {
>>      double abrBuffer = 2 * m_rateTolerance * m_bitrate;
>> -        /* use framesDone instead of POC as poc count is not serial with
>> bframes enabled */
>> -        double overflow = 1.0;
>> -        double timeDone = (double)(m_framesDone -
>> m_param->frameNumThreads + 1) * m_frameDuration;
>> -        double wantedBits = timeDone * m_bitrate;
>> -        int64_t encodedBits = m_totalBits;
>> -        if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
>> -        {
>> -            abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
>> -            encodedBits = m_encodedBits;
>> -        }
>> +    /* use framesDone instead of POC as poc count is not serial with
>> bframes enabled */
>> +    double overflow = 1.0;
>> +    double timeDone = (double)(m_framesDone - m_param->frameNumThreads +
>> 1) * m_frameDuration;
>> +    double wantedBits = timeDone * m_bitrate;
>> +    int64_t encodedBits = m_totalBits;
>> +    if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)
>> +    {
>> +        abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);
>> +        encodedBits = m_encodedBits;
>> +    }
>>
>> -        if (wantedBits > 0 && encodedBits > 0 &&
>> (!m_partialResidualFrames ||
>> -            m_param->rc.bStrictCbr || m_isGrainEnabled))
>> -        {
>> -            abrBuffer *= X265_MAX(1, sqrt(timeDone));
>> -            overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits -
>> wantedBits) / abrBuffer);
>> -            qScale *= overflow;
>> -        }
>> +    if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames ||
>> +        m_param->rc.bStrictCbr || m_isGrainEnabled))
>> +    {
>> +        abrBuffer *= X265_MAX(1, sqrt(timeDone));
>> +        overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits)
>> / abrBuffer);
>> +        qScale *= overflow;
>> +    }
>>      return qScale;
>>  }
>>
>> @@ -2330,17 +2333,18 @@
>>      return totalSatdBits + encodedBitsSoFar;
>>  }
>>
>> -int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
>> RateControlEntry* rce, double& qpVbv)
>> +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t
>> sliceId)
>>  {
>>      FrameData& curEncData = *curFrame->m_encData;
>>      double qScaleVbv = x265_qp2qScale(qpVbv);
>>      uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd;
>>      double encodedBits = curEncData.m_rowStat[row].encodedBits;
>> +    uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
>>
>> -    if (m_param->bEnableWavefront && row == 1)
>> +    if (m_param->bEnableWavefront && rowInSlice == 1)
>>      {
>> -        rowSatdCost += curEncData.m_rowStat[0].rowSatd;
>> -        encodedBits += curEncData.m_rowStat[0].encodedBits;
>> +        rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd;
>> +        encodedBits += curEncData.m_rowStat[row - 1].encodedBits;
>>      }
>>      rowSatdCost >>= X265_DEPTH - 8;
>>      updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost,
>> encodedBits);
>> @@ -2350,8 +2354,8 @@
>>          if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp)
>>          {
>>              uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowIntraSatd;
>> -            if (m_param->bEnableWavefront && row == 1)
>> -                intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd;
>> +            if (m_param->bEnableWavefront && rowInSlice == 1)
>> +                intraRowSatdCost += curEncData.m_rowStat[row -
>> 1].rowIntraSatd;
>>              intraRowSatdCost >>= X265_DEPTH - 8;
>>              updatePredictor(rce->rowPred[1], qScaleVbv,
>> (double)intraRowSatdCost, encodedBits);
>>          }
>> @@ -2376,7 +2380,7 @@
>>      const SPS& sps = *curEncData.m_slice->m_sps;
>>      double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight);
>>
>> -    if (row < sps.numCuInHeight - 1)
>> +    if (row < m_sliceBaseRow[sliceId + 1] - 1)
>>      {
>>          /* More threads means we have to be more cautious in letting
>> ratecontrol use up extra bits. */
>>          double rcTol = bufferLeftPlanned / m_param->frameNumThreads *
>> m_rateTolerance;
>> @@ -2693,8 +2697,8 @@
>>              m_encodedBitsWindow[pos % s_slidingWindowFrames] =
>> actualBits;
>>          if(rce->sliceType != I_SLICE)
>>          {
>> -        int qp = int (rce->qpaRc + 0.5);
>> -        m_qpToEncodedBits[qp] =  m_qpToEncodedBits[qp] == 0 ? actualBits
>> : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
>> +            int qp = int (rce->qpaRc + 0.5);
>> +            m_qpToEncodedBits[qp] =  m_qpToEncodedBits[qp] == 0 ?
>> actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;
>>          }
>>          curFrame->m_rcData->wantedBitsWindow = m_wantedBitsWindow;
>>          curFrame->m_rcData->cplxrSum = m_cplxrSum;
>> @@ -2779,7 +2783,8 @@
>>              curFrame->m_encData->m_frameStats.percent8x8Skip  * m_ncu)
>> < 0)
>>              goto writeFailure;
>>      }
>> -    else{
>> +    else
>> +    {
>>          RPS* rpsWriter = &curFrame->m_encData->m_slice->m_rps;
>>          int i, num = rpsWriter->numberOfPictures;
>>          char deltaPOC[128];
>> diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/ratecontrol.h
>> --- a/source/encoder/ratecontrol.h      Thu Jun 29 13:13:56 2017 +0530
>> +++ b/source/encoder/ratecontrol.h      Fri Sep 22 20:20:58 2017 +0530
>> @@ -244,7 +244,7 @@
>>      int  rateControlStart(Frame* curFrame, RateControlEntry* rce,
>> Encoder* enc);
>>      void rateControlUpdateStats(RateControlEntry* rce);
>>      int  rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry*
>> rce, int *filler);
>> -    int  rowVbvRateControl(Frame* curFrame, uint32_t row,
>> RateControlEntry* rce, double& qpVbv);
>> +    int  rowVbvRateControl(Frame* curFrame, uint32_t row,
>> RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t
>> sliceId);
>>      int  rateControlSliceType(int frameNum);
>>      bool cuTreeReadFor2Pass(Frame* curFrame);
>>      void hrdFullness(SEIBufferingPeriod* sei);
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170928/95e03cb7/attachment-0001.html>


More information about the x265-devel mailing list