[x265-commits] [x265] vbv hanging issue; fix for multiple slices

Ashok Kumar Mishra ashok at multicorewareinc.com
Wed Oct 4 01:03:02 CEST 2017


details:   http://hg.videolan.org/x265/rev/667bbf65185e
branches:  
changeset: 11883:667bbf65185e
user:      Ashok Kumar Mishra <ashok at multicorewareinc.com>
date:      Fri Sep 22 20:20:58 2017 +0530
description:
vbv hanging issue; fix for multiple slices
When multiple slices are enabled, vbv rate control must take care of
correct rows in slices, since multiple slices are encoding simultaneously.
Subject: [x265] export x265_encoder_ctu_info() in the Windows DLL

details:   http://hg.videolan.org/x265/rev/8db95a1da9f8
branches:  
changeset: 11884:8db95a1da9f8
user:      Sean McGovern <gseanmcg at gmail.com>
date:      Sun Oct 01 10:16:28 2017 -0400
description:
export x265_encoder_ctu_info() in the Windows DLL

diffstat:

 source/encoder/frameencoder.cpp |  181 +++++++++++++++++++++++----------------
 source/encoder/frameencoder.h   |    7 +-
 source/encoder/ratecontrol.cpp  |   61 +++++++------
 source/encoder/ratecontrol.h    |    2 +-
 source/x265.def.in              |    1 +
 5 files changed, 147 insertions(+), 105 deletions(-)

diffs (truncated from 558 to 300 lines):

diff -r 0967d0add97e -r 8db95a1da9f8 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Sep 25 18:35:07 2017 +0530
+++ b/source/encoder/frameencoder.cpp	Sun Oct 01 10:16:28 2017 -0400
@@ -88,6 +88,7 @@ void FrameEncoder::destroy()
     delete[] m_outStreams;
     delete[] m_backupStreams;
     X265_FREE(m_sliceBaseRow);
+    X265_FREE(m_sliceMaxBlockRow);
     X265_FREE(m_cuGeoms);
     X265_FREE(m_ctuGeomMap);
     X265_FREE(m_substreamSizes);
@@ -118,6 +119,40 @@ bool FrameEncoder::init(Encoder *top, in
 
     m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
     ok &= !!m_sliceBaseRow;
+    m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;
+    uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;    
+    uint32_t rowSum = sliceGroupSizeAccu;
+    uint32_t sidx = 0;
+    for (uint32_t i = 0; i < m_numRows; i++)
+    {
+        const uint32_t rowRange = (rowSum >> 8);
+        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
+        {
+            rowSum += sliceGroupSizeAccu;
+            m_sliceBaseRow[++sidx] = i;
+        }        
+    }
+    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
+    m_sliceBaseRow[0] = 0;
+    m_sliceBaseRow[m_param->maxSlices] = m_numRows;
+
+    m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
+    ok &= !!m_sliceMaxBlockRow;
+    uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;
+    sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;
+    rowSum = sliceGroupSizeAccu;
+    sidx = 0;
+    for (uint32_t i = 0; i < maxBlockRows; i++)
+    {
+        const uint32_t rowRange = (rowSum >> 8);
+        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
+        {
+            rowSum += sliceGroupSizeAccu;
+            m_sliceMaxBlockRow[++sidx] = i;
+        }
+    }
+    m_sliceMaxBlockRow[0] = 0;
+    m_sliceMaxBlockRow[m_param->maxSlices] = maxBlockRows;
 
     /* determine full motion search range */
     int range  = m_param->searchRange;       /* fpel search */
@@ -341,6 +376,8 @@ void FrameEncoder::compressFrame()
     m_completionCount = 0;
     m_bAllRowsStop = false;
     m_vbvResetTriggerRow = -1;
+    m_rowSliceTotalBits[0] = 0;
+    m_rowSliceTotalBits[1] = 0;
 
     m_SSDY = m_SSDU = m_SSDV = 0;
     m_ssim = 0;
@@ -550,28 +587,13 @@ void FrameEncoder::compressFrame()
 
     /* reset entropy coders and compute slice id */
     m_entropyCoder.load(m_initSliceContext);
-    const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;
-    const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
-    m_sliceGroupSize = (uint16_t)sliceGroupSize;
+	
+    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)   
+        for (uint32_t row = m_sliceBaseRow[sliceId]; row < m_sliceBaseRow[sliceId + 1]; row++)
+            m_rows[row].init(m_initSliceContext, sliceId);   
 
-    uint32_t rowSum = sliceGroupSizeAccu;
-    uint32_t sidx = 0;
-    for (uint32_t i = 0; i < m_numRows; i++)
-    {
-        const uint32_t rowRange = (rowSum >> 8);
-
-        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
-        {
-            rowSum += sliceGroupSizeAccu;
-            m_sliceBaseRow[++sidx] = i;
-        }
-
-        m_rows[i].init(m_initSliceContext, sidx);
-    }
-    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
-
-    m_sliceBaseRow[0] = 0;
-    m_sliceBaseRow[m_param->maxSlices] = m_numRows;
+    // reset slice counter for rate control update
+    m_sliceCnt = 0;
 
     uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices;
     X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!");
@@ -586,8 +608,10 @@ void FrameEncoder::compressFrame()
                 m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
     }
     else
+    {
         for (uint32_t i = 0; i < numSubstreams; i++)
             m_outStreams[i].resetBits();
+    }
 
     int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;
 
@@ -697,10 +721,9 @@ void FrameEncoder::compressFrame()
      * compressed in a wave-front pattern if WPP is enabled. Row based loop
      * filters runs behind the CTU compression and reconstruction */
 
-    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
-    {
+    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)    
         m_rows[m_sliceBaseRow[sliceId]].active = true;
-    }
+    
     if (m_param->bEnableWavefront)
     {
         int i = 0;
@@ -982,9 +1005,8 @@ void FrameEncoder::compressFrame()
             // complete the slice header by writing WPP row-starts
             m_entropyCoder.setBitstream(&m_bs);
             if (slice->m_pps->bEntropyCodingSyncEnabled)
-            {
                 m_entropyCoder.codeSliceHeaderWPPEntryPoints(&m_substreamSizes[prevSliceRow], (nextSliceRow - prevSliceRow - 1), maxStreamSize);
-            }
+            
             m_bs.writeByteAlignment();
 
             m_nalList.serialize(slice->m_nalUnitType, m_bs);
@@ -1270,20 +1292,17 @@ void FrameEncoder::processRowEncoder(int
     const uint32_t lineStartCUAddr = row * numCols;
     bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
 
+    const uint32_t sliceId = curRow.sliceId;
     uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16;
-    uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1)) / 16;
     uint32_t noOfBlocks = m_param->maxCUSize / 16;
     const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - 1].sliceId != curRow.sliceId)) ? 1 : 0;
     const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;
-    const uint32_t sliceId = curRow.sliceId;
     const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];
     const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];
 
-    if (bFirstRowInSlice && !curRow.completed)
-    {
-        // Load SBAC coder context from previous row and initialize row state.
-        rowCoder.load(m_initSliceContext);        
-    }
+    // Load SBAC coder context from previous row and initialize row state.
+    if (bFirstRowInSlice && !curRow.completed)        
+        rowCoder.load(m_initSliceContext);     
 
     // calculate mean QP for consistent deltaQP signalling calculation
     if (m_param->bOptCUDeltaQP)
@@ -1294,15 +1313,12 @@ void FrameEncoder::processRowEncoder(int
             if (m_param->bEnableWavefront || !row)
             {
                 double meanQPOff = 0;
-                uint32_t loopIncr, count = 0;
                 bool isReferenced = IS_REFERENCED(m_frame);
                 double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;
                 if (qpoffs)
                 {
-                    if (m_param->rc.qgSize == 8)
-                        loopIncr = 8;
-                    else
-                        loopIncr = 16;
+                    uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
+
                     uint32_t cuYStart = 0, height = m_frame->m_fencPic->m_picHeight;
                     if (m_param->bEnableWavefront)
                     {
@@ -1312,6 +1328,7 @@ void FrameEncoder::processRowEncoder(int
 
                     uint32_t qgSize = m_param->rc.qgSize, width = m_frame->m_fencPic->m_picWidth;
                     uint32_t maxOffsetCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
+                    uint32_t count = 0;
                     for (uint32_t cuY = cuYStart; cuY < height && (cuY < m_frame->m_fencPic->m_picHeight); cuY += qgSize)
                     {
                         for (uint32_t cuX = 0; cuX < width; cuX += qgSize)
@@ -1372,16 +1389,16 @@ void FrameEncoder::processRowEncoder(int
                 curRow.bufferedEntropy.copyState(rowCoder);
                 curRow.bufferedEntropy.loadContexts(rowCoder);
             }
-            if (!row && m_vbvResetTriggerRow != intRow)
+            if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow)            
             {
                 curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
                 curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
             }
 
             FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
-            if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow)
+            if (m_param->bEnableWavefront && rowInSlice >= col && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
                 cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
-            else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow)
+            else if (!m_param->bEnableWavefront && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)
                 cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
             else
                 cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
@@ -1393,7 +1410,8 @@ void FrameEncoder::processRowEncoder(int
             {
                 cuStat.vbvCost = 0;
                 cuStat.intraVbvCost = 0;
-                for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)
+
+                for (uint32_t h = 0; h < noOfBlocks && block_y < m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)
                 {
                     uint32_t idx = block_x + (block_y * maxBlockCols);
 
@@ -1497,10 +1515,8 @@ void FrameEncoder::processRowEncoder(int
                 int shift = 2 * (m_param->maxCUDepth - depth);
                 int cuSize = m_param->maxCUSize >> depth;
 
-                if (cuSize == 8)
-                    curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);
-                else
-                    curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] << shift);
+                curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :
+                                                               (int)(frameLog.cntIntra[depth] << shift);
 
                 curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);
                 curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[depth] + frameLog.cntMergeCu[depth]) << shift);
@@ -1530,12 +1546,13 @@ void FrameEncoder::processRowEncoder(int
         if (bIsVbv)
         {   
             // Update encoded bits, satdCost, baseQP for each CU if tune grain is disabled
-            if ((m_param->bEnableWavefront && (!cuAddr || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
+            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];    
+            if ((m_param->bEnableWavefront && ((cuAddr == m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)
             {
-                curEncData.m_rowStat[row].rowSatd += curEncData.m_cuStat[cuAddr].vbvCost;
-                curEncData.m_rowStat[row].rowIntraSatd += curEncData.m_cuStat[cuAddr].intraVbvCost;
-                curEncData.m_rowStat[row].encodedBits += curEncData.m_cuStat[cuAddr].totalBits;
-                curEncData.m_rowStat[row].sumQpRc += curEncData.m_cuStat[cuAddr].baseQp;
+                curEncData.m_rowStat[row].rowSatd += cuStat.vbvCost;
+                curEncData.m_rowStat[row].rowIntraSatd += cuStat.intraVbvCost;
+                curEncData.m_rowStat[row].encodedBits += cuStat.totalBits;
+                curEncData.m_rowStat[row].sumQpRc += cuStat.baseQp;
                 curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
             }
             
@@ -1543,7 +1560,7 @@ void FrameEncoder::processRowEncoder(int
             if (!m_param->bEnableWavefront && col == numCols - 1)
             {
                 double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
-                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase);
+                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
                 qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
                 curEncData.m_rowStat[row].rowQp = qpBase;
                 curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
@@ -1569,15 +1586,16 @@ void FrameEncoder::processRowEncoder(int
                 }
             }
             // If current block is at row diagonal checkpoint, call vbv ratecontrol.
-            else if (m_param->bEnableWavefront && row == col && row)
+            else if (m_param->bEnableWavefront && rowInSlice == col && !bFirstRowInSlice)
             {
                 if (m_param->rc.bEnableConstVbv)
                 {
-                    int32_t startCuAddr = numCols * row;
-                    int32_t EndCuAddr = startCuAddr + col;
-                    for (int32_t r = row; r >= 0; r--)
+                    uint32_t startCuAddr = numCols * row;
+                    uint32_t EndCuAddr = startCuAddr + col;
+
+                    for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[sliceId]; r--)
                     {
-                        for (int32_t c = startCuAddr; c <= EndCuAddr && c <= (int32_t)numCols * (r + 1) - 1; c++)
+                        for (uint32_t c = startCuAddr; c <= EndCuAddr && c <= numCols * (r + 1) - 1; c++)
                         {
                             curEncData.m_rowStat[r].rowSatd += curEncData.m_cuStat[c].vbvCost;
                             curEncData.m_rowStat[r].rowIntraSatd += curEncData.m_cuStat[c].intraVbvCost;
@@ -1590,10 +1608,10 @@ void FrameEncoder::processRowEncoder(int
                     }
                 }
                 double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
-                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase);
+                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId);
                 qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
                 curEncData.m_rowStat[row].rowQp = qpBase;
-                curEncData.m_rowStat[row].rowQpScale =  x265_qp2qScale(qpBase);
+                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
 
                 if (reEncode < 0)
                 {
@@ -1604,7 +1622,7 @@ void FrameEncoder::processRowEncoder(int
                     m_vbvResetTriggerRow = row;
                     m_bAllRowsStop = true;
 
-                    for (uint32_t r = m_numRows - 1; r >= row; r--)
+                    for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >= row; r--)
                     {
                         CTURow& stopRow = m_rows[r];
 
@@ -1686,11 +1704,11 @@ void FrameEncoder::processRowEncoder(int
     /* this row of CTUs has been compressed */
     if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)


More information about the x265-commits mailing list