<div dir="ltr"><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Sep 28, 2017 at 2:16 PM,  <span dir="ltr"><<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class=""># HG changeset patch<br>
# User Ashok Kumar Mishra <<a href="mailto:ashok@multicorewareinc.com">ashok@multicorewareinc.com</a>><br>
</span># Date 1506091858 -19800<br>
#      Fri Sep 22 20:20:58 2017 +0530<br>
# Node ID c838e60c7c6ba0ab07e2d4130a5c2b<wbr>a22e0b1eea<br>
# Parent  e62b12bd8b4573b15290ebf110e01c<wbr>8fafce55be<br>
<span class="">vbv hanging issue; fix for multiple slices<br>
When multiple slices are enabled, vbv rate control must take care of<br>
correct rows in slices, since multiple slices are encoding simultaneously.<br>
<br></span></blockquote><div><br></div><div>This patch doesn't apply cleanly to the current tip of the default branch. Please rebase it onto the latest tip and resend.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">
</span>diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/frameencoder.<wbr>cpp<br>
--- a/source/encoder/frameencoder.<wbr>cpp   Thu Jun 29 13:13:56 2017 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>cpp   Fri Sep 22 20:20:58 2017 +0530<br>
<div><div class="h5">@@ -88,6 +88,7 @@<br>
     delete[] m_outStreams;<br>
     delete[] m_backupStreams;<br>
     X265_FREE(m_sliceBaseRow);<br>
+    X265_FREE(m_sliceMaxBlockRow);<br>
     X265_FREE(m_cuGeoms);<br>
     X265_FREE(m_ctuGeomMap);<br>
     X265_FREE(m_substreamSizes);<br>
@@ -118,6 +119,40 @@<br>
<br>
     m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);<br>
     ok &= !!m_sliceBaseRow;<br>
+    m_sliceGroupSize = (uint16_t)(m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;<br>
+    uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;<br>
+    uint32_t rowSum = sliceGroupSizeAccu;<br>
+    uint32_t sidx = 0;<br>
+    for (uint32_t i = 0; i < m_numRows; i++)<br>
+    {<br>
+        const uint32_t rowRange = (rowSum >> 8);<br>
+        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))<br>
+        {<br>
+            rowSum += sliceGroupSizeAccu;<br>
+            m_sliceBaseRow[++sidx] = i;<br>
+        }<br>
+    }<br>
+    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");<br>
+    m_sliceBaseRow[0] = 0;<br>
+    m_sliceBaseRow[m_param-><wbr>maxSlices] = m_numRows;<br>
+<br>
+    m_sliceMaxBlockRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);<br>
+    ok &= !!m_sliceMaxBlockRow;<br>
+    uint32_t maxBlockRows = (m_param->sourceHeight + (16 - 1)) / 16;<br>
+    sliceGroupSizeAccu = (maxBlockRows << 8) / m_param->maxSlices;<br>
+    rowSum = sliceGroupSizeAccu;<br>
+    sidx = 0;<br>
+    for (uint32_t i = 0; i < maxBlockRows; i++)<br>
+    {<br>
+        const uint32_t rowRange = (rowSum >> 8);<br>
+        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))<br>
+        {<br>
+            rowSum += sliceGroupSizeAccu;<br>
+            m_sliceMaxBlockRow[++sidx] = i;<br>
+        }<br>
+    }<br>
+    m_sliceMaxBlockRow[0] = 0;<br>
+    m_sliceMaxBlockRow[m_param-><wbr>maxSlices] = maxBlockRows;<br>
<br>
     /* determine full motion search range */<br>
     int range  = m_param->searchRange;       /* fpel search */<br>
@@ -341,6 +376,8 @@<br>
     m_completionCount = 0;<br>
     m_bAllRowsStop = false;<br>
     m_vbvResetTriggerRow = -1;<br>
+    m_rowSliceTotalBits[0] = 0;<br>
+    m_rowSliceTotalBits[1] = 0;<br>
<br>
     m_SSDY = m_SSDU = m_SSDV = 0;<br>
     m_ssim = 0;<br>
@@ -550,28 +587,13 @@<br>
<br>
     /* reset entropy coders and compute slice id */<br>
     m_entropyCoder.load(m_<wbr>initSliceContext);<br>
-    const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;<br>
-    const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;<br>
-    m_sliceGroupSize = (uint16_t)sliceGroupSize;<br>
+<br>
+    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)<br>
+        for (uint32_t row = m_sliceBaseRow[sliceId]; row < m_sliceBaseRow[sliceId + 1]; row++)<br>
+            m_rows[row].init(m_<wbr>initSliceContext, sliceId);<br>
<br>
-    uint32_t rowSum = sliceGroupSizeAccu;<br>
-    uint32_t sidx = 0;<br>
-    for (uint32_t i = 0; i < m_numRows; i++)<br>
-    {<br>
-        const uint32_t rowRange = (rowSum >> 8);<br>
-<br>
-        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))<br>
-        {<br>
-            rowSum += sliceGroupSizeAccu;<br>
-            m_sliceBaseRow[++sidx] = i;<br>
-        }<br>
-<br>
-        m_rows[i].init(m_<wbr>initSliceContext, sidx);<br>
-    }<br>
-    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");<br>
-<br>
-    m_sliceBaseRow[0] = 0;<br>
-    m_sliceBaseRow[m_param-><wbr>maxSlices] = m_numRows;<br>
+    // reset slice counter for rate control update<br>
+    m_sliceCnt = 0;<br>
<br>
     uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices;<br>
     X265_CHECK(m_param-><wbr>bEnableWavefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!");<br>
@@ -586,8 +608,10 @@<br>
                 m_rows[i].rowGoOnCoder.<wbr>setBitstream(&m_outStreams[i])<wbr>;<br>
     }<br>
     else<br>
+    {<br>
         for (uint32_t i = 0; i < numSubstreams; i++)<br>
             m_outStreams[i].resetBits();<br>
+    }<br>
<br>
     int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;<br>
<br>
@@ -697,10 +721,9 @@<br>
      * compressed in a wave-front pattern if WPP is enabled. Row based loop<br>
      * filters runs behind the CTU compression and reconstruction */<br>
<br>
-    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)<br>
-    {<br>
+    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)<br>
         m_rows[m_sliceBaseRow[sliceId]<wbr>].active = true;<br>
-    }<br>
+<br>
     if (m_param->bEnableWavefront)<br>
     {<br>
         int i = 0;<br>
</div></div>@@ -719,6 +742,7 @@<br>
             }<br>
         }<br>
     }<br>
+<br>
     if (m_param->bEnableWavefront)<br>
     {<br>
         for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)<br>
@@ -751,6 +775,7 @@<br>
                             m_mref[l][ref].applyWeight(<wbr>rowIdx, m_numRows, sliceEndRow, sliceId);<br>
                     }<br>
                 }<br>
+<br>
                 enableRowEncoder(m_row_to_idx[<wbr>row]); /* clear external dependency for this row */<br>
                 if (!rowInSlice)<br>
                 {<br>
@@ -980,9 +1005,8 @@<br>
<span class="">             // complete the slice header by writing WPP row-starts<br>
             m_entropyCoder.setBitstream(&<wbr>m_bs);<br>
             if (slice->m_pps-><wbr>bEntropyCodingSyncEnabled)<br>
-            {<br>
                 m_entropyCoder.<wbr>codeSliceHeaderWPPEntryPoints(<wbr>&m_substreamSizes[<wbr>prevSliceRow], (nextSliceRow - prevSliceRow - 1), maxStreamSize);<br>
-            }<br>
+<br>
             m_bs.writeByteAlignment();<br>
<br>
             m_nalList.serialize(slice->m_<wbr>nalUnitType, m_bs);<br>
</span>@@ -1211,17 +1235,21 @@<br>
     int64_t startTime = x265_mdate();<br>
     if (ATOMIC_INC(&m_<wbr>activeWorkerCount) == 1 && m_stallStartTime)<br>
         m_totalNoWorkerTime += x265_mdate() - m_stallStartTime;<br>
+<br>
     const uint32_t realRow = m_idx_to_row[row >> 1];<br>
     const uint32_t typeNum = m_idx_to_row[row & 1];<br>
+<br>
     if (!typeNum)<br>
         processRowEncoder(realRow, m_tld[threadId]);<br>
     else<br>
     {<br>
         m_frameFilter.processRow(<wbr>realRow);<br>
+<br>
         // NOTE: Active next row<br>
         if (realRow != m_sliceBaseRow[m_rows[realRow]<wbr>.sliceId + 1] - 1)<br>
             enqueueRowFilter(m_row_to_idx[<wbr>realRow + 1]);<br>
     }<br>
+<br>
     if (ATOMIC_DEC(&m_<wbr>activeWorkerCount) == 0)<br>
         m_stallStartTime = x265_mdate();<br>
<br>
@@ -1264,20 +1292,18 @@<br>
<span class="">     const uint32_t lineStartCUAddr = row * numCols;<br>
     bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;<br>
<br>
+    const uint32_t sliceId = curRow.sliceId;<br>
     uint32_t maxBlockCols = (m_frame->m_fencPic->m_<wbr>picWidth + (16 - 1)) / 16;<br>
-    uint32_t maxBlockRows = (m_frame->m_fencPic->m_<wbr>picHeight + (16 - 1)) / 16;<br>
     uint32_t noOfBlocks = m_param->maxCUSize / 16;<br>
     const uint32_t bFirstRowInSlice = ((row == 0) || (m_rows[row - 1].sliceId != curRow.sliceId)) ? 1 : 0;<br>
     const uint32_t bLastRowInSlice = ((row == m_numRows - 1) || (m_rows[row + 1].sliceId != curRow.sliceId)) ? 1 : 0;<br>
-    const uint32_t sliceId = curRow.sliceId;<br>
     const uint32_t endRowInSlicePlus1 = m_sliceBaseRow[sliceId + 1];<br>
     const uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];<br>
<br>
-    if (bFirstRowInSlice && !curRow.completed)<br>
-    {<br>
-        // Load SBAC coder context from previous row and initialize row state.<br>
-        rowCoder.load(m_<wbr>initSliceContext);<br>
-    }<br>
+    // Load SBAC coder context from previous row and initialize row state.<br>
+    if (bFirstRowInSlice && !curRow.completed)<br>
+        rowCoder.load(m_<wbr>initSliceContext);<br>
</span>+<br>
<span class="">     // calculate mean QP for consistent deltaQP signalling calculation<br>
     if (m_param->bOptCUDeltaQP)<br>
     {<br>
</span>@@ -1287,15 +1313,12 @@<br>
<span class="">             if (m_param->bEnableWavefront || !row)<br>
             {<br>
                 double meanQPOff = 0;<br>
-                uint32_t loopIncr, count = 0;<br>
                 bool isReferenced = IS_REFERENCED(m_frame);<br>
                 double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame->m_lowres.<wbr>qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;<br>
                 if (qpoffs)<br>
                 {<br>
-                    if (m_param->rc.qgSize == 8)<br>
-                        loopIncr = 8;<br>
-                    else<br>
-                        loopIncr = 16;<br>
+                    uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;<br>
+<br>
                     uint32_t cuYStart = 0, height = m_frame->m_fencPic->m_<wbr>picHeight;<br>
                     if (m_param->bEnableWavefront)<br>
                     {<br>
</span>@@ -1305,6 +1328,7 @@<br>
<span class=""><br>
                     uint32_t qgSize = m_param->rc.qgSize, width = m_frame->m_fencPic->m_<wbr>picWidth;<br>
                     uint32_t maxOffsetCols = (m_frame->m_fencPic->m_<wbr>picWidth + (loopIncr - 1)) / loopIncr;<br>
+                    uint32_t count = 0;<br>
                     for (uint32_t cuY = cuYStart; cuY < height && (cuY < m_frame->m_fencPic->m_<wbr>picHeight); cuY += qgSize)<br>
                     {<br>
                         for (uint32_t cuX = 0; cuX < width; cuX += qgSize)<br>
</span>@@ -1336,7 +1360,8 @@<br>
             }<br>
             curRow.avgQPComputed = 1;<br>
         }<br>
-    }<br>
+    }<br>
+<br>
     // Initialize restrict on MV range in slices<br>
     tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * m_param->maxCUSize * 4) + 3 * 4;<br>
     tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * (m_param->maxCUSize * 4) - 4 * 4);<br>
@@ -1364,16 +1389,16 @@<br>
<span class="">                 curRow.bufferedEntropy.<wbr>copyState(rowCoder);<br>
                 curRow.bufferedEntropy.<wbr>loadContexts(rowCoder);<br>
             }<br>
-            if (!row && m_vbvResetTriggerRow != intRow)<br>
+            if (bFirstRowInSlice && m_vbvResetTriggerRow != intRow)<br>
             {<br>
                 curEncData.m_rowStat[row].<wbr>rowQp = curEncData.m_avgQpRc;<br>
                 curEncData.m_rowStat[row].<wbr>rowQpScale = x265_qp2qScale(curEncData.m_<wbr>avgQpRc);<br>
             }<br>
<br>
             FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];<br>
-            if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow)<br>
+            if (m_param->bEnableWavefront && rowInSlice >= col && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)<br>
                 cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;<br>
-            else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow)<br>
+            else if (!m_param->bEnableWavefront && !bFirstRowInSlice && m_vbvResetTriggerRow != intRow)<br>
                 cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;<br>
             else<br>
                 cuStat.baseQp = curEncData.m_rowStat[row].<wbr>rowQp;<br>
</span>@@ -1385,7 +1410,8 @@<br>
<span class="">             {<br>
                 cuStat.vbvCost = 0;<br>
                 cuStat.intraVbvCost = 0;<br>
-                for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)<br>
+<br>
+                for (uint32_t h = 0; h < noOfBlocks && block_y < m_sliceMaxBlockRow[sliceId + 1]; h++, block_y++)<br>
                 {<br>
                     uint32_t idx = block_x + (block_y * maxBlockCols);<br>
<br>
</span>@@ -1433,11 +1459,12 @@<br>
         {<br>
             // NOTE: in VBV mode, we may reencode anytime, so we can't do Deblock stage-Horizon and SAO<br>
             if (!bIsVbv)<br>
-            {<br>
+            {<br>
                 // Delay one row to avoid intra prediction conflict<br>
                 if (m_pool && !bFirstRowInSlice)<br>
-                {<br>
+                {<br>
                     int allowCol = col;<br>
+<br>
                     // avoid race condition on last column<br>
                     if (rowInSlice >= 2)<br>
                     {<br>
@@ -1446,11 +1473,13 @@<br>
                     }<br>
                     m_frameFilter.m_<wbr>parallelFilter[row - 1].m_allowedCol.set(allowCol);<br>
                 }<br>
+<br>
                 // Last Row may start early<br>
                 if (m_pool && bLastRowInSlice)<br>
                 {<br>
                     // Deblocking last row<br>
                     int allowCol = col;<br>
+<br>
                     // avoid race condition on last column<br>
                     if (rowInSlice >= 2)<br>
                     {<br>
@@ -1472,6 +1501,7 @@<br>
<br>
         FrameStats frameLog;<br>
         curEncData.m_rowStat[row].<wbr>sumQpAq += collectCTUStatistics(*ctu, &frameLog);<br>
+<br>
         // copy number of intra, inter cu per row into frame stats for 2 pass<br>
         if (m_param->rc.bStatWrite)<br>
         {<br>
@@ -1485,10 +1515,8 @@<br>
<span class="">                 int shift = 2 * (m_param->maxCUDepth - depth);<br>
                 int cuSize = m_param->maxCUSize >> depth;<br>
<br>
-                if (cuSize == 8)<br>
-                    curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN);<br>
-                else<br>
-                    curRow.rowStats.intra8x8Cnt += (int)(frameLog.cntIntra[depth] << shift);<br>
+                curRow.rowStats.intra8x8Cnt += (cuSize == 8) ? (int)(frameLog.cntIntra[depth] + frameLog.cntIntraNxN) :<br>
+                                                               (int)(frameLog.cntIntra[depth] << shift);<br>
<br>
                 curRow.rowStats.inter8x8Cnt += (int)(frameLog.cntInter[depth] << shift);<br>
                 curRow.rowStats.skip8x8Cnt += (int)((frameLog.cntSkipCu[<wbr>depth] + frameLog.cntMergeCu[depth]) << shift);<br>
</span>@@ -1518,12 +1546,13 @@<br>
<span class="">         if (bIsVbv)<br>
         {<br>
             // Update encoded bits, satdCost, baseQP for each CU if tune grain is disabled<br>
-            if ((m_param->bEnableWavefront && (!cuAddr || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)<br>
+            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];<br>
+            if ((m_param->bEnableWavefront && ((cuAddr == m_sliceBaseRow[sliceId] * numCols) || !m_param->rc.bEnableConstVbv)) || !m_param->bEnableWavefront)<br>
             {<br>
-                curEncData.m_rowStat[row].<wbr>rowSatd += curEncData.m_cuStat[cuAddr].<wbr>vbvCost;<br>
-                curEncData.m_rowStat[row].<wbr>rowIntraSatd += curEncData.m_cuStat[cuAddr].<wbr>intraVbvCost;<br>
-                curEncData.m_rowStat[row].<wbr>encodedBits += curEncData.m_cuStat[cuAddr].<wbr>totalBits;<br>
-                curEncData.m_rowStat[row].<wbr>sumQpRc += curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
+                curEncData.m_rowStat[row].<wbr>rowSatd += cuStat.vbvCost;<br>
+                curEncData.m_rowStat[row].<wbr>rowIntraSatd += cuStat.intraVbvCost;<br>
+                curEncData.m_rowStat[row].<wbr>encodedBits += cuStat.totalBits;<br>
+                curEncData.m_rowStat[row].<wbr>sumQpRc += cuStat.baseQp;<br>
                 curEncData.m_rowStat[row].<wbr>numEncodedCUs = cuAddr;<br>
             }<br>
<br>
</span>@@ -1531,7 +1560,7 @@<br>
<span class="">             if (!m_param->bEnableWavefront && col == numCols - 1)<br>
             {<br>
                 double qpBase = curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
-                int reEncode = m_top->m_rateControl-><wbr>rowVbvRateControl(m_frame, row, &m_rce, qpBase);<br>
+                int reEncode = m_top->m_rateControl-><wbr>rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId);<br>
                 qpBase = x265_clip3((double)m_param-><wbr>rc.qpMin, (double)m_param->rc.qpMax, qpBase);<br>
                 curEncData.m_rowStat[row].<wbr>rowQp = qpBase;<br>
                 curEncData.m_rowStat[row].<wbr>rowQpScale = x265_qp2qScale(qpBase);<br>
</span>@@ -1557,15 +1586,16 @@<br>
<span class="">                 }<br>
             }<br>
             // If current block is at row diagonal checkpoint, call vbv ratecontrol.<br>
-            else if (m_param->bEnableWavefront && row == col && row)<br>
+            else if (m_param->bEnableWavefront && rowInSlice == col && !bFirstRowInSlice)<br>
             {<br>
                 if (m_param->rc.bEnableConstVbv)<br>
                 {<br>
-                    int32_t startCuAddr = numCols * row;<br>
-                    int32_t EndCuAddr = startCuAddr + col;<br>
-                    for (int32_t r = row; r >= 0; r--)<br>
+                    uint32_t startCuAddr = numCols * row;<br>
+                    uint32_t EndCuAddr = startCuAddr + col;<br>
+<br>
+                    for (int32_t r = row; r >= (int32_t)m_sliceBaseRow[<wbr>sliceId]; r--)<br>
                     {<br>
-                        for (int32_t c = startCuAddr; c <= EndCuAddr && c <= (int32_t)numCols * (r + 1) - 1; c++)<br>
+                        for (uint32_t c = startCuAddr; c <= EndCuAddr && c <= numCols * (r + 1) - 1; c++)<br>
                         {<br>
                             curEncData.m_rowStat[r].<wbr>rowSatd += curEncData.m_cuStat[c].<wbr>vbvCost;<br>
                             curEncData.m_rowStat[r].<wbr>rowIntraSatd += curEncData.m_cuStat[c].<wbr>intraVbvCost;<br>
</span>@@ -1578,10 +1608,10 @@<br>
<span class="">                     }<br>
                 }<br>
                 double qpBase = curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
-                int reEncode = m_top->m_rateControl-><wbr>rowVbvRateControl(m_frame, row, &m_rce, qpBase);<br>
+                int reEncode = m_top->m_rateControl-><wbr>rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId);<br>
                 qpBase = x265_clip3((double)m_param-><wbr>rc.qpMin, (double)m_param->rc.qpMax, qpBase);<br>
                 curEncData.m_rowStat[row].<wbr>rowQp = qpBase;<br>
-                curEncData.m_rowStat[row].<wbr>rowQpScale =  x265_qp2qScale(qpBase);<br>
+                curEncData.m_rowStat[row].<wbr>rowQpScale = x265_qp2qScale(qpBase);<br>
<br>
                 if (reEncode < 0)<br>
                 {<br>
</span>@@ -1592,7 +1622,7 @@<br>
<span class="">                     m_vbvResetTriggerRow = row;<br>
                     m_bAllRowsStop = true;<br>
<br>
-                    for (uint32_t r = m_numRows - 1; r >= row; r--)<br>
+                    for (uint32_t r = m_sliceBaseRow[sliceId + 1] - 1; r >= row; r--)<br>
                     {<br>
                         CTURow& stopRow = m_rows[r];<br>
<br>
</span>@@ -1670,14 +1700,15 @@<br>
             return;<br>
         }<br>
     }<br>
+<br>
<span class="">     /* this row of CTUs has been compressed */<br>
     if (m_param->bEnableWavefront && m_param->rc.bEnableConstVbv)<br>
     {<br>
-        if (row == m_numRows - 1)<br>
+        if (bLastRowInSlice)<br>
         {<br>
-            for (int32_t r = 0; r < (int32_t)m_numRows; r++)<br>
+            for (uint32_t r = m_sliceBaseRow[sliceId]; r < m_sliceBaseRow[sliceId + 1]; r++)<br>
             {<br>
-                for (int32_t c = curEncData.m_rowStat[r].<wbr>numEncodedCUs + 1; c < (int32_t)numCols * (r + 1); c++)<br>
+                for (uint32_t c = curEncData.m_rowStat[r].<wbr>numEncodedCUs + 1; c < numCols * (r + 1); c++)<br>
                 {<br>
                     curEncData.m_rowStat[r].<wbr>rowSatd += curEncData.m_cuStat[c].<wbr>vbvCost;<br>
                     curEncData.m_rowStat[r].<wbr>rowIntraSatd += curEncData.m_cuStat[c].<wbr>intraVbvCost;<br>
</span>@@ -1695,26 +1726,41 @@<br>
<span class="">      * after half the frame is encoded, but after this initial period we update<br>
      * after refLagRows (the number of rows reference frames must have completed<br>
      * before referencees may begin encoding) */<br>
-    uint32_t rowCount = 0;<br>
     if (m_param->rc.rateControlMode == X265_RC_ABR || bIsVbv)<br>
     {<br>
+        uint32_t rowCount = 0;<br>
+        uint32_t maxRows = m_sliceBaseRow[sliceId + 1] - m_sliceBaseRow[sliceId];<br>
         if (!m_rce.encodeOrder)<br>
-            rowCount = m_numRows - 1;<br>
+            rowCount = maxRows - 1;<br>
         else if ((uint32_t)m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom))<br>
-            rowCount = X265_MIN((m_numRows + 1) / 2, m_numRows - 1);<br>
+            rowCount = X265_MIN((maxRows + 1) / 2, maxRows - 1);<br>
         else<br>
-            rowCount = X265_MIN(m_refLagRows, m_numRows - 1);<br>
-        if (row == rowCount)<br>
</span>+                       rowCount = X265_MIN(m_refLagRows / m_param->maxSlices, maxRows - 1);<br>
+<br>
+        if (rowInSlice == rowCount)<br>
<span class="">         {<br>
-            m_rce.rowTotalBits = 0;<br>
+            m_rowSliceTotalBits[sliceId] = 0;<br>
             if (bIsVbv)<br>
-                for (uint32_t i = 0; i < rowCount; i++)<br>
-                    m_rce.rowTotalBits += curEncData.m_rowStat[i].<wbr>encodedBits;<br>
+            {<br>
</span>+                for (uint32_t i = m_sliceBaseRow[sliceId]; i < rowCount + m_sliceBaseRow[sliceId]; i++)<br>
<span class="">+                    m_rowSliceTotalBits[sliceId] += curEncData.m_rowStat[i].<wbr>encodedBits;<br>
+            }<br>
             else<br>
-                for (uint32_t cuAddr = 0; cuAddr < rowCount * numCols; cuAddr++)<br>
-                    m_rce.rowTotalBits += curEncData.m_cuStat[cuAddr].<wbr>totalBits;<br>
+            {<br>
+                uint32_t startAddr = rowCount * numCols * sliceId;<br>
+                uint32_t finishAddr = startAddr + rowCount * numCols;<br>
+<br>
+                for (uint32_t cuAddr = startAddr; cuAddr < finishAddr; cuAddr++)<br>
+                    m_rowSliceTotalBits[sliceId] += curEncData.m_cuStat[cuAddr].<wbr>totalBits;<br>
+            }<br>
<br>
-            m_top->m_rateControl-><wbr>rateControlUpdateStats(&m_rce)<wbr>;<br>
+            if (ATOMIC_INC(&m_sliceCnt) == (int)m_param->maxSlices)<br>
+            {<br>
+                m_rce.rowTotalBits = 0;<br>
+                for (uint32_t i = 0; i < m_param->maxSlices; i++)<br>
+                    m_rce.rowTotalBits += m_rowSliceTotalBits[i];<br>
+                m_top->m_rateControl-><wbr>rateControlUpdateStats(&m_rce)<wbr>;<br>
+            }<br>
         }<br>
     }<br>
<br>
</span>@@ -1742,11 +1788,13 @@<br>
         if (rowInSlice >= m_filterRowDelay)<br>
         {<br>
             enableRowFilter(m_row_to_idx[<wbr>row - m_filterRowDelay]);<br>
+<br>
             /* NOTE: Activate filter if first row (row 0) */<br>
             if (rowInSlice == m_filterRowDelay)<br>
                 enqueueRowFilter(m_row_to_idx[<wbr>row - m_filterRowDelay]);<br>
             tryWakeOne();<br>
         }<br>
+<br>
         if (bLastRowInSlice)<br>
         {<br>
             for (uint32_t i = endRowInSlicePlus1 - m_filterRowDelay; i < endRowInSlicePlus1; i++)<br>
diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/frameencoder.h<br>
--- a/source/encoder/frameencoder.<wbr>h     Thu Jun 29 13:13:56 2017 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>h     Fri Sep 22 20:20:58 2017 +0530<br>
<span class="">@@ -138,6 +138,7 @@<br>
     volatile bool            m_bAllRowsStop;<br>
     volatile int             m_completionCount;<br>
     volatile int             m_vbvResetTriggerRow;<br>
+    volatile int             m_sliceCnt;<br>
<br>
     uint32_t                 m_numRows;<br>
     uint32_t                 m_numCols;<br>
@@ -147,8 +148,10 @@<br>
<br>
     CTURow*                  m_rows;<br>
     uint16_t                 m_sliceAddrBits;<br>
-    uint16_t                 m_sliceGroupSize;<br>
-    uint32_t*                m_sliceBaseRow;<br>
+    uint32_t                 m_sliceGroupSize;<br>
+    uint32_t*                m_sliceBaseRow;<br>
+    uint32_t*                m_sliceMaxBlockRow;<br>
+    int64_t                  m_rowSliceTotalBits[2];<br>
     RateControlEntry         m_rce;<br>
     SEIDecodedPictureHash    m_seiReconPictureDigest;<br>
<br>
</span>diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/ratecontrol.cpp<br>
--- a/source/encoder/ratecontrol.<wbr>cpp    Thu Jun 29 13:13:56 2017 +0530<br>
+++ b/source/encoder/ratecontrol.<wbr>cpp    Fri Sep 22 20:20:58 2017 +0530<br>
<div><div class="h5">@@ -732,7 +732,6 @@<br>
     m_bitrate = m_param->rc.bitrate * 1000;<br>
 }<br>
<br>
-<br>
 void RateControl::initHRD(SPS& sps)<br>
 {<br>
     int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;<br>
@@ -765,6 +764,7 @@<br>
<br>
     #undef MAX_DURATION<br>
 }<br>
+<br>
 bool RateControl::analyseABR2Pass(<wbr>uint64_t allAvailableBits)<br>
 {<br>
     double rateFactor, stepMult;<br>
@@ -1473,6 +1473,7 @@<br>
<br>
     return q;<br>
 }<br>
+<br>
 double RateControl::<wbr>countExpectedBits(int startPos, int endPos)<br>
 {<br>
     double expectedBits = 0;<br>
@@ -1484,6 +1485,7 @@<br>
     }<br>
     return expectedBits;<br>
 }<br>
+<br>
 bool RateControl::findUnderflow(<wbr>double *fills, int *t0, int *t1, int over, int endPos)<br>
 {<br>
     /* find an interval ending on an overflow or underflow (depending on whether<br>
@@ -1531,6 +1533,7 @@<br>
     }<br>
     return adjusted;<br>
 }<br>
+<br>
 bool RateControl::<wbr>cuTreeReadFor2Pass(Frame* frame)<br>
 {<br>
     int index = m_encOrder[frame->m_poc];<br>
@@ -1579,24 +1582,24 @@<br>
 double RateControl::<wbr>tuneAbrQScaleFromFeedback(<wbr>double qScale)<br>
 {<br>
     double abrBuffer = 2 * m_rateTolerance * m_bitrate;<br>
-        /* use framesDone instead of POC as poc count is not serial with bframes enabled */<br>
-        double overflow = 1.0;<br>
-        double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;<br>
-        double wantedBits = timeDone * m_bitrate;<br>
-        int64_t encodedBits = m_totalBits;<br>
-        if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)<br>
-        {<br>
-            abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);<br>
-            encodedBits = m_encodedBits;<br>
-        }<br>
+    /* use framesDone instead of POC as poc count is not serial with bframes enabled */<br>
+    double overflow = 1.0;<br>
+    double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;<br>
+    double wantedBits = timeDone * m_bitrate;<br>
+    int64_t encodedBits = m_totalBits;<br>
+    if (m_param->totalFrames && m_param->totalFrames <= 2 * m_fps)<br>
+    {<br>
+        abrBuffer = m_param->totalFrames * (m_bitrate / m_fps);<br>
+        encodedBits = m_encodedBits;<br>
+    }<br>
<br>
-        if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames ||<br>
-            m_param->rc.bStrictCbr || m_isGrainEnabled))<br>
-        {<br>
-            abrBuffer *= X265_MAX(1, sqrt(timeDone));<br>
-            overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / abrBuffer);<br>
-            qScale *= overflow;<br>
-        }<br>
+    if (wantedBits > 0 && encodedBits > 0 && (!m_partialResidualFrames ||<br>
+        m_param->rc.bStrictCbr || m_isGrainEnabled))<br>
+    {<br>
+        abrBuffer *= X265_MAX(1, sqrt(timeDone));<br>
+        overflow = x265_clip3(.5, 2.0, 1.0 + (encodedBits - wantedBits) / abrBuffer);<br>
+        qScale *= overflow;<br>
+    }<br>
     return qScale;<br>
 }<br>
<br>
@@ -2330,17 +2333,18 @@<br>
     return totalSatdBits + encodedBitsSoFar;<br>
 }<br>
<br>
-int RateControl::<wbr>rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)<br>
+int RateControl::<wbr>rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t sliceId)<br>
 {<br>
     FrameData& curEncData = *curFrame->m_encData;<br>
     double qScaleVbv = x265_qp2qScale(qpVbv);<br>
     uint64_t rowSatdCost = curEncData.m_rowStat[row].<wbr>rowSatd;<br>
     double encodedBits = curEncData.m_rowStat[row].<wbr>encodedBits;<br>
+    uint32_t rowInSlice = row - m_sliceBaseRow[sliceId];<br>
<br>
-    if (m_param->bEnableWavefront && row == 1)<br>
+    if (m_param->bEnableWavefront && rowInSlice == 1)<br>
     {<br>
-        rowSatdCost += curEncData.m_rowStat[0].<wbr>rowSatd;<br>
-        encodedBits += curEncData.m_rowStat[0].<wbr>encodedBits;<br>
+        rowSatdCost += curEncData.m_rowStat[row - 1].rowSatd;<br>
+        encodedBits += curEncData.m_rowStat[row - 1].encodedBits;<br>
     }<br>
     rowSatdCost >>= X265_DEPTH - 8;<br>
     updatePredictor(rce->rowPred[<wbr>0], qScaleVbv, (double)rowSatdCost, encodedBits);<br>
@@ -2350,8 +2354,8 @@<br>
         if (qpVbv < refFrame->m_encData->m_<wbr>rowStat[row].rowQp)<br>
         {<br>
             uint64_t intraRowSatdCost = curEncData.m_rowStat[row].<wbr>rowIntraSatd;<br>
-            if (m_param->bEnableWavefront && row == 1)<br>
-                intraRowSatdCost += curEncData.m_rowStat[0].<wbr>rowIntraSatd;<br>
+            if (m_param->bEnableWavefront && rowInSlice == 1)<br>
+                intraRowSatdCost += curEncData.m_rowStat[row - 1].rowIntraSatd;<br>
             intraRowSatdCost >>= X265_DEPTH - 8;<br>
             updatePredictor(rce->rowPred[<wbr>1], qScaleVbv, (double)intraRowSatdCost, encodedBits);<br>
         }<br>
@@ -2376,7 +2380,7 @@<br>
     const SPS& sps = *curEncData.m_slice->m_sps;<br>
     double maxFrameError = X265_MAX(0.05, 1.0 / sps.numCuInHeight);<br>
<br>
-    if (row < sps.numCuInHeight - 1)<br>
+    if (row < m_sliceBaseRow[sliceId + 1] - 1)<br>
     {<br>
         /* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */<br>
         double rcTol = bufferLeftPlanned / m_param->frameNumThreads * m_rateTolerance;<br>
@@ -2693,8 +2697,8 @@<br>
             m_encodedBitsWindow[pos % s_slidingWindowFrames] = actualBits;<br>
         if(rce->sliceType != I_SLICE)<br>
         {<br>
-        int qp = int (rce->qpaRc + 0.5);<br>
-        m_qpToEncodedBits[qp] =  m_qpToEncodedBits[qp] == 0 ? actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;<br>
+            int qp = int (rce->qpaRc + 0.5);<br>
+            m_qpToEncodedBits[qp] =  m_qpToEncodedBits[qp] == 0 ? actualBits : (m_qpToEncodedBits[qp] + actualBits) * 0.5;<br>
         }<br>
         curFrame->m_rcData-><wbr>wantedBitsWindow = m_wantedBitsWindow;<br>
         curFrame->m_rcData->cplxrSum = m_cplxrSum;<br>
@@ -2779,7 +2783,8 @@<br>
             curFrame->m_encData->m_<wbr>frameStats.percent8x8Skip  * m_ncu) < 0)<br>
             goto writeFailure;<br>
     }<br>
-    else{<br>
+    else<br>
+    {<br>
         RPS* rpsWriter = &curFrame->m_encData->m_slice-<wbr>>m_rps;<br>
         int i, num = rpsWriter->numberOfPictures;<br>
         char deltaPOC[128];<br>
</div></div>diff -r e62b12bd8b45 -r c838e60c7c6b source/encoder/ratecontrol.h<br>
--- a/source/encoder/ratecontrol.h      Thu Jun 29 13:13:56 2017 +0530<br>
+++ b/source/encoder/ratecontrol.h      Fri Sep 22 20:20:58 2017 +0530<br>
<div class="HOEnZb"><div class="h5">@@ -244,7 +244,7 @@<br>
     int  rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);<br>
     void rateControlUpdateStats(<wbr>RateControlEntry* rce);<br>
     int  rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce, int *filler);<br>
-    int  rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);<br>
+    int  rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv, uint32_t* m_sliceBaseRow, uint32_t sliceId);<br>
     int  rateControlSliceType(int frameNum);<br>
     bool cuTreeReadFor2Pass(Frame* curFrame);<br>
     void hrdFullness(<wbr>SEIBufferingPeriod* sei);<br>
______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
</div></div></blockquote></div><br></div></div>