<div dir="ltr"><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Oct 26, 2016 at 11:08 AM,  <span dir="ltr">&lt;<a href="mailto:aruna@multicorewareinc.com" target="_blank">aruna@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Aruna Matheswaran<br>
# Date 1476352364 -19800<br>
#      Thu Oct 13 15:22:44 2016 +0530<br>
# Node ID f9e7422416c9d2d4f7b7618791a7c2<wbr>8592de4828<br>
# Parent  bc911034c2a07380630aff98fdda38<wbr>038b2ae62e<br>
Add VBV support for No-WPP<br></blockquote><div><br></div><div>Can you please share what improvement in encoding efficiency you see on command lines that use VBV, with and without WPP?</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/common/bitstream.h<br>
--- a/source/common/bitstream.h Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/common/bitstream.h Thu Oct 13 15:22:44 2016 +0530<br>
@@ -71,6 +71,7 @@<br>
     uint32_t getNumberOfWrittenBytes() const { return m_byteOccupancy; }<br>
     uint32_t getNumberOfWrittenBits()  const { return m_byteOccupancy * 8 + m_partialByteBits; }<br>
     const uint8_t* getFIFO() const           { return m_fifo; }<br>
+    void     copyBits(Bitstream* stream)     { m_partialByteBits = stream->m_partialByteBits; m_byteOccupancy = stream->m_byteOccupancy; m_partialByte = stream->m_partialByte; }<br>
<br>
     void     write(uint32_t val, uint32_t numBits);<br>
     void     writeByte(uint32_t val);<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/common/framedata.h<br>
--- a/source/common/framedata.h Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/common/framedata.h Thu Oct 13 15:22:44 2016 +0530<br>
@@ -126,10 +126,10 @@<br>
         uint32_t encodedBits;   /* sum of 'totalBits' of encoded CTUs */<br>
         uint32_t satdForVbv;    /* sum of lowres (estimated) costs for entire row */<br>
         uint32_t intraSatdForVbv; /* sum of lowres (estimated) intra costs for entire row */<br>
-        uint32_t diagSatd;<br>
-        uint32_t diagIntraSatd;<br>
-        double   diagQp;<br>
-        double   diagQpScale;<br>
+        uint32_t rowSatd;<br>
+        uint32_t rowIntraSatd;<br>
+        double   rowQp;<br>
+        double   rowQpScale;<br>
         double   sumQpRc;<br>
         double   sumQpAq;<br>
     };<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/encoder.cpp<br>
--- a/source/encoder/encoder.cpp        Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/encoder.cpp        Thu Oct 13 15:22:44 2016 +0530<br>
@@ -149,12 +149,6 @@<br>
         p->bEnableWavefront = p->bDistributeModeAnalysis = p->bDistributeMotionEstimation = p->lookaheadSlices = 0;<br>
     }<br>
<br>
-    if (!p->bEnableWavefront && p->rc.vbvBufferSize)<br>
-    {<br>
-        x265_log(p, X265_LOG_ERROR, "VBV requires wavefront parallelism\n");<br>
-        m_aborted = true;<br>
-    }<br>
-<br>
     x265_log(p, X265_LOG_INFO, "Slices                              : %d\n", p->maxSlices);<br>
<br>
     char buf[128];<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.<wbr>cpp<br>
--- a/source/encoder/frameencoder.<wbr>cpp   Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>cpp   Thu Oct 13 15:22:44 2016 +0530<br>
@@ -50,6 +50,7 @@<br>
     m_bAllRowsStop = false;<br>
     m_vbvResetTriggerRow = -1;<br>
     m_outStreams = NULL;<br>
+    m_backupStreams = NULL;<br>
     m_substreamSizes = NULL;<br>
     m_nr = NULL;<br>
     m_tld = NULL;<br>
@@ -85,6 +86,7 @@<br>
<br>
     delete[] m_rows;<br>
     delete[] m_outStreams;<br>
+    delete[] m_backupStreams;<br>
     X265_FREE(m_sliceBaseRow);<br>
     X265_FREE(m_cuGeoms);<br>
     X265_FREE(m_ctuGeomMap);<br>
@@ -532,6 +534,8 @@<br>
     if (!m_outStreams)<br>
     {<br>
         m_outStreams = new Bitstream[numSubstreams];<br>
+        if (!m_param->bEnableWavefront)<br>
+            m_backupStreams = new Bitstream[numSubstreams];<br>
         m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);<br>
         if (!m_param->bEnableSAO)<br>
             for (uint32_t i = 0; i < numSubstreams; i++)<br>
@@ -1203,17 +1207,25 @@<br>
<br>
         if (bIsVbv)<br>
         {<br>
-            if (!row)<br>
+            if (col == 0 && !m_param->bEnableWavefront)<br>
+            {<br>
+                m_backupStreams[0].copyBits(&<wbr>m_outStreams[0]);<br>
+                curRow.bufferedEntropy.<wbr>copyState(rowCoder);<br>
+                curRow.bufferedEntropy.<wbr>loadContexts(rowCoder);<br>
+            }<br>
+            if (!row && m_vbvResetTriggerRow != intRow)<br>
             {<br>
-                curEncData.m_rowStat[row].<wbr>diagQp = curEncData.m_avgQpRc;<br>
-                curEncData.m_rowStat[row].<wbr>diagQpScale = x265_qp2qScale(curEncData.m_<wbr>avgQpRc);<br>
+                curEncData.m_rowStat[row].<wbr>rowQp = curEncData.m_avgQpRc;<br>
+                curEncData.m_rowStat[row].<wbr>rowQpScale = x265_qp2qScale(curEncData.m_<wbr>avgQpRc);<br>
             }<br>
<br>
             FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];<br>
-            if (row >= col && row && m_vbvResetTriggerRow != intRow)<br>
-                cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;<br>
+            if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow)<br>
+                cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;<br>
+            else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow)<br>
+                cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;<br>
             else<br>
-                cuStat.baseQp = curEncData.m_rowStat[row].<wbr>diagQp;<br>
+                cuStat.baseQp = curEncData.m_rowStat[row].<wbr>rowQp;<br>
<br>
             /* TODO: use defines from slicetype.h for lowres block size */<br>
             uint32_t block_y = (ctu->m_cuPelY >> g_maxLog2CUSize) * noOfBlocks;<br>
@@ -1364,21 +1376,52 @@<br>
         if (bIsVbv)<br>
         {<br>
             // Update encoded bits, satdCost, baseQP for each CU<br>
-            curEncData.m_rowStat[row].<wbr>diagSatd      += curEncData.m_cuStat[cuAddr].<wbr>vbvCost;<br>
-            curEncData.m_rowStat[row].<wbr>diagIntraSatd += curEncData.m_cuStat[cuAddr].<wbr>intraVbvCost;<br>
+            curEncData.m_rowStat[row].<wbr>rowSatd      += curEncData.m_cuStat[cuAddr].<wbr>vbvCost;<br>
+            curEncData.m_rowStat[row].<wbr>rowIntraSatd += curEncData.m_cuStat[cuAddr].<wbr>intraVbvCost;<br>
             curEncData.m_rowStat[row].<wbr>encodedBits   += curEncData.m_cuStat[cuAddr].<wbr>totalBits;<br>
             curEncData.m_rowStat[row].<wbr>sumQpRc       += curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
             curEncData.m_rowStat[row].<wbr>numEncodedCUs = cuAddr;<br>
<br>
+            // If current block is at row end checkpoint, call vbv ratecontrol.<br>
+<br>
+            if (!m_param->bEnableWavefront && col == numCols - 1)<br>
+            {<br>
+                double qpBase = curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
+                int reEncode = m_top->m_rateControl-><wbr>rowVbvRateControl(m_frame, row, &m_rce, qpBase);<br>
+                qpBase = x265_clip3((double)m_param-><wbr>rc.qpMin, (double)m_param->rc.qpMax, qpBase);<br>
+                curEncData.m_rowStat[row].<wbr>rowQp = qpBase;<br>
+                curEncData.m_rowStat[row].<wbr>rowQpScale = x265_qp2qScale(qpBase);<br>
+                if (reEncode < 0)<br>
+                {<br>
+                    x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",<br>
+                        m_frame->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].<wbr>baseQp);<br>
+<br>
+                    m_vbvResetTriggerRow = row;<br>
+                    m_outStreams[0].copyBits(&m_<wbr>backupStreams[0]);<br>
+<br>
+                    rowCoder.copyState(curRow.<wbr>bufferedEntropy);<br>
+                    rowCoder.loadContexts(curRow.<wbr>bufferedEntropy);<br>
+<br>
+                    curRow.completed = 0;<br>
+                    memset(&curRow.rowStats, 0, sizeof(curRow.rowStats));<br>
+                    curEncData.m_rowStat[row].<wbr>numEncodedCUs = 0;<br>
+                    curEncData.m_rowStat[row].<wbr>encodedBits = 0;<br>
+                    curEncData.m_rowStat[row].<wbr>rowSatd = 0;<br>
+                    curEncData.m_rowStat[row].<wbr>rowIntraSatd = 0;<br>
+                    curEncData.m_rowStat[row].<wbr>sumQpRc = 0;<br>
+                    curEncData.m_rowStat[row].<wbr>sumQpAq = 0;<br>
+                }<br>
+            }<br>
+<br>
             // If current block is at row diagonal checkpoint, call vbv ratecontrol.<br>
<br>
-            if (row == col && row)<br>
+            else if (m_param->bEnableWavefront && row == col && row)<br>
             {<br>
                 double qpBase = curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
-                int reEncode = m_top->m_rateControl-><wbr>rowDiagonalVbvRateControl(m_<wbr>frame, row, &m_rce, qpBase);<br>
+                int reEncode = m_top->m_rateControl-><wbr>rowVbvRateControl(m_frame, row, &m_rce, qpBase);<br>
                 qpBase = x265_clip3((double)m_param-><wbr>rc.qpMin, (double)m_param->rc.qpMax, qpBase);<br>
-                curEncData.m_rowStat[row].<wbr>diagQp = qpBase;<br>
-                curEncData.m_rowStat[row].<wbr>diagQpScale =  x265_qp2qScale(qpBase);<br>
+                curEncData.m_rowStat[row].<wbr>rowQp = qpBase;<br>
+                curEncData.m_rowStat[row].<wbr>rowQpScale =  x265_qp2qScale(qpBase);<br>
<br>
                 if (reEncode < 0)<br>
                 {<br>
@@ -1431,8 +1474,8 @@<br>
                         memset(&stopRow.rowStats, 0, sizeof(stopRow.rowStats));<br>
                         curEncData.m_rowStat[r].<wbr>numEncodedCUs = 0;<br>
                         curEncData.m_rowStat[r].<wbr>encodedBits = 0;<br>
-                        curEncData.m_rowStat[r].<wbr>diagSatd = 0;<br>
-                        curEncData.m_rowStat[r].<wbr>diagIntraSatd = 0;<br>
+                        curEncData.m_rowStat[r].<wbr>rowSatd = 0;<br>
+                        curEncData.m_rowStat[r].<wbr>rowIntraSatd = 0;<br>
                         curEncData.m_rowStat[r].<wbr>sumQpRc = 0;<br>
                         curEncData.m_rowStat[r].<wbr>sumQpAq = 0;<br>
                     }<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.h<br>
--- a/source/encoder/frameencoder.<wbr>h     Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>h     Thu Oct 13 15:22:44 2016 +0530<br>
@@ -184,6 +184,7 @@<br>
     NoiseReduction*          m_nr;<br>
     ThreadLocalData*         m_tld; /* for --no-wpp */<br>
     Bitstream*               m_outStreams;<br>
+    Bitstream*               m_backupStreams;<br>
     uint32_t*                m_substreamSizes;<br>
<br>
     CUGeom*                  m_cuGeoms;<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.cpp<br>
--- a/source/encoder/ratecontrol.<wbr>cpp    Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/ratecontrol.<wbr>cpp    Thu Oct 13 15:22:44 2016 +0530<br>
@@ -2180,7 +2180,7 @@<br>
     for (uint32_t row = 0; row < maxRows; row++)<br>
     {<br>
         encodedBitsSoFar += curEncData.m_rowStat[row].<wbr>encodedBits;<br>
-        rowSatdCostSoFar = curEncData.m_rowStat[row].<wbr>diagSatd;<br>
+        rowSatdCostSoFar = curEncData.m_rowStat[row].<wbr>rowSatd;<br>
         uint32_t satdCostForPendingCus = curEncData.m_rowStat[row].<wbr>satdForVbv - rowSatdCostSoFar;<br>
         satdCostForPendingCus >>= X265_DEPTH - 8;<br>
         if (satdCostForPendingCus  > 0)<br>
@@ -2209,7 +2209,7 @@<br>
                 }<br>
<br>
                 refRowSatdCost >>= X265_DEPTH - 8;<br>
-                refQScale = refEncData.m_rowStat[row].<wbr>diagQpScale;<br>
+                refQScale = refEncData.m_rowStat[row].<wbr>rowQpScale;<br>
             }<br>
<br>
             if (picType == I_SLICE || qScale >= refQScale)<br>
@@ -2231,7 +2231,7 @@<br>
             }<br>
             else if (picType == P_SLICE)<br>
             {<br>
-                intraCostForPendingCus = curEncData.m_rowStat[row].<wbr>intraSatdForVbv - curEncData.m_rowStat[row].<wbr>diagIntraSatd;<br>
+                intraCostForPendingCus = curEncData.m_rowStat[row].<wbr>intraSatdForVbv - curEncData.m_rowStat[row].<wbr>rowIntraSatd;<br>
                 intraCostForPendingCus >>= X265_DEPTH - 8;<br>
                 /* Our QP is lower than the reference! */<br>
                 double pred_intra = predictSize(rce->rowPred[1], qScale, intraCostForPendingCus);<br>
@@ -2246,16 +2246,16 @@<br>
     return totalSatdBits + encodedBitsSoFar;<br>
 }<br>
<br>
-int RateControl::<wbr>rowDiagonalVbvRateControl(<wbr>Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)<br>
+int RateControl::<wbr>rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)<br>
 {<br>
     FrameData& curEncData = *curFrame->m_encData;<br>
     double qScaleVbv = x265_qp2qScale(qpVbv);<br>
-    uint64_t rowSatdCost = curEncData.m_rowStat[row].<wbr>diagSatd;<br>
+    uint64_t rowSatdCost = curEncData.m_rowStat[row].<wbr>rowSatd;<br>
     double encodedBits = curEncData.m_rowStat[row].<wbr>encodedBits;<br>
<br>
-    if (row == 1)<br>
+    if (m_param->bEnableWavefront && row == 1)<br>
     {<br>
-        rowSatdCost += curEncData.m_rowStat[0].<wbr>diagSatd;<br>
+        rowSatdCost += curEncData.m_rowStat[0].<wbr>rowSatd;<br>
         encodedBits += curEncData.m_rowStat[0].<wbr>encodedBits;<br>
     }<br>
     rowSatdCost >>= X265_DEPTH - 8;<br>
@@ -2263,11 +2263,11 @@<br>
     if (curEncData.m_slice->m_<wbr>sliceType != I_SLICE)<br>
     {<br>
         Frame* refFrame = curEncData.m_slice->m_<wbr>refFrameList[0][0];<br>
-        if (qpVbv < refFrame->m_encData->m_<wbr>rowStat[row].diagQp)<br>
+        if (qpVbv < refFrame->m_encData->m_<wbr>rowStat[row].rowQp)<br>
         {<br>
-            uint64_t intraRowSatdCost = curEncData.m_rowStat[row].<wbr>diagIntraSatd;<br>
-            if (row == 1)<br>
-                intraRowSatdCost += curEncData.m_rowStat[0].<wbr>diagIntraSatd;<br>
+            uint64_t intraRowSatdCost = curEncData.m_rowStat[row].<wbr>rowIntraSatd;<br>
+            if (m_param->bEnableWavefront && row == 1)<br>
+                intraRowSatdCost += curEncData.m_rowStat[0].<wbr>rowIntraSatd;<br>
             intraRowSatdCost >>= X265_DEPTH - 8;<br>
             updatePredictor(rce->rowPred[<wbr>1], qScaleVbv, (double)intraRowSatdCost, encodedBits);<br>
         }<br>
@@ -2328,7 +2328,7 @@<br>
         }<br>
<br>
         while (qpVbv > qpMin<br>
-               && (qpVbv > curEncData.m_rowStat[0].diagQp || m_singleFrameVbv)<br>
+               && (qpVbv > curEncData.m_rowStat[0].rowQp || m_singleFrameVbv)<br>
                && (((accFrameBits < rce->frameSizePlanned * 0.8f && qpVbv <= prevRowQp)<br>
                    || accFrameBits < (rce->bufferFill - m_bufferSize + m_bufferRate) * 1.1)<br>
                    && (!m_param->rc.bStrictCbr ? 1 : abrOvershoot < 0)))<br>
@@ -2348,7 +2348,7 @@<br>
                 accFrameBits = predictRowsSizeSum(curFrame, rce, qpVbv, encodedBitsSoFar);<br>
                 abrOvershoot = (accFrameBits + m_totalBits - m_wantedBitsWindow) / totalBitsNeeded;<br>
             }<br>
-            if (qpVbv > curEncData.m_rowStat[0].diagQp &&<br>
+            if (qpVbv > curEncData.m_rowStat[0].rowQp &&<br>
                 abrOvershoot < -0.1 && timeDone > 0.5 && accFrameBits < rce->frameSizePlanned - rcTol)<br>
             {<br>
                 qpVbv -= stepSize;<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.h<br>
--- a/source/encoder/ratecontrol.h      Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/ratecontrol.h      Thu Oct 13 15:22:44 2016 +0530<br>
@@ -243,7 +243,7 @@<br>
     int  rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);<br>
     void rateControlUpdateStats(<wbr>RateControlEntry* rce);<br>
     int  rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce);<br>
-    int  rowDiagonalVbvRateControl(<wbr>Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);<br>
+    int  rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);<br>
     int  rateControlSliceType(int frameNum);<br>
     bool cuTreeReadFor2Pass(Frame* curFrame);<br>
     void hrdFullness(<wbr>SEIBufferingPeriod* sei);<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/test/rate-control-<wbr>tests.txt<br>
--- a/source/test/rate-control-<wbr>tests.txt        Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/test/rate-control-<wbr>tests.txt        Thu Oct 13 15:22:44 2016 +0530<br>
@@ -21,6 +21,9 @@<br>
 big_buck_bunny_360p24.y4m,--<wbr>preset medium --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 600 --aud --hrd --tune fast-decode<br>
 sita_1920x1080_30.yuv,--preset superfast --crf 25 --vbv-bufsize 3000 --vbv-maxrate 4000 --vbv-bufsize 5000 --hrd  --crf-max 30<br>
 sita_1920x1080_30.yuv,--preset superfast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --aud --strict-cbr<br>
+BasketballDrive_1920x1080_50.<wbr>y4m,--preset ultrafast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --no-wpp<br>
+big_buck_bunny_360p24.y4m,--<wbr>preset medium --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 600 --no-wpp --aud --hrd --tune fast-decode<br>
+sita_1920x1080_30.yuv,--<wbr>preset superfast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --aud --strict-cbr --no-wpp<br>
<br>
<br>
<br>
______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
</blockquote></div><br></div></div>