[x265] [PATCH] Add VBV support for No-WPP

aruna at multicorewareinc.com aruna at multicorewareinc.com
Wed Oct 26 07:38:16 CEST 2016


# HG changeset patch
# User Aruna Matheswaran
# Date 1476352364 -19800
#      Thu Oct 13 15:22:44 2016 +0530
# Node ID f9e7422416c9d2d4f7b7618791a7c28592de4828
# Parent  bc911034c2a07380630aff98fdda38038b2ae62e
Add VBV support for No-WPP

diff -r bc911034c2a0 -r f9e7422416c9 source/common/bitstream.h
--- a/source/common/bitstream.h	Tue Oct 25 11:32:10 2016 +0530
+++ b/source/common/bitstream.h	Thu Oct 13 15:22:44 2016 +0530
@@ -71,6 +71,7 @@
     uint32_t getNumberOfWrittenBytes() const { return m_byteOccupancy; }
     uint32_t getNumberOfWrittenBits()  const { return m_byteOccupancy * 8 + m_partialByteBits; }
     const uint8_t* getFIFO() const           { return m_fifo; }
+    void     copyBits(Bitstream* stream)     { m_partialByteBits = stream->m_partialByteBits; m_byteOccupancy = stream->m_byteOccupancy; m_partialByte = stream->m_partialByte; } /* copies only the write-position state (byte occupancy, partial byte and its bit count) from 'stream'; m_fifo is not touched */
 
     void     write(uint32_t val, uint32_t numBits);
     void     writeByte(uint32_t val);
diff -r bc911034c2a0 -r f9e7422416c9 source/common/framedata.h
--- a/source/common/framedata.h	Tue Oct 25 11:32:10 2016 +0530
+++ b/source/common/framedata.h	Thu Oct 13 15:22:44 2016 +0530
@@ -126,10 +126,10 @@
         uint32_t encodedBits;   /* sum of 'totalBits' of encoded CTUs */
         uint32_t satdForVbv;    /* sum of lowres (estimated) costs for entire row */
         uint32_t intraSatdForVbv; /* sum of lowres (estimated) intra costs for entire row */
-        uint32_t diagSatd;
-        uint32_t diagIntraSatd;
-        double   diagQp;
-        double   diagQpScale;
+        uint32_t rowSatd;
+        uint32_t rowIntraSatd;
+        double   rowQp;
+        double   rowQpScale;
         double   sumQpRc;
         double   sumQpAq;
     };
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/encoder.cpp	Thu Oct 13 15:22:44 2016 +0530
@@ -149,12 +149,6 @@
         p->bEnableWavefront = p->bDistributeModeAnalysis = p->bDistributeMotionEstimation = p->lookaheadSlices = 0;
     }
 
-    if (!p->bEnableWavefront && p->rc.vbvBufferSize)
-    {
-        x265_log(p, X265_LOG_ERROR, "VBV requires wavefront parallelism\n");
-        m_aborted = true;
-    }
-
     x265_log(p, X265_LOG_INFO, "Slices                              : %d\n", p->maxSlices);
 
     char buf[128];
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/frameencoder.cpp	Thu Oct 13 15:22:44 2016 +0530
@@ -50,6 +50,7 @@
     m_bAllRowsStop = false;
     m_vbvResetTriggerRow = -1;
     m_outStreams = NULL;
+    m_backupStreams = NULL;
     m_substreamSizes = NULL;
     m_nr = NULL;
     m_tld = NULL;
@@ -85,6 +86,7 @@
 
     delete[] m_rows;
     delete[] m_outStreams;
+    delete[] m_backupStreams;
     X265_FREE(m_sliceBaseRow);
     X265_FREE(m_cuGeoms);
     X265_FREE(m_ctuGeomMap);
@@ -532,6 +534,8 @@
     if (!m_outStreams)
     {
         m_outStreams = new Bitstream[numSubstreams];
+        if (!m_param->bEnableWavefront)
+            m_backupStreams = new Bitstream[numSubstreams];
         m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
         if (!m_param->bEnableSAO)
             for (uint32_t i = 0; i < numSubstreams; i++)
@@ -1203,17 +1207,25 @@
 
         if (bIsVbv)
         {
-            if (!row)
+            if (col == 0 && !m_param->bEnableWavefront)
+            {
+                m_backupStreams[0].copyBits(&m_outStreams[0]);
+                curRow.bufferedEntropy.copyState(rowCoder);
+                curRow.bufferedEntropy.loadContexts(rowCoder);
+            }
+            if (!row && m_vbvResetTriggerRow != intRow)
             {
-                curEncData.m_rowStat[row].diagQp = curEncData.m_avgQpRc;
-                curEncData.m_rowStat[row].diagQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
+                curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
+                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
             }
 
             FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
-            if (row >= col && row && m_vbvResetTriggerRow != intRow)
-                cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
+            if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow)
+                cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
+            else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow)
+                cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
             else
-                cuStat.baseQp = curEncData.m_rowStat[row].diagQp;
+                cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
 
             /* TODO: use defines from slicetype.h for lowres block size */
             uint32_t block_y = (ctu->m_cuPelY >> g_maxLog2CUSize) * noOfBlocks;
@@ -1364,21 +1376,52 @@
         if (bIsVbv)
         {
             // Update encoded bits, satdCost, baseQP for each CU
-            curEncData.m_rowStat[row].diagSatd      += curEncData.m_cuStat[cuAddr].vbvCost;
-            curEncData.m_rowStat[row].diagIntraSatd += curEncData.m_cuStat[cuAddr].intraVbvCost;
+            curEncData.m_rowStat[row].rowSatd      += curEncData.m_cuStat[cuAddr].vbvCost;
+            curEncData.m_rowStat[row].rowIntraSatd += curEncData.m_cuStat[cuAddr].intraVbvCost;
             curEncData.m_rowStat[row].encodedBits   += curEncData.m_cuStat[cuAddr].totalBits;
             curEncData.m_rowStat[row].sumQpRc       += curEncData.m_cuStat[cuAddr].baseQp;
             curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
 
+            // Without wavefront, the checkpoint is the last CTU of the row: call VBV rate control there.
+
+            if (!m_param->bEnableWavefront && col == numCols - 1)
+            {
+                double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
+                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase);
+                qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
+                curEncData.m_rowStat[row].rowQp = qpBase;
+                curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
+                if (reEncode < 0)
+                {
+                    x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
+                        m_frame->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp);
+
+                    m_vbvResetTriggerRow = row;
+                    m_outStreams[0].copyBits(&m_backupStreams[0]);
+
+                    rowCoder.copyState(curRow.bufferedEntropy);
+                    rowCoder.loadContexts(curRow.bufferedEntropy);
+
+                    curRow.completed = 0;
+                    memset(&curRow.rowStats, 0, sizeof(curRow.rowStats));
+                    curEncData.m_rowStat[row].numEncodedCUs = 0;
+                    curEncData.m_rowStat[row].encodedBits = 0;
+                    curEncData.m_rowStat[row].rowSatd = 0;
+                    curEncData.m_rowStat[row].rowIntraSatd = 0;
+                    curEncData.m_rowStat[row].sumQpRc = 0;
+                    curEncData.m_rowStat[row].sumQpAq = 0;
+                }
+            }
+
             // If current block is at row diagonal checkpoint, call vbv ratecontrol.
 
-            if (row == col && row)
+            else if (m_param->bEnableWavefront && row == col && row)
             {
                 double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
-                int reEncode = m_top->m_rateControl->rowDiagonalVbvRateControl(m_frame, row, &m_rce, qpBase);
+                int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase);
                 qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
-                curEncData.m_rowStat[row].diagQp = qpBase;
-                curEncData.m_rowStat[row].diagQpScale =  x265_qp2qScale(qpBase);
+                curEncData.m_rowStat[row].rowQp = qpBase;
+                curEncData.m_rowStat[row].rowQpScale =  x265_qp2qScale(qpBase);
 
                 if (reEncode < 0)
                 {
@@ -1431,8 +1474,8 @@
                         memset(&stopRow.rowStats, 0, sizeof(stopRow.rowStats));
                         curEncData.m_rowStat[r].numEncodedCUs = 0;
                         curEncData.m_rowStat[r].encodedBits = 0;
-                        curEncData.m_rowStat[r].diagSatd = 0;
-                        curEncData.m_rowStat[r].diagIntraSatd = 0;
+                        curEncData.m_rowStat[r].rowSatd = 0;
+                        curEncData.m_rowStat[r].rowIntraSatd = 0;
                         curEncData.m_rowStat[r].sumQpRc = 0;
                         curEncData.m_rowStat[r].sumQpAq = 0;
                     }
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/frameencoder.h	Thu Oct 13 15:22:44 2016 +0530
@@ -184,6 +184,7 @@
     NoiseReduction*          m_nr;
     ThreadLocalData*         m_tld; /* for --no-wpp */
     Bitstream*               m_outStreams;
+    Bitstream*               m_backupStreams;
     uint32_t*                m_substreamSizes;
 
     CUGeom*                  m_cuGeoms;
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/ratecontrol.cpp	Thu Oct 13 15:22:44 2016 +0530
@@ -2180,7 +2180,7 @@
     for (uint32_t row = 0; row < maxRows; row++)
     {
         encodedBitsSoFar += curEncData.m_rowStat[row].encodedBits;
-        rowSatdCostSoFar = curEncData.m_rowStat[row].diagSatd;
+        rowSatdCostSoFar = curEncData.m_rowStat[row].rowSatd;
         uint32_t satdCostForPendingCus = curEncData.m_rowStat[row].satdForVbv - rowSatdCostSoFar;
         satdCostForPendingCus >>= X265_DEPTH - 8;
         if (satdCostForPendingCus  > 0)
@@ -2209,7 +2209,7 @@
                 }
 
                 refRowSatdCost >>= X265_DEPTH - 8;
-                refQScale = refEncData.m_rowStat[row].diagQpScale;
+                refQScale = refEncData.m_rowStat[row].rowQpScale;
             }
 
             if (picType == I_SLICE || qScale >= refQScale)
@@ -2231,7 +2231,7 @@
             }
             else if (picType == P_SLICE)
             {
-                intraCostForPendingCus = curEncData.m_rowStat[row].intraSatdForVbv - curEncData.m_rowStat[row].diagIntraSatd;
+                intraCostForPendingCus = curEncData.m_rowStat[row].intraSatdForVbv - curEncData.m_rowStat[row].rowIntraSatd;
                 intraCostForPendingCus >>= X265_DEPTH - 8;
                 /* Our QP is lower than the reference! */
                 double pred_intra = predictSize(rce->rowPred[1], qScale, intraCostForPendingCus);
@@ -2246,16 +2246,16 @@
     return totalSatdBits + encodedBitsSoFar;
 }
 
-int RateControl::rowDiagonalVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)
+int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)
 {
     FrameData& curEncData = *curFrame->m_encData;
     double qScaleVbv = x265_qp2qScale(qpVbv);
-    uint64_t rowSatdCost = curEncData.m_rowStat[row].diagSatd;
+    uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd;
     double encodedBits = curEncData.m_rowStat[row].encodedBits;
 
-    if (row == 1)
+    if (m_param->bEnableWavefront && row == 1)
     {
-        rowSatdCost += curEncData.m_rowStat[0].diagSatd;
+        rowSatdCost += curEncData.m_rowStat[0].rowSatd;
         encodedBits += curEncData.m_rowStat[0].encodedBits;
     }
     rowSatdCost >>= X265_DEPTH - 8;
@@ -2263,11 +2263,11 @@
     if (curEncData.m_slice->m_sliceType != I_SLICE)
     {
         Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
-        if (qpVbv < refFrame->m_encData->m_rowStat[row].diagQp)
+        if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp)
         {
-            uint64_t intraRowSatdCost = curEncData.m_rowStat[row].diagIntraSatd;
-            if (row == 1)
-                intraRowSatdCost += curEncData.m_rowStat[0].diagIntraSatd;
+            uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowIntraSatd;
+            if (m_param->bEnableWavefront && row == 1)
+                intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd;
             intraRowSatdCost >>= X265_DEPTH - 8;
             updatePredictor(rce->rowPred[1], qScaleVbv, (double)intraRowSatdCost, encodedBits);
         }
@@ -2328,7 +2328,7 @@
         }
 
         while (qpVbv > qpMin
-               && (qpVbv > curEncData.m_rowStat[0].diagQp || m_singleFrameVbv)
+               && (qpVbv > curEncData.m_rowStat[0].rowQp || m_singleFrameVbv)
                && (((accFrameBits < rce->frameSizePlanned * 0.8f && qpVbv <= prevRowQp)
                    || accFrameBits < (rce->bufferFill - m_bufferSize + m_bufferRate) * 1.1)
                    && (!m_param->rc.bStrictCbr ? 1 : abrOvershoot < 0)))
@@ -2348,7 +2348,7 @@
                 accFrameBits = predictRowsSizeSum(curFrame, rce, qpVbv, encodedBitsSoFar);
                 abrOvershoot = (accFrameBits + m_totalBits - m_wantedBitsWindow) / totalBitsNeeded;
             }
-            if (qpVbv > curEncData.m_rowStat[0].diagQp &&
+            if (qpVbv > curEncData.m_rowStat[0].rowQp &&
                 abrOvershoot < -0.1 && timeDone > 0.5 && accFrameBits < rce->frameSizePlanned - rcTol)
             {
                 qpVbv -= stepSize;
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.h
--- a/source/encoder/ratecontrol.h	Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/ratecontrol.h	Thu Oct 13 15:22:44 2016 +0530
@@ -243,7 +243,7 @@
     int  rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);
     void rateControlUpdateStats(RateControlEntry* rce);
     int  rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce);
-    int  rowDiagonalVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);
+    int  rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);
     int  rateControlSliceType(int frameNum);
     bool cuTreeReadFor2Pass(Frame* curFrame);
     void hrdFullness(SEIBufferingPeriod* sei);
diff -r bc911034c2a0 -r f9e7422416c9 source/test/rate-control-tests.txt
--- a/source/test/rate-control-tests.txt	Tue Oct 25 11:32:10 2016 +0530
+++ b/source/test/rate-control-tests.txt	Thu Oct 13 15:22:44 2016 +0530
@@ -21,6 +21,9 @@
 big_buck_bunny_360p24.y4m,--preset medium --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 600 --aud --hrd --tune fast-decode
 sita_1920x1080_30.yuv,--preset superfast --crf 25 --vbv-bufsize 3000 --vbv-maxrate 4000 --vbv-bufsize 5000 --hrd  --crf-max 30
 sita_1920x1080_30.yuv,--preset superfast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --aud --strict-cbr
+BasketballDrive_1920x1080_50.y4m,--preset ultrafast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --no-wpp
+big_buck_bunny_360p24.y4m,--preset medium --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 600 --no-wpp --aud --hrd --tune fast-decode
+sita_1920x1080_30.yuv,--preset superfast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --aud --strict-cbr --no-wpp
 
 
 


More information about the x265-devel mailing list