[x265] [PATCH RFC] rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr

santhoshini at multicorewareinc.com santhoshini at multicorewareinc.com
Fri Jul 18 07:25:46 CEST 2014


# HG changeset patch
# User Santhoshini Sekar <santhoshini at multicorewareinc.com>
# Date 1405661006 -19800
#      Fri Jul 18 10:53:26 2014 +0530
# Node ID 150a9b81cf871a3229116f560d8d2f5ad0fc7aac
# Parent  93ab6ed75b01449b7cbfbec518d6974134291852
rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr

RateControl statistics are now updated for every frame as soon as refLagRows
rows have been completed in processRowEncoder. With this updated data,
rateControl can predict a more accurate QP.

diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/analysis.cpp	Fri Jul 18 10:53:26 2014 +0530
@@ -66,7 +66,7 @@
 
     m_rdCost.setPsyRdScale(m_param->psyRd);
     m_bEnableRDOQ = top->m_bEnableRDOQ;
-    m_bFrameParallel = top->m_totalFrameThreads > 1;
+    m_bFrameParallel = m_param->frameNumThreads > 1;
     m_numLayers = top->m_quadtreeTULog2MaxSize - top->m_quadtreeTULog2MinSize + 1;
 
     return initSearch();
diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/encoder.cpp	Fri Jul 18 10:53:26 2014 +0530
@@ -157,7 +157,7 @@
 
     if (m_frameEncoder)
     {
-        for (int i = 0; i < m_totalFrameThreads; i++)
+        for (int i = 0; i < m_param->frameNumThreads; i++)
         {
             // Ensure frame encoder is idle before destroying it
             m_frameEncoder[i].getEncodedPicture(m_nalList);
@@ -310,20 +310,6 @@
     else
         m_lookahead->flush();
 
-    if (m_param->rc.rateControlMode == X265_RC_ABR)
-    {
-        // delay frame parallelism for non-VBV ABR
-        if (m_pocLast == 0 && !m_param->rc.vbvBufferSize && !m_param->rc.vbvMaxBitrate)
-            m_param->frameNumThreads = 1;
-        else if (m_param->frameNumThreads != m_totalFrameThreads)
-        {
-            // re-enable frame parallelism after the first few P frames are encoded
-            uint32_t frameCnt = (uint32_t)((0.5 * m_param->fpsNum / m_param->fpsDenom) / (m_param->bframes + 1));
-            if (m_analyzeP.m_numPics > frameCnt)
-                m_param->frameNumThreads = m_totalFrameThreads;
-        }
-    }
-
     FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
     m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
     int ret = 0;
@@ -392,26 +378,11 @@
             if (bChroma)
                 m_numChromaWPBiFrames++;
         }
-
-        uint64_t bytes = 0;
-        for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
+        if (m_aborted == true)
         {
-            int type = m_nalList.m_nal[i].type;
-
-            // exclude SEI
-            if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
-            {
-                bytes += m_nalList.m_nal[i].sizeBytes;
-                // and exclude start code prefix
-                bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
-            }
-        }
-        if (m_rateControl->rateControlEnd(out, bytes << 3, &curEncoder->m_rce, &curEncoder->m_frameStats) < 0)
-        {
-            m_aborted = true;
             return -1;
         }
-        finishFrameStats(out, curEncoder, bytes << 3);
+        finishFrameStats(out, curEncoder, curEncoder->m_accessUnitBits);
         // Allow this frame to be recycled if no frame encoders are using it for reference
         if (!pic_out)
         {
@@ -466,12 +437,17 @@
         // determine references, setup RPS, etc
         m_dpb->prepareEncode(fenc);
 
-        // set slice QP
-        m_rateControl->rateControlStart(fenc, m_lookahead, &curEncoder->m_rce, this);
 
         // Allow FrameEncoder::compressFrame() to start in a worker thread
         curEncoder->m_enable.trigger();
     }
+    else if (!fenc && m_encodedFrameNum > 0)
+    {
+        // faked rateControlStart calls to avoid rateControlEnd of last frameNumThreads parallel frames from waiting
+        RateControlEntry rce;
+        rce.encodeOrder = m_encodedFrameNum++;
+        m_rateControl->rateControlStart(NULL, m_lookahead, &rce, this);
+    }
 
     return ret;
 }
@@ -1268,7 +1244,6 @@
     {
         x265_log(p, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n");
     }
-    m_totalFrameThreads = m_param->frameNumThreads;
 
     if (p->keyframeMax < 0)
     {
diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/encoder.h
--- a/source/encoder/encoder.h	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/encoder.h	Fri Jul 18 10:53:26 2014 +0530
@@ -73,7 +73,6 @@
 {
 private:
 
-    bool               m_aborted;          // fatal error detected
     int                m_pocLast;          ///< time index (POC)
     int                m_encodedFrameNum;
     int                m_outputCount;
@@ -84,7 +83,6 @@
     int64_t            m_prevReorderedPts[2];
 
     ThreadPool*        m_threadPool;
-    Lookahead*         m_lookahead;
     FrameEncoder*      m_frameEncoder;
     DPB*               m_dpb;
 
@@ -117,6 +115,9 @@
     ProfileTierLevel   m_ptl;
     TComScalingList    m_scalingList;      // quantization matrix information
 
+    Lookahead*         m_lookahead;
+
+    bool               m_aborted;         // fatal error detected
 
     /* profile & level */
     Profile::Name      m_profile;
@@ -145,8 +146,6 @@
     Window             m_conformanceWindow;
     Window             m_defaultDisplayWindow;
 
-    int                m_totalFrameThreads;
-
     uint32_t           m_numDelayedPic;
 
     Encoder();
diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/frameencoder.cpp	Fri Jul 18 10:53:26 2014 +0530
@@ -314,24 +314,6 @@
         m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
     }
 
-    int qp = slice->m_sliceQp;
-
-    int chromaQPOffset = slice->m_pps->chromaCbQpOffset;
-    int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
-    
-    double lambda = x265_lambda2_tab[qp];
-    /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
-    double chromaLambda = x265_lambda2_tab[qpCb];
-
-    // NOTE: set SAO lambda every Frame
-    m_frameFilter.m_sao.lumaLambda = lambda;
-    m_frameFilter.m_sao.chromaLambda = chromaLambda;
-
-    // Clip qps back to 0-51 range before encoding
-    qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
-    slice->m_sliceQp = qp;
-    m_frame->m_avgQpAq = qp;
-
     switch (slice->m_sliceType)
     {
     case I_SLICE:
@@ -479,6 +461,23 @@
         }
     }
 
+    uint64_t bytes = 0;
+    for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
+    {
+        int type = m_nalList.m_nal[i].type;
+
+        // exclude SEI
+        if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
+        {
+            bytes += m_nalList.m_nal[i].sizeBytes;
+            // and exclude start code prefix
+            bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
+        }
+    }
+    m_accessUnitBits = bytes << 3;
+    if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce, &m_frameStats) < 0)
+        m_top->m_aborted = true;
+
     noiseReductionUpdate();
 
     m_elapsedCompressTime = (double)(x265_mdate() - startCompressTime) / 1000000;
@@ -581,6 +580,27 @@
     PPAScopeEvent(FrameEncoder_compressRows);
     TComSlice* slice = m_frame->getSlice();
 
+    // set slice QP
+    m_top->m_rateControl->rateControlStart(m_frame, m_top->m_lookahead, &m_rce, m_top);
+
+    int qp = slice->m_sliceQp;
+
+    int chromaQPOffset = slice->m_pps->chromaCbQpOffset;
+    int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
+    
+    double lambda = x265_lambda2_tab[qp];
+    /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
+    double chromaLambda = x265_lambda2_tab[qpCb];
+
+    // NOTE: set SAO lambda every Frame
+    m_frameFilter.m_sao.lumaLambda = lambda;
+    m_frameFilter.m_sao.chromaLambda = chromaLambda;
+
+    // Clip qps back to 0-51 range before encoding
+    qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
+    slice->m_sliceQp = qp;
+    m_frame->m_avgQpAq = qp;
+
     // reset entropy coders
     m_sbacCoder.resetEntropy(slice);
     for (int i = 0; i < this->m_numRows; i++)
@@ -908,7 +928,34 @@
         }
     }
 
+    /* when a frame is half way through, update bits and complexity in rate control
+     * for it to be available for the next frame's QScale calculation. This makes it 
+     * more accurate with updated value */
+    int rowCount = 0;
+
+    /* for the first two seconds update when the frame is half done and for rest
+     * of the sequence update when refLagRows are completed */
+    if (m_param->rc.rateControlMode == X265_RC_ABR)
+    {
+        if (m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom))
+            rowCount = m_numRows/2;
+        else
+            rowCount = m_refLagRows;
+    }
+
     // this row of CTUs has been encoded
+    if (row == rowCount)
+    {
+        int64_t bits = 0;
+        for(uint32_t col = 0; col < rowCount * numCols; col++)
+        {
+            TComDataCU* cu = m_frame->getCU(col);
+            bits += cu->m_totalBits;
+        }
+
+        m_rce.rowTotalBits = bits;
+        m_top->m_rateControl->rateControlUpdateStats(&m_rce);
+    }
 
     // trigger row-wise loop filters
     if (row >= m_filterRowDelay)
diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/frameencoder.h	Fri Jul 18 10:53:26 2014 +0530
@@ -133,6 +133,8 @@
     FrameStats               m_frameStats;          // stats of current frame for multipass encodes
     volatile bool            m_bAllRowsStop;
     volatile int             m_vbvResetTriggerRow;
+    Frame*                   m_frame;
+    uint64_t                 m_accessUnitBits;
 
 protected:
 
@@ -152,7 +154,6 @@
     NALList                  m_nalList;
     ThreadLocalData          m_tld;
 
-    Frame*                   m_frame;
 
     int                      m_filterRowDelay;
     int                      m_filterRowDelayCus;
diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/ratecontrol.cpp	Fri Jul 18 10:53:26 2014 +0530
@@ -296,10 +296,13 @@
 
     // validate for param->rc, maybe it is need to add a function like x265_parameters_valiate()
     m_residualFrames = 0;
+    m_partialResidualFrames = 0;
     m_residualCost = 0;
+    m_partialResidualCost = 0;
     m_rateFactorMaxIncrement = 0;
     m_rateFactorMaxDecrement = 0;
     m_fps = m_param->fpsNum / m_param->fpsDenom;
+    m_startEndOrder.set(0);
     if (m_param->rc.rateControlMode == X265_RC_CRF)
     {
         m_param->rc.qp = (int)m_param->rc.rfConstant;
@@ -666,6 +669,7 @@
     m_totalBits = 0;
     m_framesDone = 0;
     m_residualCost = 0;
+    m_partialResidualCost = 0;
 
     /* 720p videos seem to be a good cutoff for cplxrSum */
     double tuneCplxFactor = (m_param->rc.cuTree && m_ncu > 3600) ? 2.5 : 1;
@@ -979,6 +983,19 @@
 
 void RateControl::rateControlStart(Frame* pic, Lookahead *l, RateControlEntry* rce, Encoder* enc)
 {
+    int orderValue = m_startEndOrder.get();
+    int startOrdinal = rce->encodeOrder * 2;
+
+    while (orderValue != startOrdinal && pic)
+       orderValue = m_startEndOrder.waitForChange(orderValue);
+
+    if (!pic)
+    {
+        // faked rateControlStart calls
+        m_startEndOrder.incr();
+        return;
+    }
+
     m_curSlice = pic->getSlice();
     m_sliceType = m_curSlice->m_sliceType;
     rce->sliceType = m_sliceType;
@@ -991,6 +1008,8 @@
     rce->bLastMiniGopBFrame = pic->m_lowres.bLastMiniGopBFrame;
     rce->bufferRate = m_bufferRate;
     rce->poc = m_curSlice->m_poc;
+    rce->rowCplxrSum = 0.0;
+    rce->rowTotalBits = 0;
     if (m_isVbv)
     {
         if (rce->rowPreds[0][0].count == 0)
@@ -1044,6 +1063,8 @@
         m_qp = Clip3(MIN_QP, MAX_MAX_QP, m_qp);
         rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
     }
+    // Do not increment m_startEndOrder here. Make rateControlEnd of previous thread
+    // to wait until rateControlUpdateStats of this frame is called
     m_framesDone++;
     /* set the final QP to slice structure */
     m_curSlice->m_sliceQp = m_qp;
@@ -1278,7 +1299,7 @@
                 /* use framesDone instead of POC as poc count is not serial with bframes enabled */
                 double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
                 wantedBits = timeDone * m_bitrate;
-                if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
+                if (wantedBits > 0 && m_totalBits > 0 && !m_partialResidualFrames)
                 {
                     abrBuffer *= X265_MAX(1, sqrt(timeDone));
                     overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
@@ -1300,7 +1321,7 @@
                 double lqmin = 0, lqmax = 0;
                 lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
                 lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
-                if (!m_residualFrames)
+                if (!m_partialResidualFrames)
                 {
                     if (overflow > 1.1 && m_framesDone > 3)
                         lqmax *= m_lstep;
@@ -1342,16 +1363,17 @@
     if (rce->sliceType == I_SLICE)
     {
         /* previous I still had a residual; roll it into the new loan */
-        if (m_residualFrames)
-            rce->rowTotalBits += m_residualCost * m_residualFrames;
+        if (m_partialResidualFrames)
+            rce->rowTotalBits += m_partialResidualCost * m_partialResidualFrames;
 
-        m_residualFrames = X265_MIN(s_amortizeFrames, m_param->keyframeMax);
-        m_residualCost = (int)((rce->rowTotalBits * s_amortizeFraction) / m_residualFrames);
-        rce->rowTotalBits -= m_residualCost * m_residualFrames;
+        m_partialResidualFrames = X265_MIN(s_amortizeFrames, m_param->keyframeMax);
+        m_partialResidualCost = (int)((rce->rowTotalBits * s_amortizeFraction) /m_partialResidualFrames);
+        rce->rowTotalBits -= m_partialResidualCost * m_partialResidualFrames;
     }
-    else if (m_residualFrames)
+    else if (m_partialResidualFrames)
     {
-         rce->rowTotalBits += m_residualCost;
+         rce->rowTotalBits += m_partialResidualCost;
+         m_partialResidualFrames--;
     }
 
     if (rce->sliceType != B_SLICE)
@@ -1361,6 +1383,13 @@
 
     m_cplxrSum += rce->rowCplxrSum;
     m_totalBits += rce->rowTotalBits;
+
+    /* do not allow the next frame to enter rateControlStart() until this
+     * frame has updated its mid-frame statistics */
+    m_startEndOrder.incr();
+
+    if (rce->encodeOrder < m_param->frameNumThreads - 1)
+        m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
 }
 
 void RateControl::checkAndResetABR(RateControlEntry* rce, bool isFrameDone)
@@ -1819,6 +1848,11 @@
 /* After encoding one frame, update rate control state */
 int RateControl::rateControlEnd(Frame* pic, int64_t bits, RateControlEntry* rce, FrameStats* stats)
 {
+    int orderValue = m_startEndOrder.get();
+    int endOrdinal = (rce->encodeOrder + m_param->frameNumThreads) * 2 - 1;
+    while (orderValue != endOrdinal)
+        orderValue = m_startEndOrder.waitForChange(orderValue);
+
     int64_t actualBits = bits;
     if (m_isAbr)
     {
@@ -1918,17 +1952,19 @@
                 }
             }
             if (rce->sliceType != B_SLICE)
+            {
                 /* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
                  * to improve short term compensation for next frame. */
-                m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq;
+                m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq) - (rce->rowCplxrSum);
+            }
             else
             {
                 /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
                  * Not perfectly accurate with B-refs, but good enough. */
-                m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor));
+                m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor))) - (rce->rowCplxrSum);
             }
             m_wantedBitsWindow += m_frameDuration * m_bitrate;
-            m_totalBits += bits;
+            m_totalBits += bits - rce->rowTotalBits;
         }
     }
 
@@ -1972,6 +2008,8 @@
             rce->hrdTiming->dpbOutputTime = (double)rce->picTimingSEI->m_picDpbOutputDelay * time->numUnitsInTick / time->timeScale + rce->hrdTiming->cpbRemovalTime;
         }
     }
+    // Allow rateControlStart of next frame only when rateControlEnd of previous frame is over
+    m_startEndOrder.incr();
     rce->isActive = false;
     return 0;
 
diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/ratecontrol.h
--- a/source/encoder/ratecontrol.h	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/ratecontrol.h	Fri Jul 18 10:53:26 2014 +0530
@@ -147,6 +147,22 @@
     int64_t  m_totalBits;        /* total bits used for already encoded frames */
     int      m_framesDone;       /* # of frames passed through RateCotrol already */
     double   m_fps;
+
+    /* a common variable on which rateControlStart, rateControlEnd and rateControlUpdateStats wait to
+     * sync the calls to these functions. For example
+     * -F2:
+     * rceStart  10
+     * rceUpdate 10
+     * rceEnd    9
+     * rceStart  11
+     * rceUpdate 11
+     * rceEnd    10
+     * rceStart  12
+     * rceUpdate 12
+     * rceEnd    11 */
+
+    ThreadSafeInteger m_startEndOrder;
+
     /* hrd stuff */
     SEIBufferingPeriod m_bufPeriodSEI;
     double   m_nominalRemovalTime;
@@ -185,7 +201,9 @@
     static const char  *s_defaultStatFileName;
 
     int m_residualFrames;
+    int m_partialResidualFrames;
     int m_residualCost;
+    int m_partialResidualCost;
 
     double getQScale(RateControlEntry *rce, double rateFactor);
     double rateEstimateQscale(Frame* pic, RateControlEntry *rce); // main logic for calculating QP based on ABR


More information about the x265-devel mailing list