[x265] [PATCH 3 of 3 RFC] rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr

Santhoshini Sekar santhoshini at multicorewareinc.com
Fri Jul 11 13:24:46 CEST 2014


# HG changeset patch
# User Santhoshini Sekar <santhoshini at multicorewareinc.com>
# Date 1405077594 -19800
#      Fri Jul 11 16:49:54 2014 +0530
# Node ID 070c3f30547aca9af4f8a708b6ae4a108510aad5
# Parent  7acd78cdabfee453ba3b44b034eb2c87e587c7e6
rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr

diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/encoder.cpp	Fri Jul 11 16:49:54 2014 +0530
@@ -311,20 +311,6 @@
     else
         m_lookahead->flush();
 
-    if (m_param->rc.rateControlMode == X265_RC_ABR)
-    {
-        // delay frame parallelism for non-VBV ABR
-        if (m_pocLast == 0 && !m_param->rc.vbvBufferSize && !m_param->rc.vbvMaxBitrate)
-            m_param->frameNumThreads = 1;
-        else if (m_param->frameNumThreads != m_totalFrameThreads)
-        {
-            // re-enable frame parallelism after the first few P frames are encoded
-            uint32_t frameCnt = (uint32_t)((0.5 * m_param->fpsNum / m_param->fpsDenom) / (m_param->bframes + 1));
-            if (m_analyzeP.m_numPics > frameCnt)
-                m_param->frameNumThreads = m_totalFrameThreads;
-        }
-    }
-
     FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
     m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
     int ret = 0;
@@ -393,26 +379,11 @@
             if (bChroma)
                 m_numChromaWPBiFrames++;
         }
-
-        uint64_t bytes = 0;
-        for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
+        if (m_aborted == true)
         {
-            int type = m_nalList.m_nal[i].type;
-
-            // exclude SEI
-            if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
-            {
-                bytes += m_nalList.m_nal[i].sizeBytes;
-                // and exclude start code prefix
-                bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
-            }
-        }
-        if (m_rateControl->rateControlEnd(out, bytes << 3, &curEncoder->m_rce, &curEncoder->m_frameStats) < 0)
-        {
-            m_aborted = true;
             return -1;
         }
-        finishFrameStats(out, curEncoder, bytes << 3);
+        finishFrameStats(out, curEncoder, curEncoder->m_accessUnitBits);
         // Allow this frame to be recycled if no frame encoders are using it for reference
         if (!pic_out)
         {
@@ -465,13 +436,16 @@
         // determine references, setup RPS, etc
         m_dpb->prepareEncode(fenc);
 
-        // set slice QP
-        m_rateControl->rateControlStart(fenc, m_lookahead, &curEncoder->m_rce, this);
 
         // Allow FrameEncoder::compressFrame() to start in a worker thread
         curEncoder->m_enable.trigger();
     }
-
+    else if (!fenc && m_encodedFrameNum > 0)
+    {
+        RateControlEntry rce;
+        rce.encodeOrder = m_encodedFrameNum++;
+        m_rateControl->rateControlStart(NULL, m_lookahead, &rce, this);
+    }
     return ret;
 }
 
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/encoder.h
--- a/source/encoder/encoder.h	Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/encoder.h	Fri Jul 11 16:49:54 2014 +0530
@@ -71,7 +71,6 @@
 {
 private:
 
-    bool               m_aborted;          // fatal error detected
     int                m_pocLast;          ///< time index (POC)
     int                m_encodedFrameNum;
     int                m_outputCount;
@@ -82,7 +81,6 @@
     int64_t            m_prevReorderedPts[2];
 
     ThreadPool*        m_threadPool;
-    Lookahead*         m_lookahead;
     FrameEncoder*      m_frameEncoder;
     DPB*               m_dpb;
 
@@ -91,14 +89,6 @@
     int                m_curEncoder;
 
 
-    /* Collect statistics globally */
-    EncStats           m_analyzeAll;
-    EncStats           m_analyzeI;
-    EncStats           m_analyzeP;
-    EncStats           m_analyzeB;
-    FILE*              m_csvfpt;
-    int64_t            m_encodeStartTime;
-
     // quality control
     TComScalingList    m_scalingList;      ///< quantization matrix information
 
@@ -119,6 +109,17 @@
     Level::Tier        m_levelTier;
     Level::Name        m_level;
 
+    /* Collect statistics globally */
+    EncStats           m_analyzeAll;
+    EncStats           m_analyzeI;
+    EncStats           m_analyzeP;
+    EncStats           m_analyzeB;
+    FILE*              m_csvfpt;
+    int64_t            m_encodeStartTime;
+
+    Lookahead*         m_lookahead;
+
+    bool               m_aborted;          // fatal error detected
     bool               m_nonPackedConstraintFlag;
     bool               m_frameOnlyConstraintFlag;
 
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/frameencoder.cpp	Fri Jul 11 16:49:54 2014 +0530
@@ -429,26 +429,6 @@
         m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
     }
 
-    int qp = slice->getSliceQp();
-
-    int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
-    int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
-    
-    double lambda = x265_lambda2_tab[qp];
-    /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
-    double chromaLambda = x265_lambda2_tab[qpCb];
-
-    // NOTE: set SAO lambda every Frame
-    m_frameFilter.m_sao.lumaLambda = lambda;
-    m_frameFilter.m_sao.chromaLambda = chromaLambda;
-
-    // Clip qps back to 0-51 range before encoding
-    qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
-    slice->setSliceQp(qp);
-    m_frame->m_avgQpAq = qp;
-    slice->setSliceQpDelta(0);
-    slice->setSliceQpDeltaCb(0);
-    slice->setSliceQpDeltaCr(0);
 
     switch (slice->getSliceType())
     {
@@ -601,6 +581,24 @@
         }
     }
 
+    uint64_t bytes = 0;
+    for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
+    {
+        int type = m_nalList.m_nal[i].type;
+
+        // exclude SEI
+        if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
+        {
+            bytes += m_nalList.m_nal[i].sizeBytes;
+            // and exclude start code prefix
+            bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
+        }
+    }
+    m_accessUnitBits = bytes << 3;
+    if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce, &m_frameStats) < 0)
+    {
+        m_top->m_aborted = true;
+    }
     noiseReductionUpdate();
 
     m_elapsedCompressTime = (double)(x265_mdate() - startCompressTime) / 1000000;
@@ -699,7 +697,28 @@
     PPAScopeEvent(FrameEncoder_compressRows);
     TComSlice* slice = m_frame->getSlice();
 
-    // reset entropy coders
+    //set slice QP
+    m_top->m_rateControl->rateControlStart(m_frame, m_top->m_lookahead, &m_rce, m_top);
+    int qp = slice->getSliceQp();
+
+    int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
+    int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
+    double lambda = x265_lambda2_tab[qp];
+    /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
+    double chromaLambda = x265_lambda2_tab[qpCb];
+
+    // NOTE: set SAO lambda every Frame
+    m_frameFilter.m_sao.lumaLambda = lambda;
+    m_frameFilter.m_sao.chromaLambda = chromaLambda;
+
+    // Clip qps back to 0-51 range before encoding
+    qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
+    slice->setSliceQp(qp);
+    m_frame->m_avgQpAq = qp;
+    slice->setSliceQpDelta(0);
+    slice->setSliceQpDeltaCb(0);
+    slice->setSliceQpDeltaCr(0);
+     //reset entropy coders
     m_sbacCoder.resetEntropy(slice);
     for (int i = 0; i < this->m_numRows; i++)
     {
@@ -1026,7 +1045,26 @@
         }
     }
 
+    int rowCount;
+
+    if (m_top->m_analyzeAll.m_numPics <= 2 * (m_param->fpsNum / m_param->fpsDenom))
+        rowCount = m_numRows/2 ;
+    else
+        rowCount = m_refLagRows;
+
     // this row of CTUs has been encoded
+    if (row == rowCount)
+    {
+        int64_t bits = 0;
+        for(uint32_t col = 0; col < rowCount * numCols; col++)
+        {
+            TComDataCU* cu = m_frame->getCU(col);
+            bits += cu->m_totalBits;
+        }
+
+        m_rce.rowTotalBits = bits;
+        m_top->m_rateControl->rateControlUpdateStats(&m_rce);
+    }
 
     // trigger row-wise loop filters
     if (row >= m_filterRowDelay)
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/frameencoder.h	Fri Jul 11 16:49:54 2014 +0530
@@ -137,6 +137,8 @@
     FrameStats               m_frameStats;          // stats of current frame for multipass encodes
     volatile bool            m_bAllRowsStop;
     volatile int             m_vbvResetTriggerRow;
+    Frame*                   m_frame;
+    uint64_t                 m_accessUnitBits;
 
 protected:
 
@@ -155,7 +157,6 @@
     NALList                  m_nalList;
     ThreadLocalData          m_tld;
 
-    Frame*                   m_frame;
 
     int                      m_filterRowDelay;
     int                      m_filterRowDelayCus;
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/ratecontrol.cpp	Fri Jul 11 16:49:54 2014 +0530
@@ -263,7 +263,6 @@
     int lowresCuWidth = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
     int lowresCuHeight = ((m_param->sourceHeight / 2)  + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
     m_ncu = lowresCuWidth * lowresCuHeight;
-
     if (m_param->rc.cuTree)
         m_qCompress = 1;
     else
@@ -541,74 +540,88 @@
 
 void RateControl::rateControlStart(Frame* pic, Lookahead *l, RateControlEntry* rce, Encoder* enc)
 {
-    m_curSlice = pic->getSlice();
-    m_sliceType = m_curSlice->getSliceType();
-    rce->sliceType = m_sliceType;
-    rce->isActive = true;
-    if (m_sliceType == B_SLICE)
-        rce->bframes = m_bframes;
-    else
-        m_bframes = pic->m_lowres.leadingBframes;
+    int orderValue = m_startEndOrder.get();
+    int startOrdinal = rce->encodeOrder * 2;
 
-    rce->bLastMiniGopBFrame = pic->m_lowres.bLastMiniGopBFrame;
-    rce->bufferRate = m_bufferRate;
-    rce->poc = m_curSlice->getPOC();
-    if (m_isVbv)
-    {
-        if (rce->rowPreds[0][0].count == 0)
+    while (orderValue != startOrdinal && pic)
+       orderValue = m_startEndOrder.waitForChange(orderValue);
+
+    ScopedLock scope(m_lock);
+    if (pic)
         {
-            for (int i = 0; i < 3; i++)
+        m_curSlice = pic->getSlice();
+        m_sliceType = m_curSlice->getSliceType();
+        rce->sliceType = m_sliceType;
+
+        rce->isActive = true;
+        if (m_sliceType == B_SLICE)
+            rce->bframes = m_bframes;
+        else
+            m_bframes = pic->m_lowres.leadingBframes;
+        rce->bLastMiniGopBFrame = pic->m_lowres.bLastMiniGopBFrame;
+        rce->bufferRate = m_bufferRate;
+        rce->poc = m_curSlice->getPOC();
+        rce->rowCplxrSum = 0.0;
+        rce->rowTotalBits = 0;
+        if (m_isVbv)
+        {
+            if (rce->rowPreds[0][0].count == 0)
             {
-                for (int j = 0; j < 2; j++)
+                for (int i = 0; i < 3; i++)
                 {
-                    rce->rowPreds[i][j].coeff = 0.25;
-                    rce->rowPreds[i][j].count = 1.0;
-                    rce->rowPreds[i][j].decay = 0.5;
-                    rce->rowPreds[i][j].offset = 0.0;
+                    for (int j = 0; j < 2; j++)
+                    {
+                        rce->rowPreds[i][j].coeff = 0.25;
+                        rce->rowPreds[i][j].count = 1.0;
+                        rce->rowPreds[i][j].decay = 0.5;
+                        rce->rowPreds[i][j].offset = 0.0;
+                    }
                 }
             }
+            rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
+            rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
+            updateVbvPlan(enc);
+            rce->bufferFill = m_bufferFill;
         }
-        rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
-        rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
-        updateVbvPlan(enc);
-        rce->bufferFill = m_bufferFill;
-    }
-    if (m_isAbr) //ABR,CRF
-    {
-        m_currentSatd = l->getEstimatedPictureCost(pic) >> (X265_DEPTH - 8);
-        /* Update rce for use in rate control VBV later */
-        rce->lastSatd = m_currentSatd;
-        double q = x265_qScale2qp(rateEstimateQscale(pic, rce));
-        q = Clip3((double)MIN_QP, (double)MAX_MAX_QP, q);
-        m_qp = int(q + 0.5);
-        rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = q;
-        /* copy value of lastRceq into thread local rce struct *to be used in RateControlEnd() */
-        rce->qRceq = m_lastRceq;
-        accumPQpUpdate();
-    }
-    else //CQP
-    {
-        if (m_sliceType == B_SLICE && m_curSlice->isReferenced())
-            m_qp = (m_qpConstant[B_SLICE] + m_qpConstant[P_SLICE]) / 2;
-        else
-            m_qp = m_qpConstant[m_sliceType];
-        pic->m_avgQpAq = pic->m_avgQpRc = m_qp;
-    }
-    if (m_sliceType != B_SLICE)
-    {
-        m_lastNonBPictType = m_sliceType;
-        m_leadingNoBSatd = m_currentSatd;
-    }
-    rce->leadingNoBSatd = m_leadingNoBSatd;
-    if (pic->m_forceqp)
-    {
-        m_qp = int32_t(pic->m_forceqp + 0.5) - 1;
-        m_qp = Clip3(MIN_QP, MAX_MAX_QP, m_qp);
-        rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
-    }
-    m_framesDone++;
-    /* set the final QP to slice structure */
-    m_curSlice->setSliceQp(m_qp);
+        if (m_isAbr) //ABR,CRF
+        {
+            m_currentSatd = l->getEstimatedPictureCost(pic) >> (X265_DEPTH - 8);
+            /* Update rce for use in rate control VBV later */
+            rce->lastSatd = m_currentSatd;
+            double q = x265_qScale2qp(rateEstimateQscale(pic, rce));
+            q = Clip3((double)MIN_QP, (double)MAX_MAX_QP, q);
+            m_qp = int(q + 0.5);
+            rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = q;
+            /* copy value of lastRceq into thread local rce struct *to be used in RateControlEnd() */
+            rce->qRceq = m_lastRceq;
+            accumPQpUpdate();
+        }
+        else //CQP
+        {
+            if (m_sliceType == B_SLICE && m_curSlice->isReferenced())
+                m_qp = (m_qpConstant[B_SLICE] + m_qpConstant[P_SLICE]) / 2;
+            else
+                m_qp = m_qpConstant[m_sliceType];
+            pic->m_avgQpAq = pic->m_avgQpRc = m_qp;
+        }
+        if (m_sliceType != B_SLICE)
+        {
+            m_lastNonBPictType = m_sliceType;
+            m_leadingNoBSatd = m_currentSatd;
+        }
+        rce->leadingNoBSatd = m_leadingNoBSatd;
+        if (pic->m_forceqp)
+        {
+            m_qp = int32_t(pic->m_forceqp + 0.5) - 1;
+            m_qp = Clip3(MIN_QP, MAX_MAX_QP, m_qp);
+            rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
+        }
+        m_framesDone++;
+        /* set the final QP to slice structure */
+        m_curSlice->setSliceQp(m_qp);
+        }
+    else
+        m_startEndOrder.incr();
 }
 
 void RateControl::accumPQpUpdate()
@@ -1268,6 +1281,12 @@
 /* After encoding one frame, update rate control state */
 int RateControl::rateControlEnd(Frame* pic, int64_t bits, RateControlEntry* rce, FrameStats* stats)
 {
+    int orderValue = m_startEndOrder.get();
+    int endOrdinal = (rce->encodeOrder + m_param->frameNumThreads) * 2 - 1;
+    while (orderValue != endOrdinal)
+            orderValue = m_startEndOrder.waitForChange(orderValue);
+    ScopedLock scope(m_lock);
+
     int64_t actualBits = bits;
     if (m_isAbr)
     {
@@ -1366,17 +1385,19 @@
             }
 
             if (rce->sliceType != B_SLICE)
+            {
                 /* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
                  * to improve short term compensation for next frame. */
-                m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq;
+                 m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq) - (rce->rowCplxrSum);
+            }
             else
             {
                 /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
                  * Not perfectly accurate with B-refs, but good enough. */
-                m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor));
+                m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor))) - (rce->rowCplxrSum);
             }
             m_wantedBitsWindow += m_frameDuration * m_bitrate;
-            m_totalBits += bits;
+            m_totalBits += bits - rce->rowTotalBits;
         }
     }
 
@@ -1425,6 +1446,7 @@
             rce->hrdTiming->dpbOutputTime = (double)rce->picTimingSEI->m_picDpbOutputDelay * time->getNumUnitsInTick() / time->getTimeScale() + rce->hrdTiming->cpbRemovalTime;
         }
     }
+    m_startEndOrder.incr();
     rce->isActive = false;
     return 0;
 


More information about the x265-devel mailing list