[x265] [PATCH RFC] rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr

Steve Borho steve at borho.org
Wed Jul 16 18:57:27 CEST 2014


On 07/16, santhoshini at multicorewareinc.com wrote:
> # HG changeset patch
> # User Santhoshini Sekar <santhoshini at multicorewareinc.com>
> # Date 1405506674 -19800
> #      Wed Jul 16 16:01:14 2014 +0530
> # Node ID 15c4b8f0c29dd11bcead8cfad298348dcada5d60
> # Parent  d850cbf81e0f4831d8dcf89db83561969e456205
> rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr
> 
> RateControl statistics are updated for every frame when refLagRows number of
> rows are completed in processRowEncoder. With this updated data rateControl
> predicts more accurate QP and results in better compression without deterioration
> in quality
> 
> Results:
>                        before/After   before/After  before/After  before/After      before/After
> video name                   FPS        bitrate   Global PSNR         SSIM            SSIM(dB)
> parkrun.y4m             6.34/6.34   2519.2/2379.27  29.636/29.547  0.8144/0.810795   7.314/7.231
> Johnny.y4m              5.72/5.73   2885.1/2858.86  44.667/44.639  0.971884/0.971781 15.511/15.495
> raindrops.YUV           1.68/1.71   3215.03/3021.71 32.156/32.071  0.734131/0.731214 5.753/5.706

These numbers don't tell us much without knowing what the ABR bitrate
targets were.

> diff -r d850cbf81e0f -r 15c4b8f0c29d source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Tue Jul 15 22:47:54 2014 -0500
> +++ b/source/encoder/analysis.cpp	Wed Jul 16 16:01:14 2014 +0530
> @@ -65,7 +65,7 @@
>  
>      m_rdCost.setPsyRdScale(m_param->psyRd);
>      m_bEnableRDOQ = top->m_bEnableRDOQ;
> -    m_bFrameParallel = top->m_totalFrameThreads > 1;
> +    m_bFrameParallel = m_param->frameNumThreads > 1;
>      m_numLayers = top->m_quadtreeTULog2MaxSize - top->m_quadtreeTULog2MinSize + 1;
>  
>      return initSearch();
> diff -r d850cbf81e0f -r 15c4b8f0c29d source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp	Tue Jul 15 22:47:54 2014 -0500
> +++ b/source/encoder/encoder.cpp	Wed Jul 16 16:01:14 2014 +0530
> @@ -150,7 +150,7 @@
>  
>      if (m_frameEncoder)
>      {
> -        for (int i = 0; i < m_totalFrameThreads; i++)
> +        for (int i = 0; i < m_param->frameNumThreads; i++)
>          {
>              // Ensure frame encoder is idle before destroying it
>              m_frameEncoder[i].getEncodedPicture(m_nalList);
> @@ -320,20 +320,6 @@
>      else
>          m_lookahead->flush();
>  
> -    if (m_param->rc.rateControlMode == X265_RC_ABR)
> -    {
> -        // delay frame parallelism for non-VBV ABR
> -        if (m_pocLast == 0 && !m_param->rc.vbvBufferSize && !m_param->rc.vbvMaxBitrate)
> -            m_param->frameNumThreads = 1;
> -        else if (m_param->frameNumThreads != m_totalFrameThreads)
> -        {
> -            // re-enable frame parallelism after the first few P frames are encoded
> -            uint32_t frameCnt = (uint32_t)((0.5 * m_param->fpsNum / m_param->fpsDenom) / (m_param->bframes + 1));
> -            if (m_analyzeP.m_numPics > frameCnt)
> -                m_param->frameNumThreads = m_totalFrameThreads;
> -        }
> -    }
> -
>      FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
>      m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
>      int ret = 0;
> @@ -402,26 +388,11 @@
>              if (bChroma)
>                  m_numChromaWPBiFrames++;
>          }
> -
> -        uint64_t bytes = 0;
> -        for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
> +        if (m_aborted == true)
>          {
> -            int type = m_nalList.m_nal[i].type;
> -
> -            // exclude SEI
> -            if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
> -            {
> -                bytes += m_nalList.m_nal[i].sizeBytes;
> -                // and exclude start code prefix
> -                bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
> -            }
> -        }
> -        if (m_rateControl->rateControlEnd(out, bytes << 3, &curEncoder->m_rce, &curEncoder->m_frameStats) < 0)
> -        {
> -            m_aborted = true;
>              return -1;
>          }
> -        finishFrameStats(out, curEncoder, bytes << 3);
> +        finishFrameStats(out, curEncoder, curEncoder->m_accessUnitBits);
>          // Allow this frame to be recycled if no frame encoders are using it for reference
>          if (!pic_out)
>          {
> @@ -474,12 +445,17 @@
>          // determine references, setup RPS, etc
>          m_dpb->prepareEncode(fenc);
>  
> -        // set slice QP
> -        m_rateControl->rateControlStart(fenc, m_lookahead, &curEncoder->m_rce, this);
>  
>          // Allow FrameEncoder::compressFrame() to start in a worker thread
>          curEncoder->m_enable.trigger();
>      }
> +    else if (!fenc && m_encodedFrameNum > 0)
> +    {
> +        // faked rateControlStart calls to avoid rateControlEnd of last frameNumThreads parallel frames from waiting
> +        RateControlEntry rce;
> +        rce.encodeOrder = m_encodedFrameNum++;
> +        m_rateControl->rateControlStart(NULL, m_lookahead, &rce, this);
> +    }
>  
>      return ret;
>  }
> @@ -1229,7 +1205,6 @@
>      {
>          x265_log(p, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n");
>      }
> -    m_totalFrameThreads = m_param->frameNumThreads;
>  
>      if (p->keyframeMax < 0)
>      {
> diff -r d850cbf81e0f -r 15c4b8f0c29d source/encoder/encoder.h
> --- a/source/encoder/encoder.h	Tue Jul 15 22:47:54 2014 -0500
> +++ b/source/encoder/encoder.h	Wed Jul 16 16:01:14 2014 +0530
> @@ -71,7 +71,6 @@
>  {
>  private:
>  
> -    bool               m_aborted;          // fatal error detected
>      int                m_pocLast;          ///< time index (POC)
>      int                m_encodedFrameNum;
>      int                m_outputCount;
> @@ -82,7 +81,6 @@
>      int64_t            m_prevReorderedPts[2];
>  
>      ThreadPool*        m_threadPool;
> -    Lookahead*         m_lookahead;
>      FrameEncoder*      m_frameEncoder;
>      DPB*               m_dpb;
>  
> @@ -90,15 +88,6 @@
>  
>      int                m_curEncoder;
>  
> -
> -    /* Collect statistics globally */
> -    EncStats           m_analyzeAll;
> -    EncStats           m_analyzeI;
> -    EncStats           m_analyzeP;
> -    EncStats           m_analyzeB;
> -    FILE*              m_csvfpt;
> -    int64_t            m_encodeStartTime;
> -
>      // quality control
>      TComScalingList    m_scalingList;      ///< quantization matrix information
>  
> @@ -141,6 +130,18 @@
>      //====== Tool list ========
>      int                m_lastBPSEI;
>  
> +    /* Collect statistics globally */
> +    EncStats           m_analyzeAll;
> +    EncStats           m_analyzeI;
> +    EncStats           m_analyzeP;
> +    EncStats           m_analyzeB;
> +    FILE*              m_csvfpt;
> +    int64_t            m_encodeStartTime;
> +
> +    Lookahead*         m_lookahead;
> +
> +    bool               m_aborted;          // fatal error detected
> +
>      uint32_t           m_log2ParallelMergeLevelMinus2; ///< Parallel merge estimation region
>  
>      int                m_useScalingListId; ///< Using quantization matrix i.e. 0=off, 1=default.
> @@ -165,8 +166,6 @@
>      Window             m_conformanceWindow;
>      Window             m_defaultDisplayWindow;
>  
> -    int                m_totalFrameThreads;
> -
>      uint32_t           m_numDelayedPic;
>  
>      Encoder();
> diff -r d850cbf81e0f -r 15c4b8f0c29d source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp	Tue Jul 15 22:47:54 2014 -0500
> +++ b/source/encoder/frameencoder.cpp	Wed Jul 16 16:01:14 2014 +0530
> @@ -445,26 +445,6 @@
>          m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
>      }
>  
> -    int qp = slice->getSliceQp();
> -
> -    int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
> -    int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
> -    
> -    double lambda = x265_lambda2_tab[qp];
> -    /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
> -    double chromaLambda = x265_lambda2_tab[qpCb];
> -
> -    // NOTE: set SAO lambda every Frame
> -    m_frameFilter.m_sao.lumaLambda = lambda;
> -    m_frameFilter.m_sao.chromaLambda = chromaLambda;
> -
> -    // Clip qps back to 0-51 range before encoding
> -    qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
> -    slice->setSliceQp(qp);
> -    m_frame->m_avgQpAq = qp;
> -    slice->setSliceQpDelta(0);
> -    slice->setSliceQpDeltaCb(0);
> -    slice->setSliceQpDeltaCr(0);
>  
>      switch (slice->getSliceType())
>      {
> @@ -622,6 +602,23 @@
>          }
>      }
>  
> +    uint64_t bytes = 0;
> +    for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
> +    {
> +        int type = m_nalList.m_nal[i].type;
> +
> +        // exclude SEI
> +        if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
> +        {
> +            bytes += m_nalList.m_nal[i].sizeBytes;
> +            // and exclude start code prefix
> +            bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
> +        }
> +    }
> +    m_accessUnitBits = bytes << 3;
> +    if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce, &m_frameStats) < 0)
> +        m_top->m_aborted = true;
> +
>      noiseReductionUpdate();
>  
>      m_elapsedCompressTime = (double)(x265_mdate() - startCompressTime) / 1000000;
> @@ -720,6 +717,27 @@
>      PPAScopeEvent(FrameEncoder_compressRows);
>      TComSlice* slice = m_frame->getSlice();
>  
> +    // set slice QP
> +    m_top->m_rateControl->rateControlStart(m_frame, m_top->m_lookahead, &m_rce, m_top);
> +    int qp = slice->getSliceQp();
> +
> +    int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
> +    int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
> +    double lambda = x265_lambda2_tab[qp];
> +    /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
> +    double chromaLambda = x265_lambda2_tab[qpCb];
> +
> +    // NOTE: set SAO lambda every Frame
> +    m_frameFilter.m_sao.lumaLambda = lambda;
> +    m_frameFilter.m_sao.chromaLambda = chromaLambda;
> +
> +    // Clip qps back to 0-51 range before encoding
> +    qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
> +    slice->setSliceQp(qp);
> +    m_frame->m_avgQpAq = qp;
> +    slice->setSliceQpDelta(0);
> +    slice->setSliceQpDeltaCb(0);
> +    slice->setSliceQpDeltaCr(0);
>      // reset entropy coders
>      m_sbacCoder.resetEntropy(slice);
>      for (int i = 0; i < this->m_numRows; i++)
> @@ -1047,7 +1065,31 @@
>          }
>      }
>  
> +    /* when a frame is half way through, update bits and complexity in rate control
> +     * for it to be available for the next frame's QScale calculation. This makes it 
> +     * more accurate with updated value */
> +    int rowCount;
> +
> +    /* for the first two seconds update when the frame is half done and for rest
> +     * of the sequence update when refLagRows are completed */
> +    if (m_top->m_analyzeAll.m_numPics <= 2 * (m_param->fpsNum / m_param->fpsDenom))
> +        rowCount = m_numRows/2 ;

white-space (e.g. the stray space before the semicolon)

> +    else
> +        rowCount = m_refLagRows;
> +
>      // this row of CTUs has been encoded
> +    if (row == rowCount)
> +    {
> +        int64_t bits = 0;
> +        for(uint32_t col = 0; col < rowCount * numCols; col++)
> +        {
> +            TComDataCU* cu = m_frame->getCU(col);
> +            bits += cu->m_totalBits;
> +        }
> +
> +        m_rce.rowTotalBits = bits;
> +        m_top->m_rateControl->rateControlUpdateStats(&m_rce);
> +    }

Given the usual encode order of PBBBP, the 3 B frames are typically able
to begin coding at the same time when the first P's first two rows are
done.  This patch will make each B wait for the previous B to reach two
rows before it starts; which is fine for ABR but would be wasteful for
CRF and CQP.  We should make an exception for those two modes so we
don't introduce any more lag than necessary.

>  
>      // trigger row-wise loop filters
>      if (row >= m_filterRowDelay)
> diff -r d850cbf81e0f -r 15c4b8f0c29d source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h	Tue Jul 15 22:47:54 2014 -0500
> +++ b/source/encoder/frameencoder.h	Wed Jul 16 16:01:14 2014 +0530
> @@ -137,6 +137,8 @@
>      FrameStats               m_frameStats;          // stats of current frame for multipass encodes
>      volatile bool            m_bAllRowsStop;
>      volatile int             m_vbvResetTriggerRow;
> +    Frame*                   m_frame;
> +    uint64_t                 m_accessUnitBits;
>  
>  protected:
>  
> @@ -155,7 +157,6 @@
>      NALList                  m_nalList;
>      ThreadLocalData          m_tld;
>  
> -    Frame*                   m_frame;
>  
>      int                      m_filterRowDelay;
>      int                      m_filterRowDelayCus;
> diff -r d850cbf81e0f -r 15c4b8f0c29d source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp	Tue Jul 15 22:47:54 2014 -0500
> +++ b/source/encoder/ratecontrol.cpp	Wed Jul 16 16:01:14 2014 +0530
> @@ -300,6 +300,7 @@
>      m_rateFactorMaxIncrement = 0;
>      m_rateFactorMaxDecrement = 0;
>      m_fps = m_param->fpsNum / m_param->fpsDenom;
> +    m_startEndOrder.set(0);
>      if (m_param->rc.rateControlMode == X265_RC_CRF)
>      {
>          m_param->rc.qp = (int)m_param->rc.rfConstant;
> @@ -979,6 +980,19 @@
>  
>  void RateControl::rateControlStart(Frame* pic, Lookahead *l, RateControlEntry* rce, Encoder* enc)
>  {
> +    int orderValue = m_startEndOrder.get();
> +    int startOrdinal = rce->encodeOrder * 2;
> +
> +    while (orderValue != startOrdinal && pic)
> +       orderValue = m_startEndOrder.waitForChange(orderValue);
> +
> +    if (!pic)
> +    {
> +        // faked rateControlStart calls
> +        m_startEndOrder.incr();
> +        return;
> +    }
> +
>      m_curSlice = pic->getSlice();
>      m_sliceType = m_curSlice->getSliceType();
>      rce->sliceType = m_sliceType;
> @@ -991,6 +1005,8 @@
>      rce->bLastMiniGopBFrame = pic->m_lowres.bLastMiniGopBFrame;
>      rce->bufferRate = m_bufferRate;
>      rce->poc = m_curSlice->getPOC();
> +    rce->rowCplxrSum = 0.0;
> +    rce->rowTotalBits = 0;
>      if (m_isVbv)
>      {
>          if (rce->rowPreds[0][0].count == 0)
> @@ -1044,6 +1060,8 @@
>          m_qp = Clip3(MIN_QP, MAX_MAX_QP, m_qp);
>          rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
>      }
> +    // Do not increment m_startEndOrder here. Make rateControlEnd of previous thread
> +    // to wait until rateControlUpdateStats of this frame is called
>      m_framesDone++;
>      /* set the final QP to slice structure */
>      m_curSlice->setSliceQp(m_qp);
> @@ -1361,6 +1379,12 @@
>  
>      m_cplxrSum += rce->rowCplxrSum;
>      m_totalBits += rce->rowTotalBits;
> +
> +    /* delay incrementing m_startEndOrder until here to sync with rateControlStart() */
> +    m_startEndOrder.incr();
> +
> +    if (rce->encodeOrder < m_param->frameNumThreads - 1)
> +        m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
>  }
>  
>  void RateControl::checkAndResetABR(RateControlEntry* rce, bool isFrameDone)
> @@ -1820,6 +1844,11 @@
>  /* After encoding one frame, update rate control state */
>  int RateControl::rateControlEnd(Frame* pic, int64_t bits, RateControlEntry* rce, FrameStats* stats)
>  {
> +    int orderValue = m_startEndOrder.get();
> +    int endOrdinal = (rce->encodeOrder + m_param->frameNumThreads) * 2 - 1;
> +    while (orderValue != endOrdinal)
> +            orderValue = m_startEndOrder.waitForChange(orderValue);

white-space (the loop body is over-indented)

> +
>      int64_t actualBits = bits;
>      if (m_isAbr)
>      {
> @@ -1919,17 +1948,19 @@
>                  }
>              }
>              if (rce->sliceType != B_SLICE)
> +            {
>                  /* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
>                   * to improve short term compensation for next frame. */
> -                m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq;
> +                 m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq) - (rce->rowCplxrSum);

white-space (one extra space of indentation on the changed line)

> +            }
>              else
>              {
>                  /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
>                   * Not perfectly accurate with B-refs, but good enough. */
> -                m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor));
> +                m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor))) - (rce->rowCplxrSum);
>              }
>              m_wantedBitsWindow += m_frameDuration * m_bitrate;
> -            m_totalBits += bits;
> +            m_totalBits += bits - rce->rowTotalBits;
>          }
>      }
>  
> @@ -1973,6 +2004,8 @@
>              rce->hrdTiming->dpbOutputTime = (double)rce->picTimingSEI->m_picDpbOutputDelay * time->numUnitsInTick / time->timeScale + rce->hrdTiming->cpbRemovalTime;
>          }
>      }
> +    // Allow rateControlStart of next frame only when rateControlEnd of previous frame is over
> +    m_startEndOrder.incr();
>      rce->isActive = false;
>      return 0;
>  
> diff -r d850cbf81e0f -r 15c4b8f0c29d source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h	Tue Jul 15 22:47:54 2014 -0500
> +++ b/source/encoder/ratecontrol.h	Wed Jul 16 16:01:14 2014 +0530
> @@ -147,6 +147,9 @@
>      int64_t  m_totalBits;        /* total bits used for already encoded frames */
>      int      m_framesDone;       /* # of frames passed through RateCotrol already */
>      double   m_fps;
> +
> +    ThreadSafeInteger m_startEndOrder;

This variable deserves a comment describing how it is used to serialize
the calls to start/updateStats/end.  Explain it to a developer who is
looking at ratecontrol.h for the first time.

> +
>      /* hrd stuff */
>      SEIBufferingPeriod m_bufPeriodSEI;
>      double   m_nominalRemovalTime;
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list