[x265] [PATCH] ratecontrol: improve visual quality and bitrate savings in ABR

Steve Borho steve at borho.org
Thu May 29 20:22:29 CEST 2014


On Thu, May 29, 2014 at 1:10 PM,  <aarthi at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Aarthi Thirumalai
> # Date 1401386381 -19800
> #      Thu May 29 23:29:41 2014 +0530
> # Node ID cbefd4760814f85da2f805781177a1fade437709
> # Parent  e9776dfd1471ec6691276518007b725095ab6d52
> ratecontrol: improve visual quality and bitrate savings in ABR.
>
> Try to prevent ABR over-compensation after I frames by amortizing the cost over
> the next few frames;
>
> Improve ABR quality with frame parallelism - enable frame parallelism only after the
> first few P frames to prevent excessive qp fluctuations.
>
> Fix initial I frame qp. When cu tree is enabled, the qp decided arbitrarily is
> too low. This causes a huge qp spike in immediate P frames. Tuned cplxrSum
> for initial I frame so that a more appropriate qp is chosen.
>
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Wed May 28 20:39:40 2014 -0500
> +++ b/source/encoder/encoder.cpp        Thu May 29 23:29:41 2014 +0530
> @@ -186,6 +186,7 @@
>      }
>      m_lookahead->init();
>      m_encodeStartTime = x265_mdate();
> +    m_totalFrameThreads = param->frameNumThreads;
>  }
>
>  int Encoder::getStreamHeaders(NALUnitEBSP **nalunits)
> @@ -323,6 +324,20 @@
>      if (flush)
>          m_lookahead->flush();
>
> +    if (param->rc.rateControlMode == X265_RC_ABR)
> +    {
> +        // delay frame parallelism for non-VBV ABR
> +        if (m_pocLast == 0 && !param->rc.vbvBufferSize && !param->rc.vbvMaxBitrate)
> +            param->frameNumThreads = 1;
> +        else if (param->frameNumThreads != m_totalFrameThreads)
> +        {
> +            // re-enable frame parallelism after the first few P frames are encoded
> +            uint32_t frameCnt = (uint32_t)((0.5 * param->fpsNum / param->fpsDenom) / (param->bframes + 1));
> +            if (m_analyzeP.m_numPics > frameCnt)
> +                param->frameNumThreads = m_totalFrameThreads;
> +        }
> +    }

There are a few places where param->frameNumThreads is used that should be
modified to use m_totalFrameThreads instead. In particular, the
encoder destroy() function, the frame encoder, and other places
which peek at this param to determine whether frame parallelism is enabled
(and therefore whether the ME search range is restricted).  Leaving them
as they are could lead to non-determinism or memory leaks.

> +
>      FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
>      m_curEncoder = (m_curEncoder + 1) % param->frameNumThreads;
>      int ret = 0;
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/encoder.h
> --- a/source/encoder/encoder.h  Wed May 28 20:39:40 2014 -0500
> +++ b/source/encoder/encoder.h  Thu May 29 23:29:41 2014 +0530
> @@ -90,6 +90,7 @@
>      DPB*               m_dpb;
>      /* frame parallelism */
>      int                m_curEncoder;
> +    int                m_totalFrameThreads;
>
>      /* Collect statistics globally */
>      EncStats           m_analyzeAll;
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp    Wed May 28 20:39:40 2014 -0500
> +++ b/source/encoder/ratecontrol.cpp    Thu May 29 23:29:41 2014 +0530
> @@ -30,6 +30,10 @@
>
>  using namespace x265;
>
> +/* Amortize the partial cost of I frames over the next N frames */
> +const double RateControl::amortizeFraction = 0.85;
> +const int RateControl::amortizeFrames = 75;
> +
>  /* Compute variance to derive AC energy of each block */
>  static inline uint32_t acEnergyVar(TComPic *pic, uint64_t sum_ssd, int shift, int i)
>  {
> @@ -204,6 +208,8 @@
>          qCompress = param->rc.qCompress;
>
>      // validate for param->rc, maybe it is need to add a function like x265_parameters_valiate()
> +    residualFrames = 0;
> +    residualCost = 0;
>      param->rc.rfConstant = Clip3((double)-QP_BD_OFFSET, (double)51, param->rc.rfConstant);
>      param->rc.rfConstantMax = Clip3((double)-QP_BD_OFFSET, (double)51, param->rc.rfConstantMax);
>      rateFactorMaxIncrement = 0;
> @@ -316,7 +322,7 @@
>      {
>          /* Adjust the first frame in order to stabilize the quality level compared to the rest */
>  #define ABR_INIT_QP_MIN (24 + QP_BD_OFFSET)
> -#define ABR_INIT_QP_MAX (34 + QP_BD_OFFSET)
> +#define ABR_INIT_QP_MAX (40 + QP_BD_OFFSET)
>      }
>      else if (param->rc.rateControlMode == X265_RC_CRF)
>      {
> @@ -353,9 +359,12 @@
>  {
>      totalBits = 0;
>      framesDone = 0;
> -
> +    double tuneCplxFactor = 1;
> +    /* 720p videos seem to be a good cutoff for cplxrSum */
> +    if (param->rc.cuTree && ncu > 3600)
> +        tuneCplxFactor = 2.5;
>      /* estimated ratio that produces a reasonable QP for the first I-frame */
> -    cplxrSum = .01 * pow(7.0e5, qCompress) * pow(ncu, 0.5);
> +    cplxrSum = .01 * pow(7.0e5, qCompress) * pow(ncu, 0.5) * tuneCplxFactor;
>      wantedBitsWindow = bitrate * frameDuration;
>      accumPNorm = .01;
>      accumPQp = (param->rc.rateControlMode == X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN) * accumPNorm;
> @@ -550,7 +559,7 @@
>                  /* use framesDone instead of POC as poc count is not serial with bframes enabled */
>                  double timeDone = (double)(framesDone - param->frameNumThreads + 1) * frameDuration;
>                  wantedBits = timeDone * bitrate;
> -                if (wantedBits > 0 && totalBits > 0)
> +                if (wantedBits > 0 && totalBits > 0 && !residualFrames)
>                  {
>                      abrBuffer *= X265_MAX(1, sqrt(timeDone));
>                      overflow = Clip3(.5, 2.0, 1.0 + (totalBits - wantedBits) / abrBuffer);
> @@ -572,10 +581,13 @@
>                  double lqmin = 0, lqmax = 0;
>                  lqmin = lastQScaleFor[sliceType] / lstep;
>                  lqmax = lastQScaleFor[sliceType] * lstep;
> -                if (overflow > 1.1 && framesDone > 3)
> -                    lqmax *= lstep;
> -                else if (overflow < 0.9)
> -                    lqmin /= lstep;
> +                if (!residualFrames)
> +                {
> +                    if (overflow > 1.1 && framesDone > 3)
> +                        lqmax *= lstep;
> +                    else if (overflow < 0.9)
> +                        lqmin /= lstep;
> +                }
>                  q = Clip3(lqmin, lqmax, q);
>              }
>          }
> @@ -1083,6 +1095,24 @@
>                  }
>              }
>
> +            /* amortize part of each I slice over the next several frames, up to
> +             * keyint-max, to avoid over-compensating for the large I slice cost */
> +            if (rce->sliceType == I_SLICE)
> +            {
> +                /* previous I still had a residual; roll it into the new loan */
> +                if (residualFrames)
> +                    bits += residualCost * residualFrames;
> +
> +                residualFrames = X265_MIN(amortizeFrames, param->keyframeMax);
> +                residualCost = (int)((bits * amortizeFraction) / residualFrames);
> +                bits -= residualCost * residualFrames;
> +            }
> +            else if (residualFrames)
> +            {
> +                bits += residualCost;
> +                residualFrames--;
> +            }
> +
>              if (rce->sliceType != B_SLICE)
>                  /* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
>                   * to improve short term compensation for next frame. */
> @@ -1111,7 +1141,7 @@
>              }
>          }
>          updateVbv(bits, rce);
> -        rce->isActive = false;
>      }
> +    rce->isActive = false;
>      return 0;
>  }
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h      Wed May 28 20:39:40 2014 -0500
> +++ b/source/encoder/ratecontrol.h      Thu May 29 23:29:41 2014 +0530
> @@ -132,6 +132,12 @@
>
>  protected:
>
> +    static const double amortizeFraction;
> +    static const int amortizeFrames;
> +
> +    int residualFrames;
> +    int residualCost;
> +
>      void init();
>      double getQScale(RateControlEntry *rce, double rateFactor);
>      double rateEstimateQscale(TComPic* pic, RateControlEntry *rce); // main logic for calculating QP based on ABR
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



-- 
Steve Borho


More information about the x265-devel mailing list