[x265] [PATCH] rc: add 2 pass logic in rateEstimateQscale

Steve Borho steve at borho.org
Wed Jul 16 21:45:16 CEST 2014


On 07/17, Aarthi Priya Thirumalai wrote:
> # HG changeset patch
> # User Aarthi Thirumalai<aarthi at multicorewareinc.com>
> # Date 1405013200 -19800
> #      Thu Jul 10 22:56:40 2014 +0530
> # Node ID 96e43814fcc6e9b661c16ec8230768b348ec6ce1
> # Parent  2737d0b05b72ca90f09987e6cf30b7c37e38b83c
> rc: add 2 pass logic in rateEstimateQscale
> 
> adjust qscale of each frame based on distance to end of the video and
> the difference between achieved and expected bits so far in the final pass.
> 
> diff -r 2737d0b05b72 -r 96e43814fcc6 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Wed Jul 16 01:20:29 2014 -0500
> +++ b/source/encoder/encoder.cpp Thu Jul 10 22:56:40 2014 +0530
> @@ -41,7 +41,7 @@
> 
>  #include "x265.h"
> 
> -static const char *summaryCSVHeader =
> +const char *summaryCSVHeader =
>      "Command, Date/Time, Elapsed Time, FPS, Bitrate, "
>      "Y PSNR, U PSNR, V PSNR, Global PSNR, SSIM, SSIM (dB), "
>      "I count, I ave-QP, I kpbs, I-PSNR Y, I-PSNR U, I-PSNR V, I-SSIM (dB),
> "
> @@ -49,6 +49,8 @@
>      "B count, B ave-QP, B kpbs, B-PSNR Y, B-PSNR U, B-PSNR V, B-SSIM (dB),
> "
>      "Version\n";
> 
> +const char sliceTypeToChar[] = { 'B', 'P', 'I' };

These should have g_ prefixes if they're not going to be file-static,
and they need to be within the x265 namespace.

>  using namespace x265;
> 
>  Encoder::Encoder()
> @@ -227,6 +229,8 @@
>              rc->m_bufferFill = X265_MAX(rc->m_bufferFill, 0);
>              rc->m_bufferFill += encoder->m_rce.bufferRate;
>              rc->m_bufferFill = X265_MIN(rc->m_bufferFill,
> rc->m_bufferSize);
> +            if (rc->m_2pass)
> +                rc->m_predictedBits +=
> (int64_t)encoder->m_rce.frameSizeEstimated;
>          }
>          encIdx = (encIdx + 1) % m_param->frameNumThreads;
>      }
> diff -r 2737d0b05b72 -r 96e43814fcc6 source/encoder/encoder.h
> --- a/source/encoder/encoder.h Wed Jul 16 01:20:29 2014 -0500
> +++ b/source/encoder/encoder.h Thu Jul 10 22:56:40 2014 +0530
> @@ -30,6 +30,9 @@
> 
>  struct x265_encoder {};
> 
> +extern const char *summaryCSVHeader;
> +extern const char sliceTypeToChar[3];
> +
>  namespace x265 {
>  // private namespace
> 
> diff -r 2737d0b05b72 -r 96e43814fcc6 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Wed Jul 16 01:20:29 2014 -0500
> +++ b/source/encoder/ratecontrol.cpp Thu Jul 10 22:56:40 2014 +0530
> @@ -122,6 +122,23 @@
>             + rce->miscBits;
>  }
> 
> +inline void copyRceData(RateControlEntry* rce, RateControlEntry* rce2Pass)
> +{
> +    rce->coeffBits = rce2Pass->coeffBits;
> +    rce->mvBits = rce2Pass->mvBits;
> +    rce->miscBits = rce2Pass->miscBits;
> +    rce->iCuCount = rce2Pass->iCuCount;
> +    rce->pCuCount = rce2Pass->pCuCount;
> +    rce->skipCuCount = rce2Pass->skipCuCount;
> +    rce->keptAsRef = rce2Pass->keptAsRef;
> +    rce->qScale = rce2Pass->qScale;
> +    rce->newQScale = rce2Pass->newQScale;
> +    rce->expectedBits = rce2Pass->expectedBits;
> +    rce->expectedVbv = rce2Pass->expectedVbv;
> +    rce->blurredComplexity = rce2Pass->blurredComplexity;
> +    rce->sliceType = rce2Pass->sliceType;
> +}
> +
>  }  // end anonymous namespace
>  /* Compute variance to derive AC energy of each block */
>  static inline uint32_t acEnergyVar(Frame *pic, uint64_t sum_ssd, int
> shift, int i)
> @@ -982,6 +999,12 @@
>      m_curSlice = pic->getSlice();
>      m_sliceType = m_curSlice->getSliceType();
>      rce->sliceType = m_sliceType;
> +    if (m_param->rc.bStatRead)
> +    {
> +        X265_CHECK(rce->encodeOrder >= 0 && rce->encodeOrder <
> m_numEntries,
> +                    "Frame encode order is more than total no. of frames
> in the first pass");
> +        copyRceData(rce, &m_rce2Pass[rce->encodeOrder]);

X265_CHECK() shouldn't be used for potential user errors; it is intended
for internal programming errors (violated function parameter
expectations, etc.).  If it's possible for a user to run two encodes with
different frame counts and cause a memory access error, we must prevent
the crash even in non-checked builds.

> +    }
>      rce->isActive = true;
>      if (m_sliceType == B_SLICE)
>          rce->bframes = m_bframes;
> @@ -1008,6 +1031,7 @@
>          }
>          rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
>          rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
> +        m_predictedBits = m_totalBits;
>          updateVbvPlan(enc);
>          rce->bufferFill = m_bufferFill;
>      }
> @@ -1045,6 +1069,7 @@
>          rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
>      }
>      m_framesDone++;
> +    rce->newQp = m_qp;
>      /* set the final QP to slice structure */
>      m_curSlice->setSliceQp(m_qp);
>  }
> @@ -1186,6 +1211,14 @@
>  {
>      double q;
> 
> +    if (m_2pass)
> +    {
> +        if (m_sliceType != rce->sliceType)
> +        {
> +            x265_log(m_param, X265_LOG_ERROR, "slice=%c but 2pass stats
> say %c\n",
> +                     sliceTypeToChar[m_sliceType],
> sliceTypeToChar[rce->sliceType]);
> +        }
> +    }
>      if (m_sliceType == B_SLICE)
>      {
>          /* B-frames don't have independent rate control, but rather get the
> @@ -1230,109 +1263,172 @@
>          double qScale = x265_qp2qScale(q);
>          rce->frameSizePlanned = predictSize(&m_predBfromP, qScale,
> (double)m_leadingNoBSatd);
>          rce->frameSizeEstimated = rce->frameSizePlanned;
> +        rce->newQScale = qScale;
>          return qScale;
>      }
>      else
>      {
>          double abrBuffer = 2 * m_param->rc.rateTolerance * m_bitrate;
> +        if (m_2pass)
> +        {
> +            int64_t diff;
> +            if (!m_isVbv)
> +            {
> +                m_predictedBits = m_totalBits;
> +                if (rce->encodeOrder < m_param->frameNumThreads)
> +                    m_predictedBits += (int64_t)(rce->encodeOrder *
> m_bitrate / m_fps) ;

white-space: there is a stray space before the semicolon on the line above

> +                else
> +                    m_predictedBits += (int64_t)(m_param->frameNumThreads
> * m_bitrate / m_fps);
> +            }
> +            /* Adjust ABR buffer based on distance to the end of the
> video. */
> +            if (m_numEntries > rce->encodeOrder)
> +            {
> +                uint64_t finalBits = m_rce2Pass[m_numEntries -
> 1].expectedBits;
> +                double videoPos = (double)rce->expectedBits / finalBits;
> +                double scaleFactor = sqrt((1 - videoPos) * m_numEntries);
> +                abrBuffer *= 0.5 * X265_MAX(scaleFactor, 0.5);
> +            }
> 
> -        /* 1pass ABR */
> +            diff = m_predictedBits - (int64_t)rce->expectedBits;
> +            q = rce->newQScale;
> +            q /= Clip3(0.5, 2.0, (double)(abrBuffer - diff) / abrBuffer);
> +            if (((rce->encodeOrder + 1 - m_param->frameNumThreads) >=
> m_fps) &&
> +                (m_expectedBitsSum > 0))
> +            {
> +                /* Adjust quant based on the difference between
> +                 * achieved and expected bitrate so far */
> +                double curTime = (double)rce->encodeOrder / m_numEntries;
> +                double w = Clip3(0.0, 1.0, curTime * 100);
> +                q *= pow((double)m_totalBits / m_expectedBitsSum, w);
> +            }
> +            rce->qpNoVbv = x265_qScale2qp(q);
> +            if (m_isVbv)
> +            {
> +                /* Do not overflow vbv */
> +                double expectedSize = qScale2bits(rce, q);
> +                double expectedVbv = m_bufferFill + m_bufferRate -
> expectedSize;
> +                double expectedFullness = rce->expectedVbv / m_bufferSize;
> +                double qmax = q * (2 - expectedFullness);
> +                double sizeConstraint = 1 + expectedFullness;
> +                qmax = X265_MAX(qmax, rce->newQScale);
> +                if (expectedFullness < .05)
> +                    qmax = MAX_MAX_QPSCALE;
> +                qmax = X265_MIN(qmax, MAX_MAX_QPSCALE);
> +                while (((expectedVbv < rce->expectedVbv/sizeConstraint) &&
> (q < qmax)) ||
> +                        ((expectedVbv < 0) && (q < MAX_MAX_QPSCALE)))
> +                {
> +                    q *= 1.05;
> +                    expectedSize = qScale2bits(rce, q);
> +                    expectedVbv = m_bufferFill + m_bufferRate -
> expectedSize;
> +                }
> 
> -        /* Calculate the quantizer which would have produced the desired
> -         * average bitrate if it had been applied to all frames so far.
> -         * Then modulate that quant based on the current frame's complexity
> -         * relative to the average complexity so far (using the 2pass
> RCEQ).
> -         * Then bias the quant up or down if total size so far was far from
> -         * the target.
> -         * Result: Depending on the value of rate_tolerance, there is a
> -         * tradeoff between quality and bitrate precision. But at large
> -         * tolerances, the bit distribution approaches that of 2pass. */
> -
> -        double wantedBits, overflow = 1;
> -        rce->movingAvgSum = m_shortTermCplxSum;
> -        m_shortTermCplxSum *= 0.5;
> -        m_shortTermCplxCount *= 0.5;
> -        m_shortTermCplxSum += m_currentSatd /
> (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
> -        m_shortTermCplxCount++;
> -        /* coeffBits to be used in 2-pass */
> -        rce->coeffBits = (int)m_currentSatd;
> -        rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
> -        rce->mvBits = 0;
> -        rce->sliceType = m_sliceType;
> -
> -        if (m_param->rc.rateControlMode == X265_RC_CRF)
> -        {
> -            q = getQScale(rce, m_rateFactorConstant);
> +            }
> +            q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
>          }
>          else
>          {
> -            if (!m_param->rc.bStatRead)
> -                checkAndResetABR(rce, false);
> -            q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
> +            /* 1pass ABR */
> 
> -            /* ABR code can potentially be counterproductive in CBR, so
> just
> -             * don't bother.  Don't run it if the frame complexity is zero
> -             * either. */
> -            if (!m_isCbr && m_currentSatd)
> +            /* Calculate the quantizer which would have produced the
> desired
> +             * average bitrate if it had been applied to all frames so far.
> +             * Then modulate that quant based on the current frame's
> complexity
> +             * relative to the average complexity so far (using the 2pass
> RCEQ).
> +             * Then bias the quant up or down if total size so far was far
> from
> +             * the target.
> +             * Result: Depending on the value of rate_tolerance, there is a
> +             * tradeoff between quality and bitrate precision. But at large
> +             * tolerances, the bit distribution approaches that of 2pass.
> */
> +
> +            double wantedBits, overflow = 1;
> +            rce->movingAvgSum = m_shortTermCplxSum;
> +            m_shortTermCplxSum *= 0.5;
> +            m_shortTermCplxCount *= 0.5;
> +            m_shortTermCplxSum += m_currentSatd /
> (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
> +            m_shortTermCplxCount++;
> +            /* coeffBits to be used in 2-pass */
> +            rce->coeffBits = (int)m_currentSatd;
> +            rce->blurredComplexity = m_shortTermCplxSum /
> m_shortTermCplxCount;
> +            rce->mvBits = 0;
> +            rce->sliceType = m_sliceType;
> +
> +            if (m_param->rc.rateControlMode == X265_RC_CRF)
>              {
> -                /* use framesDone instead of POC as poc count is not
> serial with bframes enabled */
> -                double timeDone = (double)(m_framesDone -
> m_param->frameNumThreads + 1) * m_frameDuration;
> -                wantedBits = timeDone * m_bitrate;
> -                if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
> +                q = getQScale(rce, m_rateFactorConstant);
> +            }
> +            else
> +            {
> +                if (!m_param->rc.bStatRead)
> +                    checkAndResetABR(rce, false);
> +                q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
> +
> +                /* ABR code can potentially be counterproductive in CBR,
> so just
> +                 * don't bother.  Don't run it if the frame complexity is
> zero
> +                 * either. */
> +                if (!m_isCbr && m_currentSatd)
>                  {
> -                    abrBuffer *= X265_MAX(1, sqrt(timeDone));
> -                    overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits -
> wantedBits) / abrBuffer);
> -                    q *= overflow;
> +                    /* use framesDone instead of POC as poc count is not
> serial with bframes enabled */
> +                    double timeDone = (double)(m_framesDone -
> m_param->frameNumThreads + 1) * m_frameDuration;
> +                    wantedBits = timeDone * m_bitrate;
> +                    if (wantedBits > 0 && m_totalBits > 0 &&
> !m_residualFrames)
> +                    {
> +                        abrBuffer *= X265_MAX(1, sqrt(timeDone));
> +                        overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits -
> wantedBits) / abrBuffer);
> +                        q *= overflow;
> +                    }
>                  }
>              }
> +
> +            if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
> +                && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
> +            {
> +                q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
> +                q /= fabs(m_param->rc.ipFactor);
> +            }
> +            else if (m_framesDone > 0)
> +            {
> +                if (m_param->rc.rateControlMode != X265_RC_CRF)
> +                {
> +                    double lqmin = 0, lqmax = 0;
> +                    lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> +                    lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> +                    if (!m_residualFrames)
> +                    {
> +                        if (overflow > 1.1 && m_framesDone > 3)
> +                            lqmax *= m_lstep;
> +                        else if (overflow < 0.9)
> +                            lqmin /= m_lstep;
> +                    }
> +                    q = Clip3(lqmin, lqmax, q);
> +                }
> +            }
> +            else if (m_qCompress != 1 && m_param->rc.rateControlMode ==
> X265_RC_CRF)
> +            {
> +                q = x265_qp2qScale(CRF_INIT_QP) /
> fabs(m_param->rc.ipFactor);
> +            }
> +            else if (m_framesDone == 0 && !m_isVbv)
> +            {
> +                /* for ABR alone, clip the first I frame qp */
> +                double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
> +                q = X265_MIN(lqmax, q);
> +            }
> +            q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> +            rce->qpNoVbv = x265_qScale2qp(q);
> +            q = clipQscale(pic, q);
>          }
> -
> -        if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
> -            && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
> -        {
> -            q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
> -            q /= fabs(m_param->rc.ipFactor);
> -        }
> -        else if (m_framesDone > 0)
> -        {
> -            if (m_param->rc.rateControlMode != X265_RC_CRF)
> -            {
> -                double lqmin = 0, lqmax = 0;
> -                lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> -                lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> -                if (!m_residualFrames)
> -                {
> -                    if (overflow > 1.1 && m_framesDone > 3)
> -                        lqmax *= m_lstep;
> -                    else if (overflow < 0.9)
> -                        lqmin /= m_lstep;
> -                }
> -                q = Clip3(lqmin, lqmax, q);
> -            }
> -        }
> -        else if (m_qCompress != 1 && m_param->rc.rateControlMode ==
> X265_RC_CRF)
> -        {
> -            q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
> -        }
> -        else if (m_framesDone == 0 && !m_isVbv)
> -        {
> -            /* for ABR alone, clip the first I frame qp */
> -            double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
> -            q = X265_MIN(lqmax, q);
> -        }
> -        q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> -        rce->qpNoVbv = x265_qScale2qp(q);
> -        q = clipQscale(pic, q);
>          m_lastQScaleFor[m_sliceType] = q;
> -        if (m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q)
> +        if ((m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q) &&
> !(m_2pass && !m_isVbv))
>              m_lastQScaleFor[P_SLICE] = q * fabs(m_param->rc.ipFactor);
> 
> -        rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q,
> (double)m_currentSatd);
> +        if (m_2pass && m_isVbv)
> +            rce->frameSizePlanned = qScale2bits(rce, q);
> +        else
> +            rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q,
> (double)m_currentSatd);
>          rce->frameSizeEstimated = rce->frameSizePlanned;
>          /* Always use up the whole VBV in this case. */
>          if (m_singleFrameVbv)
>              rce->frameSizePlanned = m_bufferRate;
> 
> +        rce->newQScale = q;
>          return q;
>      }
>  }
> @@ -1932,6 +2028,8 @@
>              m_totalBits += bits;
>          }
>      }
> +    if (m_2pass)
> +        m_expectedBitsSum += qScale2bits(rce, x265_qp2qScale(rce->newQp));
> 
>      if (m_isVbv)
>      {
> diff -r 2737d0b05b72 -r 96e43814fcc6 source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h Wed Jul 16 01:20:29 2014 -0500
> +++ b/source/encoder/ratecontrol.h Thu Jul 10 22:56:40 2014 +0530
> @@ -160,6 +160,8 @@
>      int      m_numEntries;
>      RateControlEntry *m_rce2Pass;
>      double   m_lastAccumPNorm;
> +    int64_t  m_predictedBits;
> +    double   m_expectedBitsSum;   /* sum of qscale2bits after rceq,
> ratefactor, and overflow, only includes finished frames */
>      struct
>      {
>          uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree
> quantizer data. */

No complaints with the rest, but the patch was line-mangled in
transmission and cannot be applied.

-- 
Steve Borho


More information about the x265-devel mailing list