[x265] [PATCH] rc: add 2 pass logic in rateEstimateQscale

Steve Borho steve at borho.org
Fri Jul 11 08:34:37 CEST 2014


On Thu, Jul 10, 2014 at 1:00 PM,  <aarthi at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Aarthi Thirumalai<aarthi at multicorewareinc.com>
> # Date 1405013200 -19800
> #      Thu Jul 10 22:56:40 2014 +0530
> # Node ID 13bf49f57f958db1c58177007a4d4eb500129095
> # Parent  418b68734fd83bf32dbdd4a097e51ce455267c3d
> rc: add 2 pass logic in rateEstimateQscale
>
> adjust qscale of each frame based on distance to end of the video and
> the difference between achieved and expected bits so far in the final pass.
>
> diff -r 418b68734fd8 -r 13bf49f57f95 source/Lib/TLibCommon/TypeDef.h
> --- a/source/Lib/TLibCommon/TypeDef.h   Thu Jul 10 00:50:17 2014 +0530
> +++ b/source/Lib/TLibCommon/TypeDef.h   Thu Jul 10 22:56:40 2014 +0530
> @@ -61,6 +61,8 @@
>      I_SLICE
>  };
>
> +static const char sliceTypeToChar[] = { 'B', 'P', 'I' };

encoder.cpp has a similar table; they should be combined.

> +
>  /// chroma formats (according to semantics of chroma_format_idc)
>  enum ChromaFormat
>  {
> diff -r 418b68734fd8 -r 13bf49f57f95 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Thu Jul 10 00:50:17 2014 +0530
> +++ b/source/encoder/encoder.cpp        Thu Jul 10 22:56:40 2014 +0530
> @@ -242,6 +242,8 @@
>              rc->m_bufferFill = X265_MAX(rc->m_bufferFill, 0);
>              rc->m_bufferFill += encoder->m_rce.bufferRate;
>              rc->m_bufferFill = X265_MIN(rc->m_bufferFill, rc->m_bufferSize);
> +            if (rc->m_2pass)
> +                rc->m_predictedBits += (int64_t)encoder->m_rce.frameSizeEstimated;
>          }
>          encIdx = (encIdx + 1) % m_param->frameNumThreads;
>      }
> diff -r 418b68734fd8 -r 13bf49f57f95 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp    Thu Jul 10 00:50:17 2014 +0530
> +++ b/source/encoder/ratecontrol.cpp    Thu Jul 10 22:56:40 2014 +0530
> @@ -124,6 +124,24 @@
>             + rce->miscBits;
>  }
>
> +inline void copyRceData(RateControlEntry* rce, RateControlEntry* rce2Pass)
> +{
> +    rce->coeffBits = rce2Pass->coeffBits;
> +    rce->mvBits = rce2Pass->mvBits;
> +    rce->miscBits = rce2Pass->miscBits;
> +    rce->iCuCount = rce2Pass->iCuCount;
> +    rce->pCuCount = rce2Pass->pCuCount;
> +    rce->skipCuCount = rce2Pass->skipCuCount;
> +    rce->keptAsRef = rce2Pass->keptAsRef;
> +    rce->qScale = rce2Pass->qScale;
> +    rce->newQScale = rce2Pass->newQScale;
> +    rce->expectedBits = rce2Pass->expectedBits;
> +    rce->expectedVbv = rce2Pass->expectedVbv;
> +    rce->blurredComplexity = rce2Pass->blurredComplexity;
> +    rce->sliceType = rce2Pass->sliceType;
> +}
> +
> +
>  }  // end anonymous namespace
>  /* Compute variance to derive AC energy of each block */
>  static inline uint32_t acEnergyVar(Frame *pic, uint64_t sum_ssd, int shift, int i)
> @@ -993,6 +1011,11 @@
>      m_curSlice = pic->getSlice();
>      m_sliceType = m_curSlice->getSliceType();
>      rce->sliceType = m_sliceType;
> +    if (m_param->rc.bStatRead)
> +    {
> +        assert(rce->encodeOrder >= 0 && rce->encodeOrder < m_numEntries);
> +        copyRceData(rce, &m_rce2Pass[rce->encodeOrder]);
> +    }
>      rce->isActive = true;
>      if (m_sliceType == B_SLICE)
>          rce->bframes = m_bframes;
> @@ -1019,6 +1042,7 @@
>          }
>          rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
>          rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
> +        m_predictedBits = m_totalBits;
>          updateVbvPlan(enc);
>          rce->bufferFill = m_bufferFill;
>      }
> @@ -1056,6 +1080,7 @@
>          rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
>      }
>      m_framesDone++;
> +    rce->newQp = m_qp;
>      /* set the final QP to slice structure */
>      m_curSlice->setSliceQp(m_qp);
>  }
> @@ -1196,7 +1221,14 @@
>  double RateControl::rateEstimateQscale(Frame* pic, RateControlEntry *rce)
>  {
>      double q;
> -
> +    if (m_2pass)
> +    {
> +        if (m_sliceType != rce->sliceType)
> +        {
> +            x265_log(m_param, X265_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
> +                      sliceTypeToChar[m_sliceType], sliceTypeToChar[rce->sliceType]);
> +        }
> +    }
>      if (m_sliceType == B_SLICE)
>      {
>          /* B-frames don't have independent rate control, but rather get the
> @@ -1241,112 +1273,174 @@
>          double qScale = x265_qp2qScale(q);
>          rce->frameSizePlanned = predictSize(&m_predBfromP, qScale, (double)m_leadingNoBSatd);
>          rce->frameSizeEstimated = rce->frameSizePlanned;
> +        rce->newQScale = qScale;
>          return qScale;
>      }
>      else
>      {
>          double abrBuffer = 2 * m_param->rc.rateTolerance * m_bitrate;
> +        if (m_2pass)
> +        {
> +            int64_t diff;
> +            if (!m_isVbv)
> +            {
> +                m_predictedBits = m_totalBits;
> +                if (rce->encodeOrder < m_param->frameNumThreads)
> +                    m_predictedBits += (int64_t)(rce->encodeOrder * m_bitrate / m_fps) ;
> +                else
> +                    m_predictedBits += (int64_t)(m_param->frameNumThreads * m_bitrate / m_fps);
> +            }
>
> +            /* Adjust ABR buffer based on distance to the end of the video. */
> +            if (m_numEntries > rce->encodeOrder)
> +            {
> +                uint64_t finalBits = m_rce2Pass[m_numEntries - 1].expectedBits;
> +                double videoPos = (double)rce->expectedBits / finalBits;
> +                double scaleFactor = sqrt((1 - videoPos) * m_numEntries);
> +                abrBuffer *= 0.5 * X265_MAX(scaleFactor, 0.5);
> +            }
> +
> +            diff = m_predictedBits - (int64_t)rce->expectedBits;
> +            q = rce->newQScale;
> +            q /= Clip3(0.5, 2.0, (double)(abrBuffer - diff) / abrBuffer);
> +            if (((rce->encodeOrder + 1 - m_param->frameNumThreads) >= m_fps) &&
> +                (m_expectedBitsSum > 0))
> +            {
> +                /* Adjust quant based on the difference between
> +                 * achieved and expected bitrate so far */
> +                double curTime = (double)rce->encodeOrder / m_numEntries;
> +                double w = Clip3(0.0, 1.0, curTime * 100);
> +                q *= pow((double)m_totalBits / m_expectedBitsSum, w);
> +            }
> +            rce->qpNoVbv = x265_qScale2qp(q);
> +            if (m_isVbv)
> +            {
> +                /* Do not overflow vbv */
> +                double expectedSize = qScale2bits(rce, q);
> +                double expectedVbv = m_bufferFill + m_bufferRate - expectedSize;
> +                double expectedFullness = rce->expectedVbv / m_bufferSize;
> +                double qmax = q * (2 - expectedFullness);
> +                double sizeConstraint = 1 + expectedFullness;
> +                qmax = X265_MAX(qmax, rce->newQScale);
> +                if (expectedFullness < .05)
> +                    qmax = MAX_MAX_QPSCALE;
> +                qmax = X265_MIN(qmax, MAX_MAX_QPSCALE);
> +                while (((expectedVbv < rce->expectedVbv/sizeConstraint) && (q < qmax)) ||
> +                        ((expectedVbv < 0) && (q < MAX_MAX_QPSCALE)))
> +                {
> +                    q *= 1.05;
> +                    expectedSize = qScale2bits(rce, q);
> +                    expectedVbv = m_bufferFill + m_bufferRate - expectedSize;
> +                }
> +
> +            }
> +            q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> +        }
>          /* 1pass ABR */
> -
> -        /* Calculate the quantizer which would have produced the desired
> -         * average bitrate if it had been applied to all frames so far.
> -         * Then modulate that quant based on the current frame's complexity
> -         * relative to the average complexity so far (using the 2pass RCEQ).
> -         * Then bias the quant up or down if total size so far was far from
> -         * the target.
> -         * Result: Depending on the value of rate_tolerance, there is a
> -         * tradeoff between quality and bitrate precision. But at large
> -         * tolerances, the bit distribution approaches that of 2pass. */
> -
> -        double wantedBits, overflow = 1;
> -        rce->movingAvgSum = m_shortTermCplxSum;
> -        m_shortTermCplxSum *= 0.5;
> -        m_shortTermCplxCount *= 0.5;
> -        m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
> -        m_shortTermCplxCount++;
> -        /* coeffBits to be used in 2-pass */
> -        rce->coeffBits = (int)m_currentSatd;
> -        rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
> -        rce->mvBits = 0;
> -        rce->sliceType = m_sliceType;
> -
> -        if (m_param->rc.rateControlMode == X265_RC_CRF)
> -        {
> -            q = getQScale(rce, m_rateFactorConstant);
> -        }
>          else
>          {
> -            if (!m_param->rc.bStatRead)
> -                checkAndResetABR(rce, false);
> -            q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
> +            /* Calculate the quantizer which would have produced the desired
> +             * average bitrate if it had been applied to all frames so far.
> +             * Then modulate that quant based on the current frame's complexity
> +             * relative to the average complexity so far (using the 2pass RCEQ).
> +             * Then bias the quant up or down if total size so far was far from
> +             * the target.
> +             * Result: Depending on the value of rate_tolerance, there is a
> +             * tradeoff between quality and bitrate precision. But at large
> +             * tolerances, the bit distribution approaches that of 2pass. */
>
> -            /* ABR code can potentially be counterproductive in CBR, so just
> -             * don't bother.  Don't run it if the frame complexity is zero
> -             * either. */
> -            if (!m_isCbr && m_currentSatd)
> +            double wantedBits, overflow = 1;
> +            rce->movingAvgSum = m_shortTermCplxSum;
> +            m_shortTermCplxSum *= 0.5;
> +            m_shortTermCplxCount *= 0.5;
> +            m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
> +            m_shortTermCplxCount++;
> +            /* coeffBits to be used in 2-pass */
> +            rce->coeffBits = (int)m_currentSatd;
> +            rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
> +            rce->mvBits = 0;
> +            rce->sliceType = m_sliceType;
> +
> +            if (m_param->rc.rateControlMode == X265_RC_CRF)
>              {
> -                /* use framesDone instead of POC as poc count is not serial with bframes enabled */
> -                double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
> -                wantedBits = timeDone * m_bitrate;
> -                if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
> +                q = getQScale(rce, m_rateFactorConstant);
> +            }
> +            else
> +            {
> +                if (!m_param->rc.bStatRead)
> +                    checkAndResetABR(rce, false);
> +                q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
> +
> +                /* ABR code can potentially be counterproductive in CBR, so just
> +                 * don't bother.  Don't run it if the frame complexity is zero
> +                 * either. */
> +                if (!m_isCbr && m_currentSatd)
>                  {
> -                    abrBuffer *= X265_MAX(1, sqrt(timeDone));
> -                    overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
> -                    q *= overflow;
> +                    /* use framesDone instead of POC as poc count is not serial with bframes enabled */
> +                    double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
> +                    wantedBits = timeDone * m_bitrate;
> +                    if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
> +                    {
> +                        abrBuffer *= X265_MAX(1, sqrt(timeDone));
> +                        overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
> +                        q *= overflow;
> +                    }
>                  }
>              }
> +
> +            if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
> +                && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
> +            {
> +                q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
> +                q /= fabs(m_param->rc.ipFactor);
> +            }
> +            else if (m_framesDone > 0)
> +            {
> +                if (m_param->rc.rateControlMode != X265_RC_CRF)
> +                {
> +                    double lqmin = 0, lqmax = 0;
> +                    lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> +                    lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> +                    if (!m_residualFrames)
> +                    {
> +                        if (overflow > 1.1 && m_framesDone > 3)
> +                            lqmax *= m_lstep;
> +                        else if (overflow < 0.9)
> +                            lqmin /= m_lstep;
> +                    }
> +                    q = Clip3(lqmin, lqmax, q);
> +                }
> +            }
> +            else if (m_qCompress != 1 && m_param->rc.rateControlMode == X265_RC_CRF)
> +            {
> +                q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
> +            }
> +            else if (m_framesDone == 0 && !m_isVbv)
> +            {
> +                /* for ABR alone, clip the first I frame qp */
> +                double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
> +                q = X265_MIN(lqmax, q);
> +            }
> +
> +            q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> +            rce->qpNoVbv = x265_qScale2qp(q);
> +            q = clipQscale(pic, q);
>          }
>
> -        if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
> -            && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
> -        {
> -            q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
> -            q /= fabs(m_param->rc.ipFactor);
> -        }
> -        else if (m_framesDone > 0)
> -        {
> -            if (m_param->rc.rateControlMode != X265_RC_CRF)
> -            {
> -                double lqmin = 0, lqmax = 0;
> -                lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> -                lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> -                if (!m_residualFrames)
> -                {
> -                    if (overflow > 1.1 && m_framesDone > 3)
> -                        lqmax *= m_lstep;
> -                    else if (overflow < 0.9)
> -                        lqmin /= m_lstep;
> -                }
> -                q = Clip3(lqmin, lqmax, q);
> -            }
> -        }
> -        else if (m_qCompress != 1 && m_param->rc.rateControlMode == X265_RC_CRF)
> -        {
> -            q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
> -        }
> -        else if (m_framesDone == 0 && !m_isVbv)
> -        {
> -            /* for ABR alone, clip the first I frame qp */
> -            double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
> -            q = X265_MIN(lqmax, q);
> -        }
> -
> -        q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> -        rce->qpNoVbv = x265_qScale2qp(q);
> -        q = clipQscale(pic, q);
> -
>          m_lastQScaleFor[m_sliceType] = q;
>
> -        if (m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q)
> +        if ((m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q) && !(m_2pass && !m_isVbv))
>              m_lastQScaleFor[P_SLICE] = q * fabs(m_param->rc.ipFactor);
>
> -        rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
> +        if (m_2pass && m_isVbv)
> +            rce->frameSizePlanned = qScale2bits(rce, q);
> +        else
> +            rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
>          rce->frameSizeEstimated = rce->frameSizePlanned;
>          /* Always use up the whole VBV in this case. */
>          if (m_singleFrameVbv)
>              rce->frameSizePlanned = m_bufferRate;
> -
> +        rce->newQScale = q;
>          return q;
>      }
>  }
> @@ -1928,6 +2022,8 @@
>              m_totalBits += bits;
>          }
>      }
> +    if (m_2pass)
> +        m_expectedBitsSum += qScale2bits(rce, x265_qp2qScale(rce->newQp));
>
>      if (m_isVbv)
>      {
> diff -r 418b68734fd8 -r 13bf49f57f95 source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h      Thu Jul 10 00:50:17 2014 +0530
> +++ b/source/encoder/ratecontrol.h      Thu Jul 10 22:56:40 2014 +0530
> @@ -161,6 +161,8 @@
>      int      m_numEntries;
>      RateControlEntry *m_rce2Pass;
>      double   m_lastAccumPNorm;
> +    int64_t  m_predictedBits;
> +    double   m_expectedBitsSum;   /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
>
>      struct
>      {
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

The rest looks OK at first glance.

-- 
Steve Borho


More information about the x265-devel mailing list