[x265] [PATCH] rc: add 2 pass logic in rateEstimateQscale
Steve Borho
steve at borho.org
Wed Jul 16 21:45:16 CEST 2014
On 07/17, Aarthi Priya Thirumalai wrote:
> # HG changeset patch
> # User Aarthi Thirumalai<aarthi at multicorewareinc.com>
> # Date 1405013200 -19800
> # Thu Jul 10 22:56:40 2014 +0530
> # Node ID 96e43814fcc6e9b661c16ec8230768b348ec6ce1
> # Parent 2737d0b05b72ca90f09987e6cf30b7c37e38b83c
> rc: add 2 pass logic in rateEstimateQscale
>
> adjust qscale of each frame based on distance to end of the video and
> the difference between achieved and expected bits so far in the final pass.
>
> diff -r 2737d0b05b72 -r 96e43814fcc6 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Wed Jul 16 01:20:29 2014 -0500
> +++ b/source/encoder/encoder.cpp Thu Jul 10 22:56:40 2014 +0530
> @@ -41,7 +41,7 @@
>
> #include "x265.h"
>
> -static const char *summaryCSVHeader =
> +const char *summaryCSVHeader =
> "Command, Date/Time, Elapsed Time, FPS, Bitrate, "
> "Y PSNR, U PSNR, V PSNR, Global PSNR, SSIM, SSIM (dB), "
> "I count, I ave-QP, I kpbs, I-PSNR Y, I-PSNR U, I-PSNR V, I-SSIM (dB),
> "
> @@ -49,6 +49,8 @@
> "B count, B ave-QP, B kpbs, B-PSNR Y, B-PSNR U, B-PSNR V, B-SSIM (dB),
> "
> "Version\n";
>
> +const char sliceTypeToChar[] = { 'B', 'P', 'I' };
These two globals (summaryCSVHeader and sliceTypeToChar) should have g_
prefixes if they're not going to be file-static, and they need to be
declared within the x265 namespace.
> using namespace x265;
>
> Encoder::Encoder()
> @@ -227,6 +229,8 @@
> rc->m_bufferFill = X265_MAX(rc->m_bufferFill, 0);
> rc->m_bufferFill += encoder->m_rce.bufferRate;
> rc->m_bufferFill = X265_MIN(rc->m_bufferFill,
> rc->m_bufferSize);
> + if (rc->m_2pass)
> + rc->m_predictedBits +=
> (int64_t)encoder->m_rce.frameSizeEstimated;
> }
> encIdx = (encIdx + 1) % m_param->frameNumThreads;
> }
> diff -r 2737d0b05b72 -r 96e43814fcc6 source/encoder/encoder.h
> --- a/source/encoder/encoder.h Wed Jul 16 01:20:29 2014 -0500
> +++ b/source/encoder/encoder.h Thu Jul 10 22:56:40 2014 +0530
> @@ -30,6 +30,9 @@
>
> struct x265_encoder {};
>
> +extern const char *summaryCSVHeader;
> +extern const char sliceTypeToChar[3];
> +
> namespace x265 {
> // private namespace
>
> diff -r 2737d0b05b72 -r 96e43814fcc6 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Wed Jul 16 01:20:29 2014 -0500
> +++ b/source/encoder/ratecontrol.cpp Thu Jul 10 22:56:40 2014 +0530
> @@ -122,6 +122,23 @@
> + rce->miscBits;
> }
>
> +inline void copyRceData(RateControlEntry* rce, RateControlEntry* rce2Pass)
> +{
> + rce->coeffBits = rce2Pass->coeffBits;
> + rce->mvBits = rce2Pass->mvBits;
> + rce->miscBits = rce2Pass->miscBits;
> + rce->iCuCount = rce2Pass->iCuCount;
> + rce->pCuCount = rce2Pass->pCuCount;
> + rce->skipCuCount = rce2Pass->skipCuCount;
> + rce->keptAsRef = rce2Pass->keptAsRef;
> + rce->qScale = rce2Pass->qScale;
> + rce->newQScale = rce2Pass->newQScale;
> + rce->expectedBits = rce2Pass->expectedBits;
> + rce->expectedVbv = rce2Pass->expectedVbv;
> + rce->blurredComplexity = rce2Pass->blurredComplexity;
> + rce->sliceType = rce2Pass->sliceType;
> +}
> +
> } // end anonymous namespace
> /* Compute variance to derive AC energy of each block */
> static inline uint32_t acEnergyVar(Frame *pic, uint64_t sum_ssd, int
> shift, int i)
> @@ -982,6 +999,12 @@
> m_curSlice = pic->getSlice();
> m_sliceType = m_curSlice->getSliceType();
> rce->sliceType = m_sliceType;
> + if (m_param->rc.bStatRead)
> + {
> + X265_CHECK(rce->encodeOrder >= 0 && rce->encodeOrder <
> m_numEntries,
> + "Frame encode order is more than total no. of frames
> in the first pass");
> + copyRceData(rce, &m_rce2Pass[rce->encodeOrder]);
X265_CHECK() shouldn't be used for potential user errors; it is intended
for internal programming errors (violating function parameter
expectations, etc). If it's possible for a user to run two encodes with
different frame counts and cause a memory access error, we must prevent
the crash even in non-checked builds.
> + }
> rce->isActive = true;
> if (m_sliceType == B_SLICE)
> rce->bframes = m_bframes;
> @@ -1008,6 +1031,7 @@
> }
> rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
> rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
> + m_predictedBits = m_totalBits;
> updateVbvPlan(enc);
> rce->bufferFill = m_bufferFill;
> }
> @@ -1045,6 +1069,7 @@
> rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
> }
> m_framesDone++;
> + rce->newQp = m_qp;
> /* set the final QP to slice structure */
> m_curSlice->setSliceQp(m_qp);
> }
> @@ -1186,6 +1211,14 @@
> {
> double q;
>
> + if (m_2pass)
> + {
> + if (m_sliceType != rce->sliceType)
> + {
> + x265_log(m_param, X265_LOG_ERROR, "slice=%c but 2pass stats
> say %c\n",
> + sliceTypeToChar[m_sliceType],
> sliceTypeToChar[rce->sliceType]);
> + }
> + }
> if (m_sliceType == B_SLICE)
> {
> /* B-frames don't have independent rate control, but rather get the
> @@ -1230,109 +1263,172 @@
> double qScale = x265_qp2qScale(q);
> rce->frameSizePlanned = predictSize(&m_predBfromP, qScale,
> (double)m_leadingNoBSatd);
> rce->frameSizeEstimated = rce->frameSizePlanned;
> + rce->newQScale = qScale;
> return qScale;
> }
> else
> {
> double abrBuffer = 2 * m_param->rc.rateTolerance * m_bitrate;
> + if (m_2pass)
> + {
> + int64_t diff;
> + if (!m_isVbv)
> + {
> + m_predictedBits = m_totalBits;
> + if (rce->encodeOrder < m_param->frameNumThreads)
> + m_predictedBits += (int64_t)(rce->encodeOrder *
> m_bitrate / m_fps) ;
white-space: there is a stray space before the semicolon
> + else
> + m_predictedBits += (int64_t)(m_param->frameNumThreads
> * m_bitrate / m_fps);
> + }
> + /* Adjust ABR buffer based on distance to the end of the
> video. */
> + if (m_numEntries > rce->encodeOrder)
> + {
> + uint64_t finalBits = m_rce2Pass[m_numEntries -
> 1].expectedBits;
> + double videoPos = (double)rce->expectedBits / finalBits;
> + double scaleFactor = sqrt((1 - videoPos) * m_numEntries);
> + abrBuffer *= 0.5 * X265_MAX(scaleFactor, 0.5);
> + }
>
> - /* 1pass ABR */
> + diff = m_predictedBits - (int64_t)rce->expectedBits;
> + q = rce->newQScale;
> + q /= Clip3(0.5, 2.0, (double)(abrBuffer - diff) / abrBuffer);
> + if (((rce->encodeOrder + 1 - m_param->frameNumThreads) >=
> m_fps) &&
> + (m_expectedBitsSum > 0))
> + {
> + /* Adjust quant based on the difference between
> + * achieved and expected bitrate so far */
> + double curTime = (double)rce->encodeOrder / m_numEntries;
> + double w = Clip3(0.0, 1.0, curTime * 100);
> + q *= pow((double)m_totalBits / m_expectedBitsSum, w);
> + }
> + rce->qpNoVbv = x265_qScale2qp(q);
> + if (m_isVbv)
> + {
> + /* Do not overflow vbv */
> + double expectedSize = qScale2bits(rce, q);
> + double expectedVbv = m_bufferFill + m_bufferRate -
> expectedSize;
> + double expectedFullness = rce->expectedVbv / m_bufferSize;
> + double qmax = q * (2 - expectedFullness);
> + double sizeConstraint = 1 + expectedFullness;
> + qmax = X265_MAX(qmax, rce->newQScale);
> + if (expectedFullness < .05)
> + qmax = MAX_MAX_QPSCALE;
> + qmax = X265_MIN(qmax, MAX_MAX_QPSCALE);
> + while (((expectedVbv < rce->expectedVbv/sizeConstraint) &&
> (q < qmax)) ||
> + ((expectedVbv < 0) && (q < MAX_MAX_QPSCALE)))
> + {
> + q *= 1.05;
> + expectedSize = qScale2bits(rce, q);
> + expectedVbv = m_bufferFill + m_bufferRate -
> expectedSize;
> + }
>
> - /* Calculate the quantizer which would have produced the desired
> - * average bitrate if it had been applied to all frames so far.
> - * Then modulate that quant based on the current frame's complexity
> - * relative to the average complexity so far (using the 2pass
> RCEQ).
> - * Then bias the quant up or down if total size so far was far from
> - * the target.
> - * Result: Depending on the value of rate_tolerance, there is a
> - * tradeoff between quality and bitrate precision. But at large
> - * tolerances, the bit distribution approaches that of 2pass. */
> -
> - double wantedBits, overflow = 1;
> - rce->movingAvgSum = m_shortTermCplxSum;
> - m_shortTermCplxSum *= 0.5;
> - m_shortTermCplxCount *= 0.5;
> - m_shortTermCplxSum += m_currentSatd /
> (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
> - m_shortTermCplxCount++;
> - /* coeffBits to be used in 2-pass */
> - rce->coeffBits = (int)m_currentSatd;
> - rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
> - rce->mvBits = 0;
> - rce->sliceType = m_sliceType;
> -
> - if (m_param->rc.rateControlMode == X265_RC_CRF)
> - {
> - q = getQScale(rce, m_rateFactorConstant);
> + }
> + q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> }
> else
> {
> - if (!m_param->rc.bStatRead)
> - checkAndResetABR(rce, false);
> - q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
> + /* 1pass ABR */
>
> - /* ABR code can potentially be counterproductive in CBR, so
> just
> - * don't bother. Don't run it if the frame complexity is zero
> - * either. */
> - if (!m_isCbr && m_currentSatd)
> + /* Calculate the quantizer which would have produced the
> desired
> + * average bitrate if it had been applied to all frames so far.
> + * Then modulate that quant based on the current frame's
> complexity
> + * relative to the average complexity so far (using the 2pass
> RCEQ).
> + * Then bias the quant up or down if total size so far was far
> from
> + * the target.
> + * Result: Depending on the value of rate_tolerance, there is a
> + * tradeoff between quality and bitrate precision. But at large
> + * tolerances, the bit distribution approaches that of 2pass.
> */
> +
> + double wantedBits, overflow = 1;
> + rce->movingAvgSum = m_shortTermCplxSum;
> + m_shortTermCplxSum *= 0.5;
> + m_shortTermCplxCount *= 0.5;
> + m_shortTermCplxSum += m_currentSatd /
> (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
> + m_shortTermCplxCount++;
> + /* coeffBits to be used in 2-pass */
> + rce->coeffBits = (int)m_currentSatd;
> + rce->blurredComplexity = m_shortTermCplxSum /
> m_shortTermCplxCount;
> + rce->mvBits = 0;
> + rce->sliceType = m_sliceType;
> +
> + if (m_param->rc.rateControlMode == X265_RC_CRF)
> {
> - /* use framesDone instead of POC as poc count is not
> serial with bframes enabled */
> - double timeDone = (double)(m_framesDone -
> m_param->frameNumThreads + 1) * m_frameDuration;
> - wantedBits = timeDone * m_bitrate;
> - if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
> + q = getQScale(rce, m_rateFactorConstant);
> + }
> + else
> + {
> + if (!m_param->rc.bStatRead)
> + checkAndResetABR(rce, false);
> + q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
> +
> + /* ABR code can potentially be counterproductive in CBR,
> so just
> + * don't bother. Don't run it if the frame complexity is
> zero
> + * either. */
> + if (!m_isCbr && m_currentSatd)
> {
> - abrBuffer *= X265_MAX(1, sqrt(timeDone));
> - overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits -
> wantedBits) / abrBuffer);
> - q *= overflow;
> + /* use framesDone instead of POC as poc count is not
> serial with bframes enabled */
> + double timeDone = (double)(m_framesDone -
> m_param->frameNumThreads + 1) * m_frameDuration;
> + wantedBits = timeDone * m_bitrate;
> + if (wantedBits > 0 && m_totalBits > 0 &&
> !m_residualFrames)
> + {
> + abrBuffer *= X265_MAX(1, sqrt(timeDone));
> + overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits -
> wantedBits) / abrBuffer);
> + q *= overflow;
> + }
> }
> }
> +
> + if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
> + && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
> + {
> + q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
> + q /= fabs(m_param->rc.ipFactor);
> + }
> + else if (m_framesDone > 0)
> + {
> + if (m_param->rc.rateControlMode != X265_RC_CRF)
> + {
> + double lqmin = 0, lqmax = 0;
> + lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> + lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> + if (!m_residualFrames)
> + {
> + if (overflow > 1.1 && m_framesDone > 3)
> + lqmax *= m_lstep;
> + else if (overflow < 0.9)
> + lqmin /= m_lstep;
> + }
> + q = Clip3(lqmin, lqmax, q);
> + }
> + }
> + else if (m_qCompress != 1 && m_param->rc.rateControlMode ==
> X265_RC_CRF)
> + {
> + q = x265_qp2qScale(CRF_INIT_QP) /
> fabs(m_param->rc.ipFactor);
> + }
> + else if (m_framesDone == 0 && !m_isVbv)
> + {
> + /* for ABR alone, clip the first I frame qp */
> + double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
> + q = X265_MIN(lqmax, q);
> + }
> + q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> + rce->qpNoVbv = x265_qScale2qp(q);
> + q = clipQscale(pic, q);
> }
> -
> - if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
> - && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
> - {
> - q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
> - q /= fabs(m_param->rc.ipFactor);
> - }
> - else if (m_framesDone > 0)
> - {
> - if (m_param->rc.rateControlMode != X265_RC_CRF)
> - {
> - double lqmin = 0, lqmax = 0;
> - lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> - lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> - if (!m_residualFrames)
> - {
> - if (overflow > 1.1 && m_framesDone > 3)
> - lqmax *= m_lstep;
> - else if (overflow < 0.9)
> - lqmin /= m_lstep;
> - }
> - q = Clip3(lqmin, lqmax, q);
> - }
> - }
> - else if (m_qCompress != 1 && m_param->rc.rateControlMode ==
> X265_RC_CRF)
> - {
> - q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
> - }
> - else if (m_framesDone == 0 && !m_isVbv)
> - {
> - /* for ABR alone, clip the first I frame qp */
> - double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
> - q = X265_MIN(lqmax, q);
> - }
> - q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> - rce->qpNoVbv = x265_qScale2qp(q);
> - q = clipQscale(pic, q);
> m_lastQScaleFor[m_sliceType] = q;
> - if (m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q)
> + if ((m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q) &&
> !(m_2pass && !m_isVbv))
> m_lastQScaleFor[P_SLICE] = q * fabs(m_param->rc.ipFactor);
>
> - rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q,
> (double)m_currentSatd);
> + if (m_2pass && m_isVbv)
> + rce->frameSizePlanned = qScale2bits(rce, q);
> + else
> + rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q,
> (double)m_currentSatd);
> rce->frameSizeEstimated = rce->frameSizePlanned;
> /* Always use up the whole VBV in this case. */
> if (m_singleFrameVbv)
> rce->frameSizePlanned = m_bufferRate;
>
> + rce->newQScale = q;
> return q;
> }
> }
> @@ -1932,6 +2028,8 @@
> m_totalBits += bits;
> }
> }
> + if (m_2pass)
> + m_expectedBitsSum += qScale2bits(rce, x265_qp2qScale(rce->newQp));
>
> if (m_isVbv)
> {
> diff -r 2737d0b05b72 -r 96e43814fcc6 source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h Wed Jul 16 01:20:29 2014 -0500
> +++ b/source/encoder/ratecontrol.h Thu Jul 10 22:56:40 2014 +0530
> @@ -160,6 +160,8 @@
> int m_numEntries;
> RateControlEntry *m_rce2Pass;
> double m_lastAccumPNorm;
> + int64_t m_predictedBits;
> + double m_expectedBitsSum; /* sum of qscale2bits after rceq,
> ratefactor, and overflow, only includes finished frames */
> struct
> {
> uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree
> quantizer data. */
No complaints with the rest, but the patch was line-mangled in
transmission and cannot be applied.
--
Steve Borho
More information about the x265-devel
mailing list