[x265] [PATCH] rc: add 2 pass logic in rateEstimateQscale
Steve Borho
steve at borho.org
Sun Jul 13 16:57:43 CEST 2014
On 07/13, aarthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Aarthi Thirumalai<aarthi at multicorewareinc.com>
> # Date 1405013200 -19800
> # Thu Jul 10 22:56:40 2014 +0530
> # Node ID cf8b11c379f304cd80da524e0d33a5eba2aa6155
> # Parent 9d3683ab096b6efbaddd3ed388673e171457455f
> rc: add 2 pass logic in rateEstimateQscale
>
> adjust qscale of each frame based on distance to end of the video and
> the difference between achieved and expected bits so far in the final pass.
>
> diff -r 9d3683ab096b -r cf8b11c379f3 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Sat Jul 12 01:18:07 2014 -0500
> +++ b/source/encoder/encoder.cpp Thu Jul 10 22:56:40 2014 +0530
> @@ -41,14 +41,6 @@
>
> #include "x265.h"
>
> -static const char *summaryCSVHeader =
> - "Command, Date/Time, Elapsed Time, FPS, Bitrate, "
> - "Y PSNR, U PSNR, V PSNR, Global PSNR, SSIM, SSIM (dB), "
> - "I count, I ave-QP, I kpbs, I-PSNR Y, I-PSNR U, I-PSNR V, I-SSIM (dB), "
> - "P count, P ave-QP, P kpbs, P-PSNR Y, P-PSNR U, P-PSNR V, P-SSIM (dB), "
> - "B count, B ave-QP, B kpbs, B-PSNR Y, B-PSNR U, B-PSNR V, B-SSIM (dB), "
> - "Version\n";
> -
> using namespace x265;
>
> Encoder::Encoder()
> @@ -227,6 +219,8 @@
> rc->m_bufferFill = X265_MAX(rc->m_bufferFill, 0);
> rc->m_bufferFill += encoder->m_rce.bufferRate;
> rc->m_bufferFill = X265_MIN(rc->m_bufferFill, rc->m_bufferSize);
> + if (rc->m_2pass)
> + rc->m_predictedBits += (int64_t)encoder->m_rce.frameSizeEstimated;
> }
> encIdx = (encIdx + 1) % m_param->frameNumThreads;
> }
> diff -r 9d3683ab096b -r cf8b11c379f3 source/encoder/encoder.h
> --- a/source/encoder/encoder.h Sat Jul 12 01:18:07 2014 -0500
> +++ b/source/encoder/encoder.h Thu Jul 10 22:56:40 2014 +0530
> @@ -30,6 +30,16 @@
>
> struct x265_encoder {};
>
> +static const char *summaryCSVHeader =
> + "Command, Date/Time, Elapsed Time, FPS, Bitrate, "
> + "Y PSNR, U PSNR, V PSNR, Global PSNR, SSIM, SSIM (dB), "
> + "I count, I ave-QP, I kpbs, I-PSNR Y, I-PSNR U, I-PSNR V, I-SSIM (dB), "
> + "P count, P ave-QP, P kpbs, P-PSNR Y, P-PSNR U, P-PSNR V, P-SSIM (dB), "
> + "B count, B ave-QP, B kpbs, B-PSNR Y, B-PSNR U, B-PSNR V, B-SSIM (dB), "
> + "Version\n";
> +
> +static const char sliceTypeToChar[] = { 'B', 'P', 'I' };
> +
Defining these static variables in the header causes GCC warnings in most files that include it, since each including file gets its own copy and most never use it:
In file included from /Users/steve/repos/x265/source/encoder/api.cpp:26:0:
/Users/steve/repos/x265/source/encoder/encoder.h:33:20: warning: 'summaryCSVHeader' defined but not used [-Wunused-variable]
> namespace x265 {
> // private namespace
>
> diff -r 9d3683ab096b -r cf8b11c379f3 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Sat Jul 12 01:18:07 2014 -0500
> +++ b/source/encoder/ratecontrol.cpp Thu Jul 10 22:56:40 2014 +0530
> @@ -122,6 +122,23 @@
> + rce->miscBits;
> }
>
> +inline void copyRceData(RateControlEntry* rce, RateControlEntry* rce2Pass)
> +{
> + rce->coeffBits = rce2Pass->coeffBits;
> + rce->mvBits = rce2Pass->mvBits;
> + rce->miscBits = rce2Pass->miscBits;
> + rce->iCuCount = rce2Pass->iCuCount;
> + rce->pCuCount = rce2Pass->pCuCount;
> + rce->skipCuCount = rce2Pass->skipCuCount;
> + rce->keptAsRef = rce2Pass->keptAsRef;
> + rce->qScale = rce2Pass->qScale;
> + rce->newQScale = rce2Pass->newQScale;
> + rce->expectedBits = rce2Pass->expectedBits;
> + rce->expectedVbv = rce2Pass->expectedVbv;
> + rce->blurredComplexity = rce2Pass->blurredComplexity;
> + rce->sliceType = rce2Pass->sliceType;
> +}
> +
> } // end anonymous namespace
> /* Compute variance to derive AC energy of each block */
> static inline uint32_t acEnergyVar(Frame *pic, uint64_t sum_ssd, int shift, int i)
> @@ -989,6 +1006,12 @@
> m_curSlice = pic->getSlice();
> m_sliceType = m_curSlice->getSliceType();
> rce->sliceType = m_sliceType;
> + if (m_param->rc.bStatRead)
> + {
> + X265_CHECK(rce->encodeOrder >= 0 && rce->encodeOrder < m_numEntries,
> + "Frame encode order is more than total no. of frames in the first pass");
> + copyRceData(rce, &m_rce2Pass[rce->encodeOrder]);
> + }
> rce->isActive = true;
> if (m_sliceType == B_SLICE)
> rce->bframes = m_bframes;
> @@ -1015,6 +1038,7 @@
> }
> rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
> rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
> + m_predictedBits = m_totalBits;
> updateVbvPlan(enc);
> rce->bufferFill = m_bufferFill;
> }
> @@ -1052,6 +1076,7 @@
> rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
> }
> m_framesDone++;
> + rce->newQp = m_qp;
> /* set the final QP to slice structure */
> m_curSlice->setSliceQp(m_qp);
> }
> @@ -1193,6 +1218,14 @@
> {
> double q;
>
> + if (m_2pass)
> + {
> + if (m_sliceType != rce->sliceType)
> + {
> + x265_log(m_param, X265_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
> + sliceTypeToChar[m_sliceType], sliceTypeToChar[rce->sliceType]);
> + }
> + }
> if (m_sliceType == B_SLICE)
> {
> /* B-frames don't have independent rate control, but rather get the
> @@ -1237,109 +1270,172 @@
> double qScale = x265_qp2qScale(q);
> rce->frameSizePlanned = predictSize(&m_predBfromP, qScale, (double)m_leadingNoBSatd);
> rce->frameSizeEstimated = rce->frameSizePlanned;
> + rce->newQScale = qScale;
> return qScale;
> }
> else
> {
> double abrBuffer = 2 * m_param->rc.rateTolerance * m_bitrate;
> + if (m_2pass)
nit: white-space problem on the line above
> + {
> + int64_t diff;
> + if (!m_isVbv)
> + {
> + m_predictedBits = m_totalBits;
> + if (rce->encodeOrder < m_param->frameNumThreads)
> + m_predictedBits += (int64_t)(rce->encodeOrder * m_bitrate / m_fps) ;
> + else
> + m_predictedBits += (int64_t)(m_param->frameNumThreads * m_bitrate / m_fps);
> + }
> + /* Adjust ABR buffer based on distance to the end of the video. */
> + if (m_numEntries > rce->encodeOrder)
> + {
> + uint64_t finalBits = m_rce2Pass[m_numEntries - 1].expectedBits;
> + double videoPos = (double)rce->expectedBits / finalBits;
> + double scaleFactor = sqrt((1 - videoPos) * m_numEntries);
> + abrBuffer *= 0.5 * X265_MAX(scaleFactor, 0.5);
> + }
>
> - /* 1pass ABR */
> + diff = m_predictedBits - (int64_t)rce->expectedBits;
> + q = rce->newQScale;
> + q /= Clip3(0.5, 2.0, (double)(abrBuffer - diff) / abrBuffer);
> + if (((rce->encodeOrder + 1 - m_param->frameNumThreads) >= m_fps) &&
> + (m_expectedBitsSum > 0))
> + {
> + /* Adjust quant based on the difference between
> + * achieved and expected bitrate so far */
> + double curTime = (double)rce->encodeOrder / m_numEntries;
> + double w = Clip3(0.0, 1.0, curTime * 100);
> + q *= pow((double)m_totalBits / m_expectedBitsSum, w);
> + }
> + rce->qpNoVbv = x265_qScale2qp(q);
> + if (m_isVbv)
> + {
> + /* Do not overflow vbv */
> + double expectedSize = qScale2bits(rce, q);
> + double expectedVbv = m_bufferFill + m_bufferRate - expectedSize;
> + double expectedFullness = rce->expectedVbv / m_bufferSize;
> + double qmax = q * (2 - expectedFullness);
> + double sizeConstraint = 1 + expectedFullness;
> + qmax = X265_MAX(qmax, rce->newQScale);
> + if (expectedFullness < .05)
> + qmax = MAX_MAX_QPSCALE;
> + qmax = X265_MIN(qmax, MAX_MAX_QPSCALE);
> + while (((expectedVbv < rce->expectedVbv/sizeConstraint) && (q < qmax)) ||
> + ((expectedVbv < 0) && (q < MAX_MAX_QPSCALE)))
> + {
> + q *= 1.05;
> + expectedSize = qScale2bits(rce, q);
> + expectedVbv = m_bufferFill + m_bufferRate - expectedSize;
> + }
>
> - /* Calculate the quantizer which would have produced the desired
> - * average bitrate if it had been applied to all frames so far.
> - * Then modulate that quant based on the current frame's complexity
> - * relative to the average complexity so far (using the 2pass RCEQ).
> - * Then bias the quant up or down if total size so far was far from
> - * the target.
> - * Result: Depending on the value of rate_tolerance, there is a
> - * tradeoff between quality and bitrate precision. But at large
> - * tolerances, the bit distribution approaches that of 2pass. */
> -
> - double wantedBits, overflow = 1;
> - rce->movingAvgSum = m_shortTermCplxSum;
> - m_shortTermCplxSum *= 0.5;
> - m_shortTermCplxCount *= 0.5;
> - m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
> - m_shortTermCplxCount++;
> - /* coeffBits to be used in 2-pass */
> - rce->coeffBits = (int)m_currentSatd;
> - rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
> - rce->mvBits = 0;
> - rce->sliceType = m_sliceType;
> -
> - if (m_param->rc.rateControlMode == X265_RC_CRF)
> - {
> - q = getQScale(rce, m_rateFactorConstant);
> + }
> + q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> }
> else
> {
> - if (!m_param->rc.bStatRead)
> - checkAndResetABR(rce, false);
> - q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
> + /* 1pass ABR */
>
> - /* ABR code can potentially be counterproductive in CBR, so just
> - * don't bother. Don't run it if the frame complexity is zero
> - * either. */
> - if (!m_isCbr && m_currentSatd)
> + /* Calculate the quantizer which would have produced the desired
> + * average bitrate if it had been applied to all frames so far.
> + * Then modulate that quant based on the current frame's complexity
> + * relative to the average complexity so far (using the 2pass RCEQ).
> + * Then bias the quant up or down if total size so far was far from
> + * the target.
> + * Result: Depending on the value of rate_tolerance, there is a
> + * tradeoff between quality and bitrate precision. But at large
> + * tolerances, the bit distribution approaches that of 2pass. */
> +
> + double wantedBits, overflow = 1;
> + rce->movingAvgSum = m_shortTermCplxSum;
> + m_shortTermCplxSum *= 0.5;
> + m_shortTermCplxCount *= 0.5;
> + m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
> + m_shortTermCplxCount++;
> + /* coeffBits to be used in 2-pass */
> + rce->coeffBits = (int)m_currentSatd;
> + rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
> + rce->mvBits = 0;
> + rce->sliceType = m_sliceType;
> +
> + if (m_param->rc.rateControlMode == X265_RC_CRF)
> {
> - /* use framesDone instead of POC as poc count is not serial with bframes enabled */
> - double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
> - wantedBits = timeDone * m_bitrate;
> - if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
> + q = getQScale(rce, m_rateFactorConstant);
> + }
> + else
> + {
> + if (!m_param->rc.bStatRead)
> + checkAndResetABR(rce, false);
> + q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
> +
> + /* ABR code can potentially be counterproductive in CBR, so just
> + * don't bother. Don't run it if the frame complexity is zero
> + * either. */
> + if (!m_isCbr && m_currentSatd)
> {
> - abrBuffer *= X265_MAX(1, sqrt(timeDone));
> - overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
> - q *= overflow;
> + /* use framesDone instead of POC as poc count is not serial with bframes enabled */
> + double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
> + wantedBits = timeDone * m_bitrate;
> + if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
> + {
> + abrBuffer *= X265_MAX(1, sqrt(timeDone));
> + overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
> + q *= overflow;
> + }
> }
> }
> +
> + if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
> + && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
> + {
> + q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
> + q /= fabs(m_param->rc.ipFactor);
> + }
> + else if (m_framesDone > 0)
> + {
> + if (m_param->rc.rateControlMode != X265_RC_CRF)
> + {
> + double lqmin = 0, lqmax = 0;
> + lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> + lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> + if (!m_residualFrames)
> + {
> + if (overflow > 1.1 && m_framesDone > 3)
> + lqmax *= m_lstep;
> + else if (overflow < 0.9)
> + lqmin /= m_lstep;
> + }
> + q = Clip3(lqmin, lqmax, q);
> + }
> + }
> + else if (m_qCompress != 1 && m_param->rc.rateControlMode == X265_RC_CRF)
> + {
> + q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
> + }
> + else if (m_framesDone == 0 && !m_isVbv)
> + {
> + /* for ABR alone, clip the first I frame qp */
> + double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
> + q = X265_MIN(lqmax, q);
> + }
> + q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> + rce->qpNoVbv = x265_qScale2qp(q);
> + q = clipQscale(pic, q);
> }
> -
> - if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
> - && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
> - {
> - q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
> - q /= fabs(m_param->rc.ipFactor);
> - }
> - else if (m_framesDone > 0)
> - {
> - if (m_param->rc.rateControlMode != X265_RC_CRF)
> - {
> - double lqmin = 0, lqmax = 0;
> - lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> - lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> - if (!m_residualFrames)
> - {
> - if (overflow > 1.1 && m_framesDone > 3)
> - lqmax *= m_lstep;
> - else if (overflow < 0.9)
> - lqmin /= m_lstep;
> - }
> - q = Clip3(lqmin, lqmax, q);
> - }
> - }
> - else if (m_qCompress != 1 && m_param->rc.rateControlMode == X265_RC_CRF)
> - {
> - q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
> - }
> - else if (m_framesDone == 0 && !m_isVbv)
> - {
> - /* for ABR alone, clip the first I frame qp */
> - double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
> - q = X265_MIN(lqmax, q);
> - }
> - q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
> - rce->qpNoVbv = x265_qScale2qp(q);
> - q = clipQscale(pic, q);
> m_lastQScaleFor[m_sliceType] = q;
> - if (m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q)
> + if ((m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q) && !(m_2pass && !m_isVbv))
> m_lastQScaleFor[P_SLICE] = q * fabs(m_param->rc.ipFactor);
>
> - rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
> + if (m_2pass && m_isVbv)
> + rce->frameSizePlanned = qScale2bits(rce, q);
> + else
> + rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
> rce->frameSizeEstimated = rce->frameSizePlanned;
> /* Always use up the whole VBV in this case. */
> if (m_singleFrameVbv)
> rce->frameSizePlanned = m_bufferRate;
>
> + rce->newQScale = q;
> return q;
> }
> }
> @@ -1913,6 +2009,8 @@
> m_totalBits += bits;
> }
> }
> + if (m_2pass)
> + m_expectedBitsSum += qScale2bits(rce, x265_qp2qScale(rce->newQp));
>
> if (m_isVbv)
> {
> diff -r 9d3683ab096b -r cf8b11c379f3 source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h Sat Jul 12 01:18:07 2014 -0500
> +++ b/source/encoder/ratecontrol.h Thu Jul 10 22:56:40 2014 +0530
> @@ -158,6 +158,8 @@
> int m_numEntries;
> RateControlEntry *m_rce2Pass;
> double m_lastAccumPNorm;
> + int64_t m_predictedBits;
> + double m_expectedBitsSum; /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
> struct
> {
> uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree quantizer data. */
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list