[x265] [PATCH] ratecontrol: improve visual quality and bitrate savings in ABR
Steve Borho
steve at borho.org
Thu May 29 20:22:29 CEST 2014
On Thu, May 29, 2014 at 1:10 PM, <aarthi at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Aarthi Thirumalai
> # Date 1401386381 -19800
> # Thu May 29 23:29:41 2014 +0530
> # Node ID cbefd4760814f85da2f805781177a1fade437709
> # Parent e9776dfd1471ec6691276518007b725095ab6d52
> ratecontrol: improve visual quality and bitrate savings in ABR.
>
> Try to prevent ABR over-compensation after I frames by amortizing the cost over
> the next few frames;
>
> Improve ABR quality with frame parallelism - enable frame parallelism only after first
> few P frames to prevent excessive qp fluctuations.
>
> Fix initial I frame qp. When cu tree is enabled, the qp decided arbitrarily is
> too low. This causes a huge qp spike in immediate P frames. Tuned cplxrSum
> for initial I frame so that a more appropriate qp is chosen.
>
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Wed May 28 20:39:40 2014 -0500
> +++ b/source/encoder/encoder.cpp Thu May 29 23:29:41 2014 +0530
> @@ -186,6 +186,7 @@
> }
> m_lookahead->init();
> m_encodeStartTime = x265_mdate();
> + m_totalFrameThreads = param->frameNumThreads;
> }
>
> int Encoder::getStreamHeaders(NALUnitEBSP **nalunits)
> @@ -323,6 +324,20 @@
> if (flush)
> m_lookahead->flush();
>
> + if (param->rc.rateControlMode == X265_RC_ABR)
> + {
> + // delay frame parallelism for non-VBV ABR
> + if (m_pocLast == 0 && !param->rc.vbvBufferSize && !param->rc.vbvMaxBitrate)
> + param->frameNumThreads = 1;
> + else if (param->frameNumThreads != m_totalFrameThreads)
> + {
> + // re-enable frame parallelism after the first few P frames are encoded
> + uint32_t frameCnt = (uint32_t)((0.5 * param->fpsNum / param->fpsDenom) / (param->bframes + 1));
> + if (m_analyzeP.m_numPics > frameCnt)
> + param->frameNumThreads = m_totalFrameThreads;
> + }
> + }
There are a few places where param->frameNumThreads is used that should be
modified to use m_totalFrameThreads instead. In particular, the
encoder destroy() function, the frame encoder, and other places
which peek at this param to determine whether frame parallelism is enabled
(and therefore whether the ME search range is restricted). Leaving those
uses unchanged could lead to non-determinism or memory leaks.
> +
> FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
> m_curEncoder = (m_curEncoder + 1) % param->frameNumThreads;
> int ret = 0;
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/encoder.h
> --- a/source/encoder/encoder.h Wed May 28 20:39:40 2014 -0500
> +++ b/source/encoder/encoder.h Thu May 29 23:29:41 2014 +0530
> @@ -90,6 +90,7 @@
> DPB* m_dpb;
> /* frame parallelism */
> int m_curEncoder;
> + int m_totalFrameThreads;
>
> /* Collect statistics globally */
> EncStats m_analyzeAll;
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Wed May 28 20:39:40 2014 -0500
> +++ b/source/encoder/ratecontrol.cpp Thu May 29 23:29:41 2014 +0530
> @@ -30,6 +30,10 @@
>
> using namespace x265;
>
> +/* Amortize the partial cost of I frames over the next N frames */
> +const double RateControl::amortizeFraction = 0.85;
> +const int RateControl::amortizeFrames = 75;
> +
> /* Compute variance to derive AC energy of each block */
> static inline uint32_t acEnergyVar(TComPic *pic, uint64_t sum_ssd, int shift, int i)
> {
> @@ -204,6 +208,8 @@
> qCompress = param->rc.qCompress;
>
> // validate for param->rc, maybe it is need to add a function like x265_parameters_valiate()
> + residualFrames = 0;
> + residualCost = 0;
> param->rc.rfConstant = Clip3((double)-QP_BD_OFFSET, (double)51, param->rc.rfConstant);
> param->rc.rfConstantMax = Clip3((double)-QP_BD_OFFSET, (double)51, param->rc.rfConstantMax);
> rateFactorMaxIncrement = 0;
> @@ -316,7 +322,7 @@
> {
> /* Adjust the first frame in order to stabilize the quality level compared to the rest */
> #define ABR_INIT_QP_MIN (24 + QP_BD_OFFSET)
> -#define ABR_INIT_QP_MAX (34 + QP_BD_OFFSET)
> +#define ABR_INIT_QP_MAX (40 + QP_BD_OFFSET)
> }
> else if (param->rc.rateControlMode == X265_RC_CRF)
> {
> @@ -353,9 +359,12 @@
> {
> totalBits = 0;
> framesDone = 0;
> -
> + double tuneCplxFactor = 1;
> + /* 720p videos seem to be a good cutoff for cplxrSum */
> + if (param->rc.cuTree && ncu > 3600)
> + tuneCplxFactor = 2.5;
> /* estimated ratio that produces a reasonable QP for the first I-frame */
> - cplxrSum = .01 * pow(7.0e5, qCompress) * pow(ncu, 0.5);
> + cplxrSum = .01 * pow(7.0e5, qCompress) * pow(ncu, 0.5) * tuneCplxFactor;
> wantedBitsWindow = bitrate * frameDuration;
> accumPNorm = .01;
> accumPQp = (param->rc.rateControlMode == X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN) * accumPNorm;
> @@ -550,7 +559,7 @@
> /* use framesDone instead of POC as poc count is not serial with bframes enabled */
> double timeDone = (double)(framesDone - param->frameNumThreads + 1) * frameDuration;
> wantedBits = timeDone * bitrate;
> - if (wantedBits > 0 && totalBits > 0)
> + if (wantedBits > 0 && totalBits > 0 && !residualFrames)
> {
> abrBuffer *= X265_MAX(1, sqrt(timeDone));
> overflow = Clip3(.5, 2.0, 1.0 + (totalBits - wantedBits) / abrBuffer);
> @@ -572,10 +581,13 @@
> double lqmin = 0, lqmax = 0;
> lqmin = lastQScaleFor[sliceType] / lstep;
> lqmax = lastQScaleFor[sliceType] * lstep;
> - if (overflow > 1.1 && framesDone > 3)
> - lqmax *= lstep;
> - else if (overflow < 0.9)
> - lqmin /= lstep;
> + if (!residualFrames)
> + {
> + if (overflow > 1.1 && framesDone > 3)
> + lqmax *= lstep;
> + else if (overflow < 0.9)
> + lqmin /= lstep;
> + }
> q = Clip3(lqmin, lqmax, q);
> }
> }
> @@ -1083,6 +1095,24 @@
> }
> }
>
> + /* amortize part of each I slice over the next several frames, up to
> + * keyint-max, to avoid over-compensating for the large I slice cost */
> + if (rce->sliceType == I_SLICE)
> + {
> + /* previous I still had a residual; roll it into the new loan */
> + if (residualFrames)
> + bits += residualCost * residualFrames;
> +
> + residualFrames = X265_MIN(amortizeFrames, param->keyframeMax);
> + residualCost = (int)((bits * amortizeFraction) / residualFrames);
> + bits -= residualCost * residualFrames;
> + }
> + else if (residualFrames)
> + {
> + bits += residualCost;
> + residualFrames--;
> + }
> +
> if (rce->sliceType != B_SLICE)
> /* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
> * to improve short term compensation for next frame. */
> @@ -1111,7 +1141,7 @@
> }
> }
> updateVbv(bits, rce);
> - rce->isActive = false;
> }
> + rce->isActive = false;
> return 0;
> }
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h Wed May 28 20:39:40 2014 -0500
> +++ b/source/encoder/ratecontrol.h Thu May 29 23:29:41 2014 +0530
> @@ -132,6 +132,12 @@
>
> protected:
>
> + static const double amortizeFraction;
> + static const int amortizeFrames;
> +
> + int residualFrames;
> + int residualCost;
> +
> void init();
> double getQScale(RateControlEntry *rce, double rateFactor);
> double rateEstimateQscale(TComPic* pic, RateControlEntry *rce); // main logic for calculating QP based on ABR
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list