[x265] [PATCH RFC] rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr
Steve Borho
steve at borho.org
Fri Jul 18 19:28:40 CEST 2014
On 07/18, santhoshini at multicorewareinc.com wrote:
> # HG changeset patch
> # User Santhoshini Sekar <santhoshini at multicorewareinc.com>
> # Date 1405661006 -19800
> # Fri Jul 18 10:53:26 2014 +0530
> # Node ID 150a9b81cf871a3229116f560d8d2f5ad0fc7aac
> # Parent 93ab6ed75b01449b7cbfbec518d6974134291852
> rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr
>
> RateControl statistics are updated for every frame when refLagRows number of
> rows are completed in processRowEncoder. With this updated data rateControl
> predicts more accurate QP
Queued, but I would like you to make some follow-on patches.
> diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Thu Jul 17 20:54:19 2014 -0500
> +++ b/source/encoder/analysis.cpp Fri Jul 18 10:53:26 2014 +0530
> @@ -66,7 +66,7 @@
>
> m_rdCost.setPsyRdScale(m_param->psyRd);
> m_bEnableRDOQ = top->m_bEnableRDOQ;
> - m_bFrameParallel = top->m_totalFrameThreads > 1;
> + m_bFrameParallel = m_param->frameNumThreads > 1;
> m_numLayers = top->m_quadtreeTULog2MaxSize - top->m_quadtreeTULog2MinSize + 1;
>
> return initSearch();
> diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Thu Jul 17 20:54:19 2014 -0500
> +++ b/source/encoder/encoder.cpp Fri Jul 18 10:53:26 2014 +0530
> @@ -157,7 +157,7 @@
>
> if (m_frameEncoder)
> {
> - for (int i = 0; i < m_totalFrameThreads; i++)
> + for (int i = 0; i < m_param->frameNumThreads; i++)
> {
> // Ensure frame encoder is idle before destroying it
> m_frameEncoder[i].getEncodedPicture(m_nalList);
> @@ -310,20 +310,6 @@
> else
> m_lookahead->flush();
>
> - if (m_param->rc.rateControlMode == X265_RC_ABR)
> - {
> - // delay frame parallelism for non-VBV ABR
> - if (m_pocLast == 0 && !m_param->rc.vbvBufferSize && !m_param->rc.vbvMaxBitrate)
> - m_param->frameNumThreads = 1;
> - else if (m_param->frameNumThreads != m_totalFrameThreads)
> - {
> - // re-enable frame parallelism after the first few P frames are encoded
> - uint32_t frameCnt = (uint32_t)((0.5 * m_param->fpsNum / m_param->fpsDenom) / (m_param->bframes + 1));
> - if (m_analyzeP.m_numPics > frameCnt)
> - m_param->frameNumThreads = m_totalFrameThreads;
> - }
> - }
> -
> FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
> m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
> int ret = 0;
> @@ -392,26 +378,11 @@
> if (bChroma)
> m_numChromaWPBiFrames++;
> }
> -
> - uint64_t bytes = 0;
> - for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
> + if (m_aborted == true)
> {
> - int type = m_nalList.m_nal[i].type;
> -
> - // exclude SEI
> - if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
> - {
> - bytes += m_nalList.m_nal[i].sizeBytes;
> - // and exclude start code prefix
> - bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
> - }
> - }
> - if (m_rateControl->rateControlEnd(out, bytes << 3, &curEncoder->m_rce, &curEncoder->m_frameStats) < 0)
> - {
> - m_aborted = true;
> return -1;
> }
> - finishFrameStats(out, curEncoder, bytes << 3);
> + finishFrameStats(out, curEncoder, curEncoder->m_accessUnitBits);
> // Allow this frame to be recycled if no frame encoders are using it for reference
> if (!pic_out)
> {
> @@ -466,12 +437,17 @@
> // determine references, setup RPS, etc
> m_dpb->prepareEncode(fenc);
>
> - // set slice QP
> - m_rateControl->rateControlStart(fenc, m_lookahead, &curEncoder->m_rce, this);
At this point, if the rate control mode is ABR or CRF, we should call
m_lookahead->getEstimatedCost(), which stores the result in
fenc->m_lowres.satdCost (and also returns it, but we'll no longer need the
function to return a value).
Then in the frame encoder, we can pass this stored satdCost to
rateControlStart without having to pass in a pointer to the lookahead.
All traces of Lookahead can then be removed from ratecontrol.h and
ratecontrol.cpp.
> // Allow FrameEncoder::compressFrame() to start in a worker thread
> curEncoder->m_enable.trigger();
> }
> + else if (!fenc && m_encodedFrameNum > 0)
> + {
> + // faked rateControlStart calls to avoid rateControlEnd of last frameNumThreads parallel frames from waiting
> + RateControlEntry rce;
> + rce.encodeOrder = m_encodedFrameNum++;
> + m_rateControl->rateControlStart(NULL, m_lookahead, &rce, this);
> + }
>
> return ret;
> }
> @@ -1268,7 +1244,6 @@
> {
> x265_log(p, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n");
> }
> - m_totalFrameThreads = m_param->frameNumThreads;
>
> if (p->keyframeMax < 0)
> {
> diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/encoder.h
> --- a/source/encoder/encoder.h Thu Jul 17 20:54:19 2014 -0500
> +++ b/source/encoder/encoder.h Fri Jul 18 10:53:26 2014 +0530
> @@ -73,7 +73,6 @@
> {
> private:
>
> - bool m_aborted; // fatal error detected
> int m_pocLast; ///< time index (POC)
> int m_encodedFrameNum;
> int m_outputCount;
> @@ -84,7 +83,6 @@
> int64_t m_prevReorderedPts[2];
>
> ThreadPool* m_threadPool;
> - Lookahead* m_lookahead;
> FrameEncoder* m_frameEncoder;
> DPB* m_dpb;
>
> @@ -117,6 +115,9 @@
> ProfileTierLevel m_ptl;
> TComScalingList m_scalingList; // quantization matrix information
>
> + Lookahead* m_lookahead;
> +
> + bool m_aborted; // fatal error detected
>
> /* profile & level */
> Profile::Name m_profile;
> @@ -145,8 +146,6 @@
> Window m_conformanceWindow;
> Window m_defaultDisplayWindow;
>
> - int m_totalFrameThreads;
> -
> uint32_t m_numDelayedPic;
>
> Encoder();
> diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Thu Jul 17 20:54:19 2014 -0500
> +++ b/source/encoder/frameencoder.cpp Fri Jul 18 10:53:26 2014 +0530
> @@ -314,24 +314,6 @@
> m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
> }
>
> - int qp = slice->m_sliceQp;
> -
> - int chromaQPOffset = slice->m_pps->chromaCbQpOffset;
> - int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
> -
> - double lambda = x265_lambda2_tab[qp];
> - /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
> - double chromaLambda = x265_lambda2_tab[qpCb];
> -
> - // NOTE: set SAO lambda every Frame
> - m_frameFilter.m_sao.lumaLambda = lambda;
> - m_frameFilter.m_sao.chromaLambda = chromaLambda;
> -
> - // Clip qps back to 0-51 range before encoding
> - qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
> - slice->m_sliceQp = qp;
> - m_frame->m_avgQpAq = qp;
> -
> switch (slice->m_sliceType)
> {
> case I_SLICE:
> @@ -479,6 +461,23 @@
> }
> }
>
> + uint64_t bytes = 0;
> + for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
> + {
> + int type = m_nalList.m_nal[i].type;
> +
> + // exclude SEI
> + if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
> + {
> + bytes += m_nalList.m_nal[i].sizeBytes;
> + // and exclude start code prefix
> + bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
> + }
> + }
> + m_accessUnitBits = bytes << 3;
> + if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce, &m_frameStats) < 0)
> + m_top->m_aborted = true;
> +
> noiseReductionUpdate();
>
> m_elapsedCompressTime = (double)(x265_mdate() - startCompressTime) / 1000000;
> @@ -581,6 +580,27 @@
> PPAScopeEvent(FrameEncoder_compressRows);
> TComSlice* slice = m_frame->getSlice();
>
> + // set slice QP
> + m_top->m_rateControl->rateControlStart(m_frame, m_top->m_lookahead, &m_rce, m_top);
> + int qp = slice->m_sliceQp;
patch #2: return the QP and set the slice QP here. rateControlStart() shouldn't modify the slice:
int qp = m_top->m_rateControl->rateControlStart( ... );
slice->m_sliceQp = qp;
> +
> + int chromaQPOffset = slice->m_pps->chromaCbQpOffset;
> + int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
> +
> + double lambda = x265_lambda2_tab[qp];
> + /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
> + double chromaLambda = x265_lambda2_tab[qpCb];
> +
> + // NOTE: set SAO lambda every Frame
> + m_frameFilter.m_sao.lumaLambda = lambda;
> + m_frameFilter.m_sao.chromaLambda = chromaLambda;
> +
> + // Clip qps back to 0-51 range before encoding
> + qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
> + slice->m_sliceQp = qp;
oh look, it was being set again...
> + m_frame->m_avgQpAq = qp;
> +
> // reset entropy coders
> m_sbacCoder.resetEntropy(slice);
> for (int i = 0; i < this->m_numRows; i++)
> @@ -908,7 +928,34 @@
> }
> }
>
> + /* when a frame is half way through, update bits and complexity in rate control
> + * for it to be available for the next frame's QScale calculation. This makes it
> + * more accurate with updated value */
> + int rowCount = 0;
> +
> + /* for the first two seconds update when the frame is half done and for rest
> + * of the sequence update when refLagRows are completed */
> + if (m_param->rc.rateControlMode == X265_RC_ABR)
> + {
> + if (m_rce.encodeOrder <= 2 * (m_param->fpsNum / m_param->fpsDenom))
> + rowCount = m_numRows/2;
> + else
> + rowCount = m_refLagRows;
> + }
> +
> // this row of CTUs has been encoded
> + if (row == rowCount)
> + {
> + int64_t bits = 0;
> + for(uint32_t col = 0; col < rowCount * numCols; col++)
> + {
> + TComDataCU* cu = m_frame->getCU(col);
> + bits += cu->m_totalBits;
> + }
> +
> + m_rce.rowTotalBits = bits;
> + m_top->m_rateControl->rateControlUpdateStats(&m_rce);
> + }
>
> // trigger row-wise loop filters
> if (row >= m_filterRowDelay)
> diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Thu Jul 17 20:54:19 2014 -0500
> +++ b/source/encoder/frameencoder.h Fri Jul 18 10:53:26 2014 +0530
> @@ -133,6 +133,8 @@
> FrameStats m_frameStats; // stats of current frame for multipass encodes
> volatile bool m_bAllRowsStop;
> volatile int m_vbvResetTriggerRow;
> + Frame* m_frame;
> + uint64_t m_accessUnitBits;
>
> protected:
>
> @@ -152,7 +154,6 @@
> NALList m_nalList;
> ThreadLocalData m_tld;
>
> - Frame* m_frame;
>
> int m_filterRowDelay;
> int m_filterRowDelayCus;
> diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Thu Jul 17 20:54:19 2014 -0500
> +++ b/source/encoder/ratecontrol.cpp Fri Jul 18 10:53:26 2014 +0530
> @@ -296,10 +296,13 @@
>
> // validate for param->rc, maybe it is need to add a function like x265_parameters_valiate()
> m_residualFrames = 0;
> + m_partialResidualFrames = 0;
> m_residualCost = 0;
> + m_partialResidualCost = 0;
> m_rateFactorMaxIncrement = 0;
> m_rateFactorMaxDecrement = 0;
> m_fps = m_param->fpsNum / m_param->fpsDenom;
> + m_startEndOrder.set(0);
> if (m_param->rc.rateControlMode == X265_RC_CRF)
> {
> m_param->rc.qp = (int)m_param->rc.rfConstant;
> @@ -666,6 +669,7 @@
> m_totalBits = 0;
> m_framesDone = 0;
> m_residualCost = 0;
> + m_partialResidualCost = 0;
>
> /* 720p videos seem to be a good cutoff for cplxrSum */
> double tuneCplxFactor = (m_param->rc.cuTree && m_ncu > 3600) ? 2.5 : 1;
> @@ -979,6 +983,19 @@
>
> void RateControl::rateControlStart(Frame* pic, Lookahead *l, RateControlEntry* rce, Encoder* enc)
> {
> + int orderValue = m_startEndOrder.get();
> + int startOrdinal = rce->encodeOrder * 2;
> +
> + while (orderValue != startOrdinal && pic)
> + orderValue = m_startEndOrder.waitForChange(orderValue);
> +
> + if (!pic)
> + {
> + // faked rateControlStart calls
> + m_startEndOrder.incr();
> + return;
> + }
> +
> m_curSlice = pic->getSlice();
> m_sliceType = m_curSlice->m_sliceType;
> rce->sliceType = m_sliceType;
> @@ -991,6 +1008,8 @@
> rce->bLastMiniGopBFrame = pic->m_lowres.bLastMiniGopBFrame;
> rce->bufferRate = m_bufferRate;
> rce->poc = m_curSlice->m_poc;
> + rce->rowCplxrSum = 0.0;
> + rce->rowTotalBits = 0;
> if (m_isVbv)
> {
> if (rce->rowPreds[0][0].count == 0)
> @@ -1044,6 +1063,8 @@
> m_qp = Clip3(MIN_QP, MAX_MAX_QP, m_qp);
> rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
> }
> + // Do not increment m_startEndOrder here. Make rateControlEnd of previous thread
> + // to wait until rateControlUpdateStats of this frame is called
> m_framesDone++;
> /* set the final QP to slice structure */
> m_curSlice->m_sliceQp = m_qp;
> @@ -1278,7 +1299,7 @@
> /* use framesDone instead of POC as poc count is not serial with bframes enabled */
> double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
> wantedBits = timeDone * m_bitrate;
> - if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
> + if (wantedBits > 0 && m_totalBits > 0 && !m_partialResidualFrames)
> {
> abrBuffer *= X265_MAX(1, sqrt(timeDone));
> overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
> @@ -1300,7 +1321,7 @@
> double lqmin = 0, lqmax = 0;
> lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
> lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
> - if (!m_residualFrames)
> + if (!m_partialResidualFrames)
> {
> if (overflow > 1.1 && m_framesDone > 3)
> lqmax *= m_lstep;
> @@ -1342,16 +1363,17 @@
> if (rce->sliceType == I_SLICE)
> {
> /* previous I still had a residual; roll it into the new loan */
> - if (m_residualFrames)
> - rce->rowTotalBits += m_residualCost * m_residualFrames;
> + if (m_partialResidualFrames)
> + rce->rowTotalBits += m_partialResidualCost * m_partialResidualFrames;
>
> - m_residualFrames = X265_MIN(s_amortizeFrames, m_param->keyframeMax);
> - m_residualCost = (int)((rce->rowTotalBits * s_amortizeFraction) / m_residualFrames);
> - rce->rowTotalBits -= m_residualCost * m_residualFrames;
> + m_partialResidualFrames = X265_MIN(s_amortizeFrames, m_param->keyframeMax);
> + m_partialResidualCost = (int)((rce->rowTotalBits * s_amortizeFraction) /m_partialResidualFrames);
> + rce->rowTotalBits -= m_partialResidualCost * m_partialResidualFrames;
> }
> - else if (m_residualFrames)
> + else if (m_partialResidualFrames)
> {
> - rce->rowTotalBits += m_residualCost;
> + rce->rowTotalBits += m_partialResidualCost;
> + m_partialResidualFrames--;
> }
>
> if (rce->sliceType != B_SLICE)
> @@ -1361,6 +1383,13 @@
>
> m_cplxrSum += rce->rowCplxrSum;
> m_totalBits += rce->rowTotalBits;
> +
> + /* do not allow the next frame to enter rateControlStart() until this
> + * frame has updated its mid-frame statistics */
> + m_startEndOrder.incr();
> +
> + if (rce->encodeOrder < m_param->frameNumThreads - 1)
> + m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
> }
>
> void RateControl::checkAndResetABR(RateControlEntry* rce, bool isFrameDone)
> @@ -1819,6 +1848,11 @@
> /* After encoding one frame, update rate control state */
> int RateControl::rateControlEnd(Frame* pic, int64_t bits, RateControlEntry* rce, FrameStats* stats)
> {
> + int orderValue = m_startEndOrder.get();
> + int endOrdinal = (rce->encodeOrder + m_param->frameNumThreads) * 2 - 1;
> + while (orderValue != endOrdinal)
> + orderValue = m_startEndOrder.waitForChange(orderValue);
> +
> int64_t actualBits = bits;
> if (m_isAbr)
> {
> @@ -1918,17 +1952,19 @@
> }
> }
> if (rce->sliceType != B_SLICE)
> + {
> /* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
> * to improve short term compensation for next frame. */
> - m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq;
> + m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq) - (rce->rowCplxrSum);
> + }
> else
> {
> /* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
> * Not perfectly accurate with B-refs, but good enough. */
> - m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor));
> + m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor))) - (rce->rowCplxrSum);
> }
> m_wantedBitsWindow += m_frameDuration * m_bitrate;
> - m_totalBits += bits;
> + m_totalBits += bits - rce->rowTotalBits;
> }
> }
>
> @@ -1972,6 +2008,8 @@
> rce->hrdTiming->dpbOutputTime = (double)rce->picTimingSEI->m_picDpbOutputDelay * time->numUnitsInTick / time->timeScale + rce->hrdTiming->cpbRemovalTime;
> }
> }
> + // Allow rateControlStart of next frame only when rateControlEnd of previous frame is over
> + m_startEndOrder.incr();
> rce->isActive = false;
> return 0;
>
> diff -r 93ab6ed75b01 -r 150a9b81cf87 source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h Thu Jul 17 20:54:19 2014 -0500
> +++ b/source/encoder/ratecontrol.h Fri Jul 18 10:53:26 2014 +0530
> @@ -147,6 +147,22 @@
> int64_t m_totalBits; /* total bits used for already encoded frames */
> int m_framesDone; /* # of frames passed through RateCotrol already */
> double m_fps;
> +
> + /* a common variable on which rateControlStart, rateControlEnd and rateControUpdateStats waits to
> + * sync the calls to these functions.For example
> + * -F2:
> + * rceStart 10
> + * rceUpdate 10
> + * rceEnd 9
> + * rceStart 11
> + * rceUpdate 11
> + * rceEnd 10
> + * rceStart 12
> + * rceUpdate 12
> + * rceEnd 11 */
> +
> + ThreadSafeInteger m_startEndOrder;
> +
> /* hrd stuff */
> SEIBufferingPeriod m_bufPeriodSEI;
> double m_nominalRemovalTime;
> @@ -185,7 +201,9 @@
> static const char *s_defaultStatFileName;
>
> int m_residualFrames;
> + int m_partialResidualFrames;
> int m_residualCost;
> + int m_partialResidualCost;
>
> double getQScale(RateControlEntry *rce, double rateFactor);
> double rateEstimateQscale(Frame* pic, RateControlEntry *rce); // main logic for calculating QP based on ABR
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list