[x265] [PATCH] Add VBV support for No-WPP
Pradeep Ramachandran
pradeep at multicorewareinc.com
Thu Oct 27 08:56:41 CEST 2016
On Wed, Oct 26, 2016 at 11:08 AM, <aruna at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Aruna Matheswaran
> # Date 1476352364 -19800
> # Thu Oct 13 15:22:44 2016 +0530
> # Node ID f9e7422416c9d2d4f7b7618791a7c28592de4828
> # Parent bc911034c2a07380630aff98fdda38038b2ae62e
> Add VBV support for No-WPP
>
Can you please share what improvement in encoding efficiency you see on
command lines that use VBV, with and without WPP?
> diff -r bc911034c2a0 -r f9e7422416c9 source/common/bitstream.h
> --- a/source/common/bitstream.h Tue Oct 25 11:32:10 2016 +0530
> +++ b/source/common/bitstream.h Thu Oct 13 15:22:44 2016 +0530
> @@ -71,6 +71,7 @@
> uint32_t getNumberOfWrittenBytes() const { return m_byteOccupancy; }
> uint32_t getNumberOfWrittenBits() const { return m_byteOccupancy * 8
> + m_partialByteBits; }
> const uint8_t* getFIFO() const { return m_fifo; }
> + void copyBits(Bitstream* stream) { m_partialByteBits =
> stream->m_partialByteBits; m_byteOccupancy = stream->m_byteOccupancy;
> m_partialByte = stream->m_partialByte; }
>
> void write(uint32_t val, uint32_t numBits);
> void writeByte(uint32_t val);
> diff -r bc911034c2a0 -r f9e7422416c9 source/common/framedata.h
> --- a/source/common/framedata.h Tue Oct 25 11:32:10 2016 +0530
> +++ b/source/common/framedata.h Thu Oct 13 15:22:44 2016 +0530
> @@ -126,10 +126,10 @@
> uint32_t encodedBits; /* sum of 'totalBits' of encoded CTUs */
> uint32_t satdForVbv; /* sum of lowres (estimated) costs for
> entire row */
> uint32_t intraSatdForVbv; /* sum of lowres (estimated) intra
> costs for entire row */
> - uint32_t diagSatd;
> - uint32_t diagIntraSatd;
> - double diagQp;
> - double diagQpScale;
> + uint32_t rowSatd;
> + uint32_t rowIntraSatd;
> + double rowQp;
> + double rowQpScale;
> double sumQpRc;
> double sumQpAq;
> };
> diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Tue Oct 25 11:32:10 2016 +0530
> +++ b/source/encoder/encoder.cpp Thu Oct 13 15:22:44 2016 +0530
> @@ -149,12 +149,6 @@
> p->bEnableWavefront = p->bDistributeModeAnalysis =
> p->bDistributeMotionEstimation = p->lookaheadSlices = 0;
> }
>
> - if (!p->bEnableWavefront && p->rc.vbvBufferSize)
> - {
> - x265_log(p, X265_LOG_ERROR, "VBV requires wavefront
> parallelism\n");
> - m_aborted = true;
> - }
> -
> x265_log(p, X265_LOG_INFO, "Slices :
> %d\n", p->maxSlices);
>
> char buf[128];
> diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Tue Oct 25 11:32:10 2016 +0530
> +++ b/source/encoder/frameencoder.cpp Thu Oct 13 15:22:44 2016 +0530
> @@ -50,6 +50,7 @@
> m_bAllRowsStop = false;
> m_vbvResetTriggerRow = -1;
> m_outStreams = NULL;
> + m_backupStreams = NULL;
> m_substreamSizes = NULL;
> m_nr = NULL;
> m_tld = NULL;
> @@ -85,6 +86,7 @@
>
> delete[] m_rows;
> delete[] m_outStreams;
> + delete[] m_backupStreams;
> X265_FREE(m_sliceBaseRow);
> X265_FREE(m_cuGeoms);
> X265_FREE(m_ctuGeomMap);
> @@ -532,6 +534,8 @@
> if (!m_outStreams)
> {
> m_outStreams = new Bitstream[numSubstreams];
> + if (!m_param->bEnableWavefront)
> + m_backupStreams = new Bitstream[numSubstreams];
> m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
> if (!m_param->bEnableSAO)
> for (uint32_t i = 0; i < numSubstreams; i++)
> @@ -1203,17 +1207,25 @@
>
> if (bIsVbv)
> {
> - if (!row)
> + if (col == 0 && !m_param->bEnableWavefront)
> + {
> + m_backupStreams[0].copyBits(&m_outStreams[0]);
> + curRow.bufferedEntropy.copyState(rowCoder);
> + curRow.bufferedEntropy.loadContexts(rowCoder);
> + }
> + if (!row && m_vbvResetTriggerRow != intRow)
> {
> - curEncData.m_rowStat[row].diagQp = curEncData.m_avgQpRc;
> - curEncData.m_rowStat[row].diagQpScale =
> x265_qp2qScale(curEncData.m_avgQpRc);
> + curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
> + curEncData.m_rowStat[row].rowQpScale =
> x265_qp2qScale(curEncData.m_avgQpRc);
> }
>
> FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
> - if (row >= col && row && m_vbvResetTriggerRow != intRow)
> - cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols +
> 1].baseQp;
> + if (m_param->bEnableWavefront && row >= col && row &&
> m_vbvResetTriggerRow != intRow)
> + cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols +
> 1].baseQp;
> + else if (!m_param->bEnableWavefront && row &&
> m_vbvResetTriggerRow != intRow)
> + cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
> else
> - cuStat.baseQp = curEncData.m_rowStat[row].diagQp;
> + cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
>
> /* TODO: use defines from slicetype.h for lowres block size */
> uint32_t block_y = (ctu->m_cuPelY >> g_maxLog2CUSize) *
> noOfBlocks;
> @@ -1364,21 +1376,52 @@
> if (bIsVbv)
> {
> // Update encoded bits, satdCost, baseQP for each CU
> - curEncData.m_rowStat[row].diagSatd +=
> curEncData.m_cuStat[cuAddr].vbvCost;
> - curEncData.m_rowStat[row].diagIntraSatd +=
> curEncData.m_cuStat[cuAddr].intraVbvCost;
> + curEncData.m_rowStat[row].rowSatd +=
> curEncData.m_cuStat[cuAddr].vbvCost;
> + curEncData.m_rowStat[row].rowIntraSatd +=
> curEncData.m_cuStat[cuAddr].intraVbvCost;
> curEncData.m_rowStat[row].encodedBits +=
> curEncData.m_cuStat[cuAddr].totalBits;
> curEncData.m_rowStat[row].sumQpRc +=
> curEncData.m_cuStat[cuAddr].baseQp;
> curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
>
> + // If current block is at row end checkpoint, call vbv
> ratecontrol.
> +
> + if (!m_param->bEnableWavefront && col == numCols - 1)
> + {
> + double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
> + int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
> row, &m_rce, qpBase);
> + qpBase = x265_clip3((double)m_param->rc.qpMin,
> (double)m_param->rc.qpMax, qpBase);
> + curEncData.m_rowStat[row].rowQp = qpBase;
> + curEncData.m_rowStat[row].rowQpScale =
> x265_qp2qScale(qpBase);
> + if (reEncode < 0)
> + {
> + x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d -
> encode restart required for VBV, to %.2f from %.2f\n",
> + m_frame->m_poc, row, qpBase,
> curEncData.m_cuStat[cuAddr].baseQp);
> +
> + m_vbvResetTriggerRow = row;
> + m_outStreams[0].copyBits(&m_backupStreams[0]);
> +
> + rowCoder.copyState(curRow.bufferedEntropy);
> + rowCoder.loadContexts(curRow.bufferedEntropy);
> +
> + curRow.completed = 0;
> + memset(&curRow.rowStats, 0, sizeof(curRow.rowStats));
> + curEncData.m_rowStat[row].numEncodedCUs = 0;
> + curEncData.m_rowStat[row].encodedBits = 0;
> + curEncData.m_rowStat[row].rowSatd = 0;
> + curEncData.m_rowStat[row].rowIntraSatd = 0;
> + curEncData.m_rowStat[row].sumQpRc = 0;
> + curEncData.m_rowStat[row].sumQpAq = 0;
> + }
> + }
> +
> // If current block is at row diagonal checkpoint, call vbv
> ratecontrol.
>
> - if (row == col && row)
> + else if (m_param->bEnableWavefront && row == col && row)
> {
> double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
> - int reEncode = m_top->m_rateControl->
> rowDiagonalVbvRateControl(m_frame, row, &m_rce, qpBase);
> + int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame,
> row, &m_rce, qpBase);
> qpBase = x265_clip3((double)m_param->rc.qpMin,
> (double)m_param->rc.qpMax, qpBase);
> - curEncData.m_rowStat[row].diagQp = qpBase;
> - curEncData.m_rowStat[row].diagQpScale =
> x265_qp2qScale(qpBase);
> + curEncData.m_rowStat[row].rowQp = qpBase;
> + curEncData.m_rowStat[row].rowQpScale =
> x265_qp2qScale(qpBase);
>
> if (reEncode < 0)
> {
> @@ -1431,8 +1474,8 @@
> memset(&stopRow.rowStats, 0,
> sizeof(stopRow.rowStats));
> curEncData.m_rowStat[r].numEncodedCUs = 0;
> curEncData.m_rowStat[r].encodedBits = 0;
> - curEncData.m_rowStat[r].diagSatd = 0;
> - curEncData.m_rowStat[r].diagIntraSatd = 0;
> + curEncData.m_rowStat[r].rowSatd = 0;
> + curEncData.m_rowStat[r].rowIntraSatd = 0;
> curEncData.m_rowStat[r].sumQpRc = 0;
> curEncData.m_rowStat[r].sumQpAq = 0;
> }
> diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Tue Oct 25 11:32:10 2016 +0530
> +++ b/source/encoder/frameencoder.h Thu Oct 13 15:22:44 2016 +0530
> @@ -184,6 +184,7 @@
> NoiseReduction* m_nr;
> ThreadLocalData* m_tld; /* for --no-wpp */
> Bitstream* m_outStreams;
> + Bitstream* m_backupStreams;
> uint32_t* m_substreamSizes;
>
> CUGeom* m_cuGeoms;
> diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Tue Oct 25 11:32:10 2016 +0530
> +++ b/source/encoder/ratecontrol.cpp Thu Oct 13 15:22:44 2016 +0530
> @@ -2180,7 +2180,7 @@
> for (uint32_t row = 0; row < maxRows; row++)
> {
> encodedBitsSoFar += curEncData.m_rowStat[row].encodedBits;
> - rowSatdCostSoFar = curEncData.m_rowStat[row].diagSatd;
> + rowSatdCostSoFar = curEncData.m_rowStat[row].rowSatd;
> uint32_t satdCostForPendingCus = curEncData.m_rowStat[row].satdForVbv
> - rowSatdCostSoFar;
> satdCostForPendingCus >>= X265_DEPTH - 8;
> if (satdCostForPendingCus > 0)
> @@ -2209,7 +2209,7 @@
> }
>
> refRowSatdCost >>= X265_DEPTH - 8;
> - refQScale = refEncData.m_rowStat[row].diagQpScale;
> + refQScale = refEncData.m_rowStat[row].rowQpScale;
> }
>
> if (picType == I_SLICE || qScale >= refQScale)
> @@ -2231,7 +2231,7 @@
> }
> else if (picType == P_SLICE)
> {
> - intraCostForPendingCus = curEncData.m_rowStat[row].intraSatdForVbv
> - curEncData.m_rowStat[row].diagIntraSatd;
> + intraCostForPendingCus = curEncData.m_rowStat[row].intraSatdForVbv
> - curEncData.m_rowStat[row].rowIntraSatd;
> intraCostForPendingCus >>= X265_DEPTH - 8;
> /* Our QP is lower than the reference! */
> double pred_intra = predictSize(rce->rowPred[1], qScale,
> intraCostForPendingCus);
> @@ -2246,16 +2246,16 @@
> return totalSatdBits + encodedBitsSoFar;
> }
>
> -int RateControl::rowDiagonalVbvRateControl(Frame* curFrame, uint32_t
> row, RateControlEntry* rce, double& qpVbv)
> +int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row,
> RateControlEntry* rce, double& qpVbv)
> {
> FrameData& curEncData = *curFrame->m_encData;
> double qScaleVbv = x265_qp2qScale(qpVbv);
> - uint64_t rowSatdCost = curEncData.m_rowStat[row].diagSatd;
> + uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd;
> double encodedBits = curEncData.m_rowStat[row].encodedBits;
>
> - if (row == 1)
> + if (m_param->bEnableWavefront && row == 1)
> {
> - rowSatdCost += curEncData.m_rowStat[0].diagSatd;
> + rowSatdCost += curEncData.m_rowStat[0].rowSatd;
> encodedBits += curEncData.m_rowStat[0].encodedBits;
> }
> rowSatdCost >>= X265_DEPTH - 8;
> @@ -2263,11 +2263,11 @@
> if (curEncData.m_slice->m_sliceType != I_SLICE)
> {
> Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
> - if (qpVbv < refFrame->m_encData->m_rowStat[row].diagQp)
> + if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp)
> {
> - uint64_t intraRowSatdCost = curEncData.m_rowStat[row].
> diagIntraSatd;
> - if (row == 1)
> - intraRowSatdCost += curEncData.m_rowStat[0].
> diagIntraSatd;
> + uint64_t intraRowSatdCost = curEncData.m_rowStat[row].
> rowIntraSatd;
> + if (m_param->bEnableWavefront && row == 1)
> + intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd;
> intraRowSatdCost >>= X265_DEPTH - 8;
> updatePredictor(rce->rowPred[1], qScaleVbv,
> (double)intraRowSatdCost, encodedBits);
> }
> @@ -2328,7 +2328,7 @@
> }
>
> while (qpVbv > qpMin
> - && (qpVbv > curEncData.m_rowStat[0].diagQp ||
> m_singleFrameVbv)
> + && (qpVbv > curEncData.m_rowStat[0].rowQp ||
> m_singleFrameVbv)
> && (((accFrameBits < rce->frameSizePlanned * 0.8f && qpVbv
> <= prevRowQp)
> || accFrameBits < (rce->bufferFill - m_bufferSize +
> m_bufferRate) * 1.1)
> && (!m_param->rc.bStrictCbr ? 1 : abrOvershoot < 0)))
> @@ -2348,7 +2348,7 @@
> accFrameBits = predictRowsSizeSum(curFrame, rce, qpVbv,
> encodedBitsSoFar);
> abrOvershoot = (accFrameBits + m_totalBits -
> m_wantedBitsWindow) / totalBitsNeeded;
> }
> - if (qpVbv > curEncData.m_rowStat[0].diagQp &&
> + if (qpVbv > curEncData.m_rowStat[0].rowQp &&
> abrOvershoot < -0.1 && timeDone > 0.5 && accFrameBits <
> rce->frameSizePlanned - rcTol)
> {
> qpVbv -= stepSize;
> diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.h
> --- a/source/encoder/ratecontrol.h Tue Oct 25 11:32:10 2016 +0530
> +++ b/source/encoder/ratecontrol.h Thu Oct 13 15:22:44 2016 +0530
> @@ -243,7 +243,7 @@
> int rateControlStart(Frame* curFrame, RateControlEntry* rce,
> Encoder* enc);
> void rateControlUpdateStats(RateControlEntry* rce);
> int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry*
> rce);
> - int rowDiagonalVbvRateControl(Frame* curFrame, uint32_t row,
> RateControlEntry* rce, double& qpVbv);
> + int rowVbvRateControl(Frame* curFrame, uint32_t row,
> RateControlEntry* rce, double& qpVbv);
> int rateControlSliceType(int frameNum);
> bool cuTreeReadFor2Pass(Frame* curFrame);
> void hrdFullness(SEIBufferingPeriod* sei);
> diff -r bc911034c2a0 -r f9e7422416c9 source/test/rate-control-tests.txt
> --- a/source/test/rate-control-tests.txt Tue Oct 25 11:32:10 2016
> +0530
> +++ b/source/test/rate-control-tests.txt Thu Oct 13 15:22:44 2016
> +0530
> @@ -21,6 +21,9 @@
> big_buck_bunny_360p24.y4m,--preset medium --bitrate 400 --vbv-bufsize
> 600 --vbv-maxrate 600 --aud --hrd --tune fast-decode
> sita_1920x1080_30.yuv,--preset superfast --crf 25 --vbv-bufsize 3000
> --vbv-maxrate 4000 --vbv-bufsize 5000 --hrd --crf-max 30
> sita_1920x1080_30.yuv,--preset superfast --bitrate 3000 --vbv-bufsize
> 3000 --vbv-maxrate 3000 --aud --strict-cbr
> +BasketballDrive_1920x1080_50.y4m,--preset ultrafast --bitrate 3000
> --vbv-bufsize 3000 --vbv-maxrate 3000 --no-wpp
> +big_buck_bunny_360p24.y4m,--preset medium --bitrate 400 --vbv-bufsize
> 600 --vbv-maxrate 600 --no-wpp --aud --hrd --tune fast-decode
> +sita_1920x1080_30.yuv,--preset superfast --bitrate 3000 --vbv-bufsize
> 3000 --vbv-maxrate 3000 --aud --strict-cbr --no-wpp
>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161027/23a697eb/attachment-0001.html>
More information about the x265-devel mailing list