[x265] [PATCH] Add VBV support for No-WPP
aruna at multicorewareinc.com
aruna at multicorewareinc.com
Wed Oct 26 07:38:16 CEST 2016
# HG changeset patch
# User Aruna Matheswaran
# Date 1476352364 -19800
# Thu Oct 13 15:22:44 2016 +0530
# Node ID f9e7422416c9d2d4f7b7618791a7c28592de4828
# Parent bc911034c2a07380630aff98fdda38038b2ae62e
Add VBV support for No-WPP
diff -r bc911034c2a0 -r f9e7422416c9 source/common/bitstream.h
--- a/source/common/bitstream.h Tue Oct 25 11:32:10 2016 +0530
+++ b/source/common/bitstream.h Thu Oct 13 15:22:44 2016 +0530
@@ -71,6 +71,7 @@
uint32_t getNumberOfWrittenBytes() const { return m_byteOccupancy; }
uint32_t getNumberOfWrittenBits() const { return m_byteOccupancy * 8 + m_partialByteBits; }
const uint8_t* getFIFO() const { return m_fifo; }
+ void copyBits(Bitstream* stream) { m_partialByteBits = stream->m_partialByteBits; m_byteOccupancy = stream->m_byteOccupancy; m_partialByte = stream->m_partialByte; } /* copies only the write-position fields (m_partialByteBits, m_byteOccupancy, m_partialByte) from 'stream'; m_fifo is not touched by this call */
void write(uint32_t val, uint32_t numBits);
void writeByte(uint32_t val);
diff -r bc911034c2a0 -r f9e7422416c9 source/common/framedata.h
--- a/source/common/framedata.h Tue Oct 25 11:32:10 2016 +0530
+++ b/source/common/framedata.h Thu Oct 13 15:22:44 2016 +0530
@@ -126,10 +126,10 @@
uint32_t encodedBits; /* sum of 'totalBits' of encoded CTUs */
uint32_t satdForVbv; /* sum of lowres (estimated) costs for entire row */
uint32_t intraSatdForVbv; /* sum of lowres (estimated) intra costs for entire row */
- uint32_t diagSatd;
- uint32_t diagIntraSatd;
- double diagQp;
- double diagQpScale;
+ uint32_t rowSatd;
+ uint32_t rowIntraSatd;
+ double rowQp;
+ double rowQpScale;
double sumQpRc;
double sumQpAq;
};
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/encoder.cpp Thu Oct 13 15:22:44 2016 +0530
@@ -149,12 +149,6 @@
p->bEnableWavefront = p->bDistributeModeAnalysis = p->bDistributeMotionEstimation = p->lookaheadSlices = 0;
}
- if (!p->bEnableWavefront && p->rc.vbvBufferSize)
- {
- x265_log(p, X265_LOG_ERROR, "VBV requires wavefront parallelism\n");
- m_aborted = true;
- }
-
x265_log(p, X265_LOG_INFO, "Slices : %d\n", p->maxSlices);
char buf[128];
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/frameencoder.cpp Thu Oct 13 15:22:44 2016 +0530
@@ -50,6 +50,7 @@
m_bAllRowsStop = false;
m_vbvResetTriggerRow = -1;
m_outStreams = NULL;
+ m_backupStreams = NULL;
m_substreamSizes = NULL;
m_nr = NULL;
m_tld = NULL;
@@ -85,6 +86,7 @@
delete[] m_rows;
delete[] m_outStreams;
+ delete[] m_backupStreams;
X265_FREE(m_sliceBaseRow);
X265_FREE(m_cuGeoms);
X265_FREE(m_ctuGeomMap);
@@ -532,6 +534,8 @@
if (!m_outStreams)
{
m_outStreams = new Bitstream[numSubstreams];
+ if (!m_param->bEnableWavefront)
+ m_backupStreams = new Bitstream[numSubstreams];
m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);
if (!m_param->bEnableSAO)
for (uint32_t i = 0; i < numSubstreams; i++)
@@ -1203,17 +1207,25 @@
if (bIsVbv)
{
- if (!row)
+ if (col == 0 && !m_param->bEnableWavefront)
+ {
+ m_backupStreams[0].copyBits(&m_outStreams[0]);
+ curRow.bufferedEntropy.copyState(rowCoder);
+ curRow.bufferedEntropy.loadContexts(rowCoder);
+ }
+ if (!row && m_vbvResetTriggerRow != intRow)
{
- curEncData.m_rowStat[row].diagQp = curEncData.m_avgQpRc;
- curEncData.m_rowStat[row].diagQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
+ curEncData.m_rowStat[row].rowQp = curEncData.m_avgQpRc;
+ curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
}
FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
- if (row >= col && row && m_vbvResetTriggerRow != intRow)
- cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
+ if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow)
+ cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
+ else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow)
+ cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;
else
- cuStat.baseQp = curEncData.m_rowStat[row].diagQp;
+ cuStat.baseQp = curEncData.m_rowStat[row].rowQp;
/* TODO: use defines from slicetype.h for lowres block size */
uint32_t block_y = (ctu->m_cuPelY >> g_maxLog2CUSize) * noOfBlocks;
@@ -1364,21 +1376,52 @@
if (bIsVbv)
{
// Update encoded bits, satdCost, baseQP for each CU
- curEncData.m_rowStat[row].diagSatd += curEncData.m_cuStat[cuAddr].vbvCost;
- curEncData.m_rowStat[row].diagIntraSatd += curEncData.m_cuStat[cuAddr].intraVbvCost;
+ curEncData.m_rowStat[row].rowSatd += curEncData.m_cuStat[cuAddr].vbvCost;
+ curEncData.m_rowStat[row].rowIntraSatd += curEncData.m_cuStat[cuAddr].intraVbvCost;
curEncData.m_rowStat[row].encodedBits += curEncData.m_cuStat[cuAddr].totalBits;
curEncData.m_rowStat[row].sumQpRc += curEncData.m_cuStat[cuAddr].baseQp;
curEncData.m_rowStat[row].numEncodedCUs = cuAddr;
+ // If current block is at row end checkpoint, call vbv ratecontrol.
+
+ if (!m_param->bEnableWavefront && col == numCols - 1)
+ {
+ double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
+ int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase);
+ qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
+ curEncData.m_rowStat[row].rowQp = qpBase;
+ curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
+ if (reEncode < 0)
+ {
+ x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",
+ m_frame->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp);
+
+ m_vbvResetTriggerRow = row;
+ m_outStreams[0].copyBits(&m_backupStreams[0]);
+
+ rowCoder.copyState(curRow.bufferedEntropy);
+ rowCoder.loadContexts(curRow.bufferedEntropy);
+
+ curRow.completed = 0;
+ memset(&curRow.rowStats, 0, sizeof(curRow.rowStats));
+ curEncData.m_rowStat[row].numEncodedCUs = 0;
+ curEncData.m_rowStat[row].encodedBits = 0;
+ curEncData.m_rowStat[row].rowSatd = 0;
+ curEncData.m_rowStat[row].rowIntraSatd = 0;
+ curEncData.m_rowStat[row].sumQpRc = 0;
+ curEncData.m_rowStat[row].sumQpAq = 0;
+ }
+ }
+
// If current block is at row diagonal checkpoint, call vbv ratecontrol.
- if (row == col && row)
+ else if (m_param->bEnableWavefront && row == col && row)
{
double qpBase = curEncData.m_cuStat[cuAddr].baseQp;
- int reEncode = m_top->m_rateControl->rowDiagonalVbvRateControl(m_frame, row, &m_rce, qpBase);
+ int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase);
qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase);
- curEncData.m_rowStat[row].diagQp = qpBase;
- curEncData.m_rowStat[row].diagQpScale = x265_qp2qScale(qpBase);
+ curEncData.m_rowStat[row].rowQp = qpBase;
+ curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase);
if (reEncode < 0)
{
@@ -1431,8 +1474,8 @@
memset(&stopRow.rowStats, 0, sizeof(stopRow.rowStats));
curEncData.m_rowStat[r].numEncodedCUs = 0;
curEncData.m_rowStat[r].encodedBits = 0;
- curEncData.m_rowStat[r].diagSatd = 0;
- curEncData.m_rowStat[r].diagIntraSatd = 0;
+ curEncData.m_rowStat[r].rowSatd = 0;
+ curEncData.m_rowStat[r].rowIntraSatd = 0;
curEncData.m_rowStat[r].sumQpRc = 0;
curEncData.m_rowStat[r].sumQpAq = 0;
}
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/frameencoder.h Thu Oct 13 15:22:44 2016 +0530
@@ -184,6 +184,7 @@
NoiseReduction* m_nr;
ThreadLocalData* m_tld; /* for --no-wpp */
Bitstream* m_outStreams;
+ Bitstream* m_backupStreams;
uint32_t* m_substreamSizes;
CUGeom* m_cuGeoms;
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/ratecontrol.cpp Thu Oct 13 15:22:44 2016 +0530
@@ -2180,7 +2180,7 @@
for (uint32_t row = 0; row < maxRows; row++)
{
encodedBitsSoFar += curEncData.m_rowStat[row].encodedBits;
- rowSatdCostSoFar = curEncData.m_rowStat[row].diagSatd;
+ rowSatdCostSoFar = curEncData.m_rowStat[row].rowSatd;
uint32_t satdCostForPendingCus = curEncData.m_rowStat[row].satdForVbv - rowSatdCostSoFar;
satdCostForPendingCus >>= X265_DEPTH - 8;
if (satdCostForPendingCus > 0)
@@ -2209,7 +2209,7 @@
}
refRowSatdCost >>= X265_DEPTH - 8;
- refQScale = refEncData.m_rowStat[row].diagQpScale;
+ refQScale = refEncData.m_rowStat[row].rowQpScale;
}
if (picType == I_SLICE || qScale >= refQScale)
@@ -2231,7 +2231,7 @@
}
else if (picType == P_SLICE)
{
- intraCostForPendingCus = curEncData.m_rowStat[row].intraSatdForVbv - curEncData.m_rowStat[row].diagIntraSatd;
+ intraCostForPendingCus = curEncData.m_rowStat[row].intraSatdForVbv - curEncData.m_rowStat[row].rowIntraSatd;
intraCostForPendingCus >>= X265_DEPTH - 8;
/* Our QP is lower than the reference! */
double pred_intra = predictSize(rce->rowPred[1], qScale, intraCostForPendingCus);
@@ -2246,16 +2246,16 @@
return totalSatdBits + encodedBitsSoFar;
}
-int RateControl::rowDiagonalVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)
+int RateControl::rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)
{
FrameData& curEncData = *curFrame->m_encData;
double qScaleVbv = x265_qp2qScale(qpVbv);
- uint64_t rowSatdCost = curEncData.m_rowStat[row].diagSatd;
+ uint64_t rowSatdCost = curEncData.m_rowStat[row].rowSatd;
double encodedBits = curEncData.m_rowStat[row].encodedBits;
- if (row == 1)
+ if (m_param->bEnableWavefront && row == 1)
{
- rowSatdCost += curEncData.m_rowStat[0].diagSatd;
+ rowSatdCost += curEncData.m_rowStat[0].rowSatd;
encodedBits += curEncData.m_rowStat[0].encodedBits;
}
rowSatdCost >>= X265_DEPTH - 8;
@@ -2263,11 +2263,11 @@
if (curEncData.m_slice->m_sliceType != I_SLICE)
{
Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
- if (qpVbv < refFrame->m_encData->m_rowStat[row].diagQp)
+ if (qpVbv < refFrame->m_encData->m_rowStat[row].rowQp)
{
- uint64_t intraRowSatdCost = curEncData.m_rowStat[row].diagIntraSatd;
- if (row == 1)
- intraRowSatdCost += curEncData.m_rowStat[0].diagIntraSatd;
+ uint64_t intraRowSatdCost = curEncData.m_rowStat[row].rowIntraSatd;
+ if (m_param->bEnableWavefront && row == 1)
+ intraRowSatdCost += curEncData.m_rowStat[0].rowIntraSatd;
intraRowSatdCost >>= X265_DEPTH - 8;
updatePredictor(rce->rowPred[1], qScaleVbv, (double)intraRowSatdCost, encodedBits);
}
@@ -2328,7 +2328,7 @@
}
while (qpVbv > qpMin
- && (qpVbv > curEncData.m_rowStat[0].diagQp || m_singleFrameVbv)
+ && (qpVbv > curEncData.m_rowStat[0].rowQp || m_singleFrameVbv)
&& (((accFrameBits < rce->frameSizePlanned * 0.8f && qpVbv <= prevRowQp)
|| accFrameBits < (rce->bufferFill - m_bufferSize + m_bufferRate) * 1.1)
&& (!m_param->rc.bStrictCbr ? 1 : abrOvershoot < 0)))
@@ -2348,7 +2348,7 @@
accFrameBits = predictRowsSizeSum(curFrame, rce, qpVbv, encodedBitsSoFar);
abrOvershoot = (accFrameBits + m_totalBits - m_wantedBitsWindow) / totalBitsNeeded;
}
- if (qpVbv > curEncData.m_rowStat[0].diagQp &&
+ if (qpVbv > curEncData.m_rowStat[0].rowQp &&
abrOvershoot < -0.1 && timeDone > 0.5 && accFrameBits < rce->frameSizePlanned - rcTol)
{
qpVbv -= stepSize;
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.h
--- a/source/encoder/ratecontrol.h Tue Oct 25 11:32:10 2016 +0530
+++ b/source/encoder/ratecontrol.h Thu Oct 13 15:22:44 2016 +0530
@@ -243,7 +243,7 @@
int rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);
void rateControlUpdateStats(RateControlEntry* rce);
int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce);
- int rowDiagonalVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);
+ int rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);
int rateControlSliceType(int frameNum);
bool cuTreeReadFor2Pass(Frame* curFrame);
void hrdFullness(SEIBufferingPeriod* sei);
diff -r bc911034c2a0 -r f9e7422416c9 source/test/rate-control-tests.txt
--- a/source/test/rate-control-tests.txt Tue Oct 25 11:32:10 2016 +0530
+++ b/source/test/rate-control-tests.txt Thu Oct 13 15:22:44 2016 +0530
@@ -21,6 +21,9 @@
big_buck_bunny_360p24.y4m,--preset medium --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 600 --aud --hrd --tune fast-decode
sita_1920x1080_30.yuv,--preset superfast --crf 25 --vbv-bufsize 3000 --vbv-maxrate 4000 --vbv-bufsize 5000 --hrd --crf-max 30
sita_1920x1080_30.yuv,--preset superfast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --aud --strict-cbr
+BasketballDrive_1920x1080_50.y4m,--preset ultrafast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --no-wpp
+big_buck_bunny_360p24.y4m,--preset medium --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 600 --no-wpp --aud --hrd --tune fast-decode
+sita_1920x1080_30.yuv,--preset superfast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --aud --strict-cbr --no-wpp
More information about the x265-devel mailing list