<div dir="ltr"><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Oct 26, 2016 at 11:08 AM, <span dir="ltr">&lt;<a href="mailto:aruna@multicorewareinc.com" target="_blank">aruna@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Aruna Matheswaran<br>
# Date 1476352364 -19800<br>
# Thu Oct 13 15:22:44 2016 +0530<br>
# Node ID f9e7422416c9d2d4f7b7618791a7c2<wbr>8592de4828<br>
# Parent bc911034c2a07380630aff98fdda38<wbr>038b2ae62e<br>
Add VBV support for No-WPP<br></blockquote><div><br></div><div>Can you please share what improvement in encoding efficiency you see on VBV command lines with and without WPP?</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/common/bitstream.h<br>
--- a/source/common/bitstream.h Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/common/bitstream.h Thu Oct 13 15:22:44 2016 +0530<br>
@@ -71,6 +71,7 @@<br>
uint32_t getNumberOfWrittenBytes() const { return m_byteOccupancy; }<br>
uint32_t getNumberOfWrittenBits() const { return m_byteOccupancy * 8 + m_partialByteBits; }<br>
const uint8_t* getFIFO() const { return m_fifo; }<br>
+ void copyBits(Bitstream* stream) { m_partialByteBits = stream->m_partialByteBits; m_byteOccupancy = stream->m_byteOccupancy; m_partialByte = stream->m_partialByte; }<br>
<br>
void write(uint32_t val, uint32_t numBits);<br>
void writeByte(uint32_t val);<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/common/framedata.h<br>
--- a/source/common/framedata.h Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/common/framedata.h Thu Oct 13 15:22:44 2016 +0530<br>
@@ -126,10 +126,10 @@<br>
uint32_t encodedBits; /* sum of 'totalBits' of encoded CTUs */<br>
uint32_t satdForVbv; /* sum of lowres (estimated) costs for entire row */<br>
uint32_t intraSatdForVbv; /* sum of lowres (estimated) intra costs for entire row */<br>
- uint32_t diagSatd;<br>
- uint32_t diagIntraSatd;<br>
- double diagQp;<br>
- double diagQpScale;<br>
+ uint32_t rowSatd;<br>
+ uint32_t rowIntraSatd;<br>
+ double rowQp;<br>
+ double rowQpScale;<br>
double sumQpRc;<br>
double sumQpAq;<br>
};<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/encoder.cpp<br>
--- a/source/encoder/encoder.cpp Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/encoder.cpp Thu Oct 13 15:22:44 2016 +0530<br>
@@ -149,12 +149,6 @@<br>
p->bEnableWavefront = p->bDistributeModeAnalysis = p->bDistributeMotionEstimation = p->lookaheadSlices = 0;<br>
}<br>
<br>
- if (!p->bEnableWavefront && p->rc.vbvBufferSize)<br>
- {<br>
- x265_log(p, X265_LOG_ERROR, "VBV requires wavefront parallelism\n");<br>
- m_aborted = true;<br>
- }<br>
-<br>
x265_log(p, X265_LOG_INFO, "Slices : %d\n", p->maxSlices);<br>
<br>
char buf[128];<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.<wbr>cpp<br>
--- a/source/encoder/frameencoder.<wbr>cpp Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>cpp Thu Oct 13 15:22:44 2016 +0530<br>
@@ -50,6 +50,7 @@<br>
m_bAllRowsStop = false;<br>
m_vbvResetTriggerRow = -1;<br>
m_outStreams = NULL;<br>
+ m_backupStreams = NULL;<br>
m_substreamSizes = NULL;<br>
m_nr = NULL;<br>
m_tld = NULL;<br>
@@ -85,6 +86,7 @@<br>
<br>
delete[] m_rows;<br>
delete[] m_outStreams;<br>
+ delete[] m_backupStreams;<br>
X265_FREE(m_sliceBaseRow);<br>
X265_FREE(m_cuGeoms);<br>
X265_FREE(m_ctuGeomMap);<br>
@@ -532,6 +534,8 @@<br>
if (!m_outStreams)<br>
{<br>
m_outStreams = new Bitstream[numSubstreams];<br>
+ if (!m_param->bEnableWavefront)<br>
+ m_backupStreams = new Bitstream[numSubstreams];<br>
m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);<br>
if (!m_param->bEnableSAO)<br>
for (uint32_t i = 0; i < numSubstreams; i++)<br>
@@ -1203,17 +1207,25 @@<br>
<br>
if (bIsVbv)<br>
{<br>
- if (!row)<br>
+ if (col == 0 && !m_param->bEnableWavefront)<br>
+ {<br>
+ m_backupStreams[0].copyBits(&<wbr>m_outStreams[0]);<br>
+ curRow.bufferedEntropy.<wbr>copyState(rowCoder);<br>
+ curRow.bufferedEntropy.<wbr>loadContexts(rowCoder);<br>
+ }<br>
+ if (!row && m_vbvResetTriggerRow != intRow)<br>
{<br>
- curEncData.m_rowStat[row].<wbr>diagQp = curEncData.m_avgQpRc;<br>
- curEncData.m_rowStat[row].<wbr>diagQpScale = x265_qp2qScale(curEncData.m_<wbr>avgQpRc);<br>
+ curEncData.m_rowStat[row].<wbr>rowQp = curEncData.m_avgQpRc;<br>
+ curEncData.m_rowStat[row].<wbr>rowQpScale = x265_qp2qScale(curEncData.m_<wbr>avgQpRc);<br>
}<br>
<br>
FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];<br>
- if (row >= col && row && m_vbvResetTriggerRow != intRow)<br>
- cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;<br>
+ if (m_param->bEnableWavefront && row >= col && row && m_vbvResetTriggerRow != intRow)<br>
+ cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;<br>
+ else if (!m_param->bEnableWavefront && row && m_vbvResetTriggerRow != intRow)<br>
+ cuStat.baseQp = curEncData.m_rowStat[row - 1].rowQp;<br>
else<br>
- cuStat.baseQp = curEncData.m_rowStat[row].<wbr>diagQp;<br>
+ cuStat.baseQp = curEncData.m_rowStat[row].<wbr>rowQp;<br>
<br>
/* TODO: use defines from slicetype.h for lowres block size */<br>
uint32_t block_y = (ctu->m_cuPelY >> g_maxLog2CUSize) * noOfBlocks;<br>
@@ -1364,21 +1376,52 @@<br>
if (bIsVbv)<br>
{<br>
// Update encoded bits, satdCost, baseQP for each CU<br>
- curEncData.m_rowStat[row].<wbr>diagSatd += curEncData.m_cuStat[cuAddr].<wbr>vbvCost;<br>
- curEncData.m_rowStat[row].<wbr>diagIntraSatd += curEncData.m_cuStat[cuAddr].<wbr>intraVbvCost;<br>
+ curEncData.m_rowStat[row].<wbr>rowSatd += curEncData.m_cuStat[cuAddr].<wbr>vbvCost;<br>
+ curEncData.m_rowStat[row].<wbr>rowIntraSatd += curEncData.m_cuStat[cuAddr].<wbr>intraVbvCost;<br>
curEncData.m_rowStat[row].<wbr>encodedBits += curEncData.m_cuStat[cuAddr].<wbr>totalBits;<br>
curEncData.m_rowStat[row].<wbr>sumQpRc += curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
curEncData.m_rowStat[row].<wbr>numEncodedCUs = cuAddr;<br>
<br>
+ // If current block is at row end checkpoint, call vbv ratecontrol.<br>
+<br>
+ if (!m_param->bEnableWavefront && col == numCols - 1)<br>
+ {<br>
+ double qpBase = curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
+ int reEncode = m_top->m_rateControl-><wbr>rowVbvRateControl(m_frame, row, &m_rce, qpBase);<br>
+ qpBase = x265_clip3((double)m_param-><wbr>rc.qpMin, (double)m_param->rc.qpMax, qpBase);<br>
+ curEncData.m_rowStat[row].<wbr>rowQp = qpBase;<br>
+ curEncData.m_rowStat[row].<wbr>rowQpScale = x265_qp2qScale(qpBase);<br>
+ if (reEncode < 0)<br>
+ {<br>
+ x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n",<br>
+ m_frame->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].<wbr>baseQp);<br>
+<br>
+ m_vbvResetTriggerRow = row;<br>
+ m_outStreams[0].copyBits(&m_<wbr>backupStreams[0]);<br>
+<br>
+ rowCoder.copyState(curRow.<wbr>bufferedEntropy);<br>
+ rowCoder.loadContexts(curRow.<wbr>bufferedEntropy);<br>
+<br>
+ curRow.completed = 0;<br>
+ memset(&curRow.rowStats, 0, sizeof(curRow.rowStats));<br>
+ curEncData.m_rowStat[row].<wbr>numEncodedCUs = 0;<br>
+ curEncData.m_rowStat[row].<wbr>encodedBits = 0;<br>
+ curEncData.m_rowStat[row].<wbr>rowSatd = 0;<br>
+ curEncData.m_rowStat[row].<wbr>rowIntraSatd = 0;<br>
+ curEncData.m_rowStat[row].<wbr>sumQpRc = 0;<br>
+ curEncData.m_rowStat[row].<wbr>sumQpAq = 0;<br>
+ }<br>
+ }<br>
+<br>
// If current block is at row diagonal checkpoint, call vbv ratecontrol.<br>
<br>
- if (row == col && row)<br>
+ else if (m_param->bEnableWavefront && row == col && row)<br>
{<br>
double qpBase = curEncData.m_cuStat[cuAddr].<wbr>baseQp;<br>
- int reEncode = m_top->m_rateControl-><wbr>rowDiagonalVbvRateControl(m_<wbr>frame, row, &m_rce, qpBase);<br>
+ int reEncode = m_top->m_rateControl-><wbr>rowVbvRateControl(m_frame, row, &m_rce, qpBase);<br>
qpBase = x265_clip3((double)m_param-><wbr>rc.qpMin, (double)m_param->rc.qpMax, qpBase);<br>
- curEncData.m_rowStat[row].<wbr>diagQp = qpBase;<br>
- curEncData.m_rowStat[row].<wbr>diagQpScale = x265_qp2qScale(qpBase);<br>
+ curEncData.m_rowStat[row].<wbr>rowQp = qpBase;<br>
+ curEncData.m_rowStat[row].<wbr>rowQpScale = x265_qp2qScale(qpBase);<br>
<br>
if (reEncode < 0)<br>
{<br>
@@ -1431,8 +1474,8 @@<br>
memset(&stopRow.rowStats, 0, sizeof(stopRow.rowStats));<br>
curEncData.m_rowStat[r].<wbr>numEncodedCUs = 0;<br>
curEncData.m_rowStat[r].<wbr>encodedBits = 0;<br>
- curEncData.m_rowStat[r].<wbr>diagSatd = 0;<br>
- curEncData.m_rowStat[r].<wbr>diagIntraSatd = 0;<br>
+ curEncData.m_rowStat[r].<wbr>rowSatd = 0;<br>
+ curEncData.m_rowStat[r].<wbr>rowIntraSatd = 0;<br>
curEncData.m_rowStat[r].<wbr>sumQpRc = 0;<br>
curEncData.m_rowStat[r].<wbr>sumQpAq = 0;<br>
}<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/frameencoder.h<br>
--- a/source/encoder/frameencoder.<wbr>h Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>h Thu Oct 13 15:22:44 2016 +0530<br>
@@ -184,6 +184,7 @@<br>
NoiseReduction* m_nr;<br>
ThreadLocalData* m_tld; /* for --no-wpp */<br>
Bitstream* m_outStreams;<br>
+ Bitstream* m_backupStreams;<br>
uint32_t* m_substreamSizes;<br>
<br>
CUGeom* m_cuGeoms;<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.cpp<br>
--- a/source/encoder/ratecontrol.<wbr>cpp Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/ratecontrol.<wbr>cpp Thu Oct 13 15:22:44 2016 +0530<br>
@@ -2180,7 +2180,7 @@<br>
for (uint32_t row = 0; row < maxRows; row++)<br>
{<br>
encodedBitsSoFar += curEncData.m_rowStat[row].<wbr>encodedBits;<br>
- rowSatdCostSoFar = curEncData.m_rowStat[row].<wbr>diagSatd;<br>
+ rowSatdCostSoFar = curEncData.m_rowStat[row].<wbr>rowSatd;<br>
uint32_t satdCostForPendingCus = curEncData.m_rowStat[row].<wbr>satdForVbv - rowSatdCostSoFar;<br>
satdCostForPendingCus >>= X265_DEPTH - 8;<br>
if (satdCostForPendingCus > 0)<br>
@@ -2209,7 +2209,7 @@<br>
}<br>
<br>
refRowSatdCost >>= X265_DEPTH - 8;<br>
- refQScale = refEncData.m_rowStat[row].<wbr>diagQpScale;<br>
+ refQScale = refEncData.m_rowStat[row].<wbr>rowQpScale;<br>
}<br>
<br>
if (picType == I_SLICE || qScale >= refQScale)<br>
@@ -2231,7 +2231,7 @@<br>
}<br>
else if (picType == P_SLICE)<br>
{<br>
- intraCostForPendingCus = curEncData.m_rowStat[row].<wbr>intraSatdForVbv - curEncData.m_rowStat[row].<wbr>diagIntraSatd;<br>
+ intraCostForPendingCus = curEncData.m_rowStat[row].<wbr>intraSatdForVbv - curEncData.m_rowStat[row].<wbr>rowIntraSatd;<br>
intraCostForPendingCus >>= X265_DEPTH - 8;<br>
/* Our QP is lower than the reference! */<br>
double pred_intra = predictSize(rce->rowPred[1], qScale, intraCostForPendingCus);<br>
@@ -2246,16 +2246,16 @@<br>
return totalSatdBits + encodedBitsSoFar;<br>
}<br>
<br>
-int RateControl::<wbr>rowDiagonalVbvRateControl(<wbr>Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)<br>
+int RateControl::<wbr>rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv)<br>
{<br>
FrameData& curEncData = *curFrame->m_encData;<br>
double qScaleVbv = x265_qp2qScale(qpVbv);<br>
- uint64_t rowSatdCost = curEncData.m_rowStat[row].<wbr>diagSatd;<br>
+ uint64_t rowSatdCost = curEncData.m_rowStat[row].<wbr>rowSatd;<br>
double encodedBits = curEncData.m_rowStat[row].<wbr>encodedBits;<br>
<br>
- if (row == 1)<br>
+ if (m_param->bEnableWavefront && row == 1)<br>
{<br>
- rowSatdCost += curEncData.m_rowStat[0].<wbr>diagSatd;<br>
+ rowSatdCost += curEncData.m_rowStat[0].<wbr>rowSatd;<br>
encodedBits += curEncData.m_rowStat[0].<wbr>encodedBits;<br>
}<br>
rowSatdCost >>= X265_DEPTH - 8;<br>
@@ -2263,11 +2263,11 @@<br>
if (curEncData.m_slice->m_<wbr>sliceType != I_SLICE)<br>
{<br>
Frame* refFrame = curEncData.m_slice->m_<wbr>refFrameList[0][0];<br>
- if (qpVbv < refFrame->m_encData->m_<wbr>rowStat[row].diagQp)<br>
+ if (qpVbv < refFrame->m_encData->m_<wbr>rowStat[row].rowQp)<br>
{<br>
- uint64_t intraRowSatdCost = curEncData.m_rowStat[row].<wbr>diagIntraSatd;<br>
- if (row == 1)<br>
- intraRowSatdCost += curEncData.m_rowStat[0].<wbr>diagIntraSatd;<br>
+ uint64_t intraRowSatdCost = curEncData.m_rowStat[row].<wbr>rowIntraSatd;<br>
+ if (m_param->bEnableWavefront && row == 1)<br>
+ intraRowSatdCost += curEncData.m_rowStat[0].<wbr>rowIntraSatd;<br>
intraRowSatdCost >>= X265_DEPTH - 8;<br>
updatePredictor(rce->rowPred[<wbr>1], qScaleVbv, (double)intraRowSatdCost, encodedBits);<br>
}<br>
@@ -2328,7 +2328,7 @@<br>
}<br>
<br>
while (qpVbv > qpMin<br>
- && (qpVbv > curEncData.m_rowStat[0].diagQp || m_singleFrameVbv)<br>
+ && (qpVbv > curEncData.m_rowStat[0].rowQp || m_singleFrameVbv)<br>
&& (((accFrameBits < rce->frameSizePlanned * 0.8f && qpVbv <= prevRowQp)<br>
|| accFrameBits < (rce->bufferFill - m_bufferSize + m_bufferRate) * 1.1)<br>
&& (!m_param->rc.bStrictCbr ? 1 : abrOvershoot < 0)))<br>
@@ -2348,7 +2348,7 @@<br>
accFrameBits = predictRowsSizeSum(curFrame, rce, qpVbv, encodedBitsSoFar);<br>
abrOvershoot = (accFrameBits + m_totalBits - m_wantedBitsWindow) / totalBitsNeeded;<br>
}<br>
- if (qpVbv > curEncData.m_rowStat[0].diagQp &&<br>
+ if (qpVbv > curEncData.m_rowStat[0].rowQp &&<br>
abrOvershoot < -0.1 && timeDone > 0.5 && accFrameBits < rce->frameSizePlanned - rcTol)<br>
{<br>
qpVbv -= stepSize;<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/encoder/ratecontrol.h<br>
--- a/source/encoder/ratecontrol.h Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/encoder/ratecontrol.h Thu Oct 13 15:22:44 2016 +0530<br>
@@ -243,7 +243,7 @@<br>
int rateControlStart(Frame* curFrame, RateControlEntry* rce, Encoder* enc);<br>
void rateControlUpdateStats(<wbr>RateControlEntry* rce);<br>
int rateControlEnd(Frame* curFrame, int64_t bits, RateControlEntry* rce);<br>
- int rowDiagonalVbvRateControl(<wbr>Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);<br>
+ int rowVbvRateControl(Frame* curFrame, uint32_t row, RateControlEntry* rce, double& qpVbv);<br>
int rateControlSliceType(int frameNum);<br>
bool cuTreeReadFor2Pass(Frame* curFrame);<br>
void hrdFullness(<wbr>SEIBufferingPeriod* sei);<br>
diff -r bc911034c2a0 -r f9e7422416c9 source/test/rate-control-<wbr>tests.txt<br>
--- a/source/test/rate-control-<wbr>tests.txt Tue Oct 25 11:32:10 2016 +0530<br>
+++ b/source/test/rate-control-<wbr>tests.txt Thu Oct 13 15:22:44 2016 +0530<br>
@@ -21,6 +21,9 @@<br>
big_buck_bunny_360p24.y4m,--<wbr>preset medium --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 600 --aud --hrd --tune fast-decode<br>
sita_1920x1080_30.yuv,--preset superfast --crf 25 --vbv-bufsize 3000 --vbv-maxrate 4000 --vbv-bufsize 5000 --hrd --crf-max 30<br>
sita_1920x1080_30.yuv,--preset superfast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --aud --strict-cbr<br>
+BasketballDrive_1920x1080_50.<wbr>y4m,--preset ultrafast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --no-wpp<br>
+big_buck_bunny_360p24.y4m,--<wbr>preset medium --bitrate 400 --vbv-bufsize 600 --vbv-maxrate 600 --no-wpp --aud --hrd --tune fast-decode<br>
+sita_1920x1080_30.yuv,--<wbr>preset superfast --bitrate 3000 --vbv-bufsize 3000 --vbv-maxrate 3000 --aud --strict-cbr --no-wpp<br>
<br>
<br>
<br>
______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
</blockquote></div><br></div></div>