[x265] [PATCH 3 of 3 RFC] rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr
santhoshini at multicorewareinc.com
santhoshini at multicorewareinc.com
Fri Jul 11 13:24:46 CEST 2014
# HG changeset patch
# User Santhoshini Sekar <santhoshini at multicorewareinc.com>
# Date 1405077594 -19800
# Fri Jul 11 16:49:54 2014 +0530
# Node ID 070c3f30547aca9af4f8a708b6ae4a108510aad5
# Parent 7acd78cdabfee453ba3b44b034eb2c87e587c7e6
rc: update ratecontrol stats in every frame, avoid frame parallelism lag in abr
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/encoder.cpp Fri Jul 11 16:49:54 2014 +0530
@@ -311,20 +311,6 @@
else
m_lookahead->flush();
- if (m_param->rc.rateControlMode == X265_RC_ABR)
- {
- // delay frame parallelism for non-VBV ABR
- if (m_pocLast == 0 && !m_param->rc.vbvBufferSize && !m_param->rc.vbvMaxBitrate)
- m_param->frameNumThreads = 1;
- else if (m_param->frameNumThreads != m_totalFrameThreads)
- {
- // re-enable frame parallelism after the first few P frames are encoded
- uint32_t frameCnt = (uint32_t)((0.5 * m_param->fpsNum / m_param->fpsDenom) / (m_param->bframes + 1));
- if (m_analyzeP.m_numPics > frameCnt)
- m_param->frameNumThreads = m_totalFrameThreads;
- }
- }
-
FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];
m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
int ret = 0;
@@ -393,26 +379,11 @@
if (bChroma)
m_numChromaWPBiFrames++;
}
-
- uint64_t bytes = 0;
- for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
+ if (m_aborted == true)
{
- int type = m_nalList.m_nal[i].type;
-
- // exclude SEI
- if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
- {
- bytes += m_nalList.m_nal[i].sizeBytes;
- // and exclude start code prefix
- bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
- }
- }
- if (m_rateControl->rateControlEnd(out, bytes << 3, &curEncoder->m_rce, &curEncoder->m_frameStats) < 0)
- {
- m_aborted = true;
return -1;
}
- finishFrameStats(out, curEncoder, bytes << 3);
+ finishFrameStats(out, curEncoder, curEncoder->m_accessUnitBits);
// Allow this frame to be recycled if no frame encoders are using it for reference
if (!pic_out)
{
@@ -465,13 +436,16 @@
// determine references, setup RPS, etc
m_dpb->prepareEncode(fenc);
- // set slice QP
- m_rateControl->rateControlStart(fenc, m_lookahead, &curEncoder->m_rce, this);
// Allow FrameEncoder::compressFrame() to start in a worker thread
curEncoder->m_enable.trigger();
}
-
+ else if (!fenc && m_encodedFrameNum > 0)
+ {
+ RateControlEntry rce;
+ rce.encodeOrder = m_encodedFrameNum++;
+ m_rateControl->rateControlStart(NULL, m_lookahead, &rce, this);
+ }
return ret;
}
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/encoder.h
--- a/source/encoder/encoder.h Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/encoder.h Fri Jul 11 16:49:54 2014 +0530
@@ -71,7 +71,6 @@
{
private:
- bool m_aborted; // fatal error detected
int m_pocLast; ///< time index (POC)
int m_encodedFrameNum;
int m_outputCount;
@@ -82,7 +81,6 @@
int64_t m_prevReorderedPts[2];
ThreadPool* m_threadPool;
- Lookahead* m_lookahead;
FrameEncoder* m_frameEncoder;
DPB* m_dpb;
@@ -91,14 +89,6 @@
int m_curEncoder;
- /* Collect statistics globally */
- EncStats m_analyzeAll;
- EncStats m_analyzeI;
- EncStats m_analyzeP;
- EncStats m_analyzeB;
- FILE* m_csvfpt;
- int64_t m_encodeStartTime;
-
// quality control
TComScalingList m_scalingList; ///< quantization matrix information
@@ -119,6 +109,17 @@
Level::Tier m_levelTier;
Level::Name m_level;
+ /* Collect statistics globally */
+ EncStats m_analyzeAll;
+ EncStats m_analyzeI;
+ EncStats m_analyzeP;
+ EncStats m_analyzeB;
+ FILE* m_csvfpt;
+ int64_t m_encodeStartTime;
+
+ Lookahead* m_lookahead;
+
+ bool m_aborted; // fatal error detected
bool m_nonPackedConstraintFlag;
bool m_frameOnlyConstraintFlag;
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/frameencoder.cpp Fri Jul 11 16:49:54 2014 +0530
@@ -429,26 +429,6 @@
m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
}
- int qp = slice->getSliceQp();
-
- int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
- int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
-
- double lambda = x265_lambda2_tab[qp];
- /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
- double chromaLambda = x265_lambda2_tab[qpCb];
-
- // NOTE: set SAO lambda every Frame
- m_frameFilter.m_sao.lumaLambda = lambda;
- m_frameFilter.m_sao.chromaLambda = chromaLambda;
-
- // Clip qps back to 0-51 range before encoding
- qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
- slice->setSliceQp(qp);
- m_frame->m_avgQpAq = qp;
- slice->setSliceQpDelta(0);
- slice->setSliceQpDeltaCb(0);
- slice->setSliceQpDeltaCr(0);
switch (slice->getSliceType())
{
@@ -601,6 +581,24 @@
}
}
+ uint64_t bytes = 0;
+ for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
+ {
+ int type = m_nalList.m_nal[i].type;
+
+ // exclude SEI
+ if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
+ {
+ bytes += m_nalList.m_nal[i].sizeBytes;
+ // and exclude start code prefix
+ bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
+ }
+ }
+ m_accessUnitBits = bytes << 3;
+ if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce, &m_frameStats) < 0)
+ {
+ m_top->m_aborted = true;
+ }
noiseReductionUpdate();
m_elapsedCompressTime = (double)(x265_mdate() - startCompressTime) / 1000000;
@@ -699,7 +697,28 @@
PPAScopeEvent(FrameEncoder_compressRows);
TComSlice* slice = m_frame->getSlice();
- // reset entropy coders
+ //set slice QP
+ m_top->m_rateControl->rateControlStart(m_frame, m_top->m_lookahead, &m_rce, m_top);
+ int qp = slice->getSliceQp();
+
+ int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();
+ int qpCb = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset);
+ double lambda = x265_lambda2_tab[qp];
+ /* Assuming qpCb and qpCr are the same, since SAO takes only a single chroma lambda. TODO: Check why */
+ double chromaLambda = x265_lambda2_tab[qpCb];
+
+ // NOTE: set SAO lambda every Frame
+ m_frameFilter.m_sao.lumaLambda = lambda;
+ m_frameFilter.m_sao.chromaLambda = chromaLambda;
+
+ // Clip qps back to 0-51 range before encoding
+ qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp);
+ slice->setSliceQp(qp);
+ m_frame->m_avgQpAq = qp;
+ slice->setSliceQpDelta(0);
+ slice->setSliceQpDeltaCb(0);
+ slice->setSliceQpDeltaCr(0);
+ //reset entropy coders
m_sbacCoder.resetEntropy(slice);
for (int i = 0; i < this->m_numRows; i++)
{
@@ -1026,7 +1045,26 @@
}
}
+ int rowCount;
+
+ if (m_top->m_analyzeAll.m_numPics <= 2 * (m_param->fpsNum / m_param->fpsDenom))
+ rowCount = m_numRows/2 ;
+ else
+ rowCount = m_refLagRows;
+
// this row of CTUs has been encoded
+ if (row == rowCount)
+ {
+ int64_t bits = 0;
+ for(uint32_t col = 0; col < rowCount * numCols; col++)
+ {
+ TComDataCU* cu = m_frame->getCU(col);
+ bits += cu->m_totalBits;
+ }
+
+ m_rce.rowTotalBits = bits;
+ m_top->m_rateControl->rateControlUpdateStats(&m_rce);
+ }
// trigger row-wise loop filters
if (row >= m_filterRowDelay)
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/frameencoder.h Fri Jul 11 16:49:54 2014 +0530
@@ -137,6 +137,8 @@
FrameStats m_frameStats; // stats of current frame for multipass encodes
volatile bool m_bAllRowsStop;
volatile int m_vbvResetTriggerRow;
+ Frame* m_frame;
+ uint64_t m_accessUnitBits;
protected:
@@ -155,7 +157,6 @@
NALList m_nalList;
ThreadLocalData m_tld;
- Frame* m_frame;
int m_filterRowDelay;
int m_filterRowDelayCus;
diff -r 7acd78cdabfe -r 070c3f30547a source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Fri Jul 11 15:15:32 2014 +0530
+++ b/source/encoder/ratecontrol.cpp Fri Jul 11 16:49:54 2014 +0530
@@ -263,7 +263,6 @@
int lowresCuWidth = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
int lowresCuHeight = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
m_ncu = lowresCuWidth * lowresCuHeight;
-
if (m_param->rc.cuTree)
m_qCompress = 1;
else
@@ -541,74 +540,88 @@
void RateControl::rateControlStart(Frame* pic, Lookahead *l, RateControlEntry* rce, Encoder* enc)
{
- m_curSlice = pic->getSlice();
- m_sliceType = m_curSlice->getSliceType();
- rce->sliceType = m_sliceType;
- rce->isActive = true;
- if (m_sliceType == B_SLICE)
- rce->bframes = m_bframes;
- else
- m_bframes = pic->m_lowres.leadingBframes;
+ int orderValue = m_startEndOrder.get();
+ int startOrdinal = rce->encodeOrder * 2;
- rce->bLastMiniGopBFrame = pic->m_lowres.bLastMiniGopBFrame;
- rce->bufferRate = m_bufferRate;
- rce->poc = m_curSlice->getPOC();
- if (m_isVbv)
- {
- if (rce->rowPreds[0][0].count == 0)
+ while (orderValue != startOrdinal && pic)
+ orderValue = m_startEndOrder.waitForChange(orderValue);
+
+ ScopedLock scope(m_lock);
+ if (pic)
{
- for (int i = 0; i < 3; i++)
+ m_curSlice = pic->getSlice();
+ m_sliceType = m_curSlice->getSliceType();
+ rce->sliceType = m_sliceType;
+
+ rce->isActive = true;
+ if (m_sliceType == B_SLICE)
+ rce->bframes = m_bframes;
+ else
+ m_bframes = pic->m_lowres.leadingBframes;
+ rce->bLastMiniGopBFrame = pic->m_lowres.bLastMiniGopBFrame;
+ rce->bufferRate = m_bufferRate;
+ rce->poc = m_curSlice->getPOC();
+ rce->rowCplxrSum = 0.0;
+ rce->rowTotalBits = 0;
+ if (m_isVbv)
+ {
+ if (rce->rowPreds[0][0].count == 0)
{
- for (int j = 0; j < 2; j++)
+ for (int i = 0; i < 3; i++)
{
- rce->rowPreds[i][j].coeff = 0.25;
- rce->rowPreds[i][j].count = 1.0;
- rce->rowPreds[i][j].decay = 0.5;
- rce->rowPreds[i][j].offset = 0.0;
+ for (int j = 0; j < 2; j++)
+ {
+ rce->rowPreds[i][j].coeff = 0.25;
+ rce->rowPreds[i][j].count = 1.0;
+ rce->rowPreds[i][j].decay = 0.5;
+ rce->rowPreds[i][j].offset = 0.0;
+ }
}
}
+ rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
+ rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
+ updateVbvPlan(enc);
+ rce->bufferFill = m_bufferFill;
}
- rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
- rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
- updateVbvPlan(enc);
- rce->bufferFill = m_bufferFill;
- }
- if (m_isAbr) //ABR,CRF
- {
- m_currentSatd = l->getEstimatedPictureCost(pic) >> (X265_DEPTH - 8);
- /* Update rce for use in rate control VBV later */
- rce->lastSatd = m_currentSatd;
- double q = x265_qScale2qp(rateEstimateQscale(pic, rce));
- q = Clip3((double)MIN_QP, (double)MAX_MAX_QP, q);
- m_qp = int(q + 0.5);
- rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = q;
- /* copy value of lastRceq into thread local rce struct *to be used in RateControlEnd() */
- rce->qRceq = m_lastRceq;
- accumPQpUpdate();
- }
- else //CQP
- {
- if (m_sliceType == B_SLICE && m_curSlice->isReferenced())
- m_qp = (m_qpConstant[B_SLICE] + m_qpConstant[P_SLICE]) / 2;
- else
- m_qp = m_qpConstant[m_sliceType];
- pic->m_avgQpAq = pic->m_avgQpRc = m_qp;
- }
- if (m_sliceType != B_SLICE)
- {
- m_lastNonBPictType = m_sliceType;
- m_leadingNoBSatd = m_currentSatd;
- }
- rce->leadingNoBSatd = m_leadingNoBSatd;
- if (pic->m_forceqp)
- {
- m_qp = int32_t(pic->m_forceqp + 0.5) - 1;
- m_qp = Clip3(MIN_QP, MAX_MAX_QP, m_qp);
- rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
- }
- m_framesDone++;
- /* set the final QP to slice structure */
- m_curSlice->setSliceQp(m_qp);
+ if (m_isAbr) //ABR,CRF
+ {
+ m_currentSatd = l->getEstimatedPictureCost(pic) >> (X265_DEPTH - 8);
+ /* Update rce for use in rate control VBV later */
+ rce->lastSatd = m_currentSatd;
+ double q = x265_qScale2qp(rateEstimateQscale(pic, rce));
+ q = Clip3((double)MIN_QP, (double)MAX_MAX_QP, q);
+ m_qp = int(q + 0.5);
+ rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = q;
+ /* copy value of lastRceq into thread local rce struct *to be used in RateControlEnd() */
+ rce->qRceq = m_lastRceq;
+ accumPQpUpdate();
+ }
+ else //CQP
+ {
+ if (m_sliceType == B_SLICE && m_curSlice->isReferenced())
+ m_qp = (m_qpConstant[B_SLICE] + m_qpConstant[P_SLICE]) / 2;
+ else
+ m_qp = m_qpConstant[m_sliceType];
+ pic->m_avgQpAq = pic->m_avgQpRc = m_qp;
+ }
+ if (m_sliceType != B_SLICE)
+ {
+ m_lastNonBPictType = m_sliceType;
+ m_leadingNoBSatd = m_currentSatd;
+ }
+ rce->leadingNoBSatd = m_leadingNoBSatd;
+ if (pic->m_forceqp)
+ {
+ m_qp = int32_t(pic->m_forceqp + 0.5) - 1;
+ m_qp = Clip3(MIN_QP, MAX_MAX_QP, m_qp);
+ rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
+ }
+ m_framesDone++;
+ /* set the final QP to slice structure */
+ m_curSlice->setSliceQp(m_qp);
+ }
+ else
+ m_startEndOrder.incr();
}
void RateControl::accumPQpUpdate()
@@ -1268,6 +1281,12 @@
/* After encoding one frame, update rate control state */
int RateControl::rateControlEnd(Frame* pic, int64_t bits, RateControlEntry* rce, FrameStats* stats)
{
+ int orderValue = m_startEndOrder.get();
+ int endOrdinal = (rce->encodeOrder + m_param->frameNumThreads) * 2 - 1;
+ while (orderValue != endOrdinal)
+ orderValue = m_startEndOrder.waitForChange(orderValue);
+ ScopedLock scope(m_lock);
+
int64_t actualBits = bits;
if (m_isAbr)
{
@@ -1366,17 +1385,19 @@
}
if (rce->sliceType != B_SLICE)
+ {
/* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
* to improve short term compensation for next frame. */
- m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq;
+ m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / rce->qRceq) - (rce->rowCplxrSum);
+ }
else
{
/* Depends on the fact that B-frame's QP is an offset from the following P-frame's.
* Not perfectly accurate with B-refs, but good enough. */
- m_cplxrSum += bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor));
+ m_cplxrSum += (bits * x265_qp2qScale(rce->qpaRc) / (rce->qRceq * fabs(m_param->rc.pbFactor))) - (rce->rowCplxrSum);
}
m_wantedBitsWindow += m_frameDuration * m_bitrate;
- m_totalBits += bits;
+ m_totalBits += bits - rce->rowTotalBits;
}
}
@@ -1425,6 +1446,7 @@
rce->hrdTiming->dpbOutputTime = (double)rce->picTimingSEI->m_picDpbOutputDelay * time->getNumUnitsInTick() / time->getTimeScale() + rce->hrdTiming->cpbRemovalTime;
}
}
+ m_startEndOrder.incr();
rce->isActive = false;
return 0;
More information about the x265-devel mailing list