[x265] [PATCH 5 of 5] rc: init RC data for second pass in a multi pass encode
aarthi at multicorewareinc.com
aarthi at multicorewareinc.com
Fri Jul 11 13:13:00 CEST 2014
# HG changeset patch
# User Aarthi Thirumalai
# Date 1405076729 -19800
# Fri Jul 11 16:35:29 2014 +0530
# Node ID 33d55e581aacbfcb91958a005a239760bd8b9163
# Parent fc4c54e9f2115d49348e104d962c9073f323987e
rc: init RC data for second pass in a multi pass encode
diff -r fc4c54e9f211 -r 33d55e581aac source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Jul 11 16:32:41 2014 +0530
+++ b/source/encoder/encoder.cpp Fri Jul 11 16:35:29 2014 +0530
@@ -1327,8 +1327,7 @@
p->rc.aqMode = X265_AQ_VARIANCE;
p->rc.aqStrength = 0.0;
}
-
- if (p->lookaheadDepth == 0 && p->rc.cuTree)
+ if (p->lookaheadDepth == 0 && p->rc.cuTree && !p->rc.bStatRead)
{
x265_log(p, X265_LOG_WARNING, "cuTree disabled, requires lookahead to be enabled\n");
p->rc.cuTree = 0;
diff -r fc4c54e9f211 -r 33d55e581aac source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Fri Jul 11 16:32:41 2014 +0530
+++ b/source/encoder/ratecontrol.cpp Fri Jul 11 16:35:29 2014 +0530
@@ -42,8 +42,24 @@
const double RateControl::s_amortizeFraction = 0.85;
const int RateControl::s_amortizeFrames = 75;
const char *RateControl::s_defaultStatFileName = "x265_2pass.log";
-
namespace {
+/* Check that option `opt`, as recorded in the first-pass "#options:" line, agrees
+ * with the current value `param_val`. Both spellings are recognised: numeric
+ * ("opt=N") and boolean ("opt" / "no-opt"). The expanding scope must provide
+ * `opts` (the option string), `p` (char*), `i` (int) and `bErr`. On a mismatch an
+ * error is logged and the enclosing function returns false.
+ * Note the blank between "no-" and opt: without it C++11 reads the identifier as
+ * a user-defined literal suffix. */
+#define CMP_OPT_FIRST_PASS(opt, param_val)\
+{\
+    bErr = 0;\
+    i = 0;\
+    p = strstr(opts, opt "=");\
+    char* q = strstr(opts, "no-" opt);\
+    if (p && sscanf(p, opt "=%d" , &i) == 1 && param_val != i)\
+        bErr = 1;\
+    else if (!param_val && !q && !p)\
+        bErr = 1;\
+    else if (param_val && (q || !strstr(opts, opt)))\
+        bErr = 1;\
+    if (bErr)\
+    {\
+        x265_log(m_param, X265_LOG_ERROR, "different " opt " setting than first pass (%d vs %d)\n", param_val, i);\
+        return false;\
+    }\
+}
inline int calcScale(uint32_t x)
{
@@ -96,6 +112,14 @@
strcat(output, suffix);
return output;
}
+/* Predict the size of a frame if it were coded at quantizer scale qScale instead of
+ * the rce->qScale it was measured at. The coefficient and motion-vector bit counts
+ * are rescaled by power laws of the qScale ratio; rce->miscBits is added unchanged.
+ * qScale is floored at 0.1 before use. */
+inline double qScale2bits(RateControlEntry *rce, double qScale)
+{
+    const double targetQScale = qScale < 0.1 ? 0.1 : qScale;
+    const double coeffPart = (rce->coeffBits + .1) * pow(rce->qScale / targetQScale, 1.1);
+    const double mvPart = rce->mvBits * pow(X265_MAX(rce->qScale, 1) / X265_MAX(targetQScale, 1), 0.5);
+
+    return coeffPart + mvPart + rce->miscBits;
+}
} // end anonymous namespace
/* Compute variance to derive AC energy of each block */
@@ -274,7 +298,7 @@
m_residualCost = 0;
m_rateFactorMaxIncrement = 0;
m_rateFactorMaxDecrement = 0;
-
+    /* divide in floating point: fpsNum / fpsDenom are integers, and an integer
+     * division would truncate fractional rates such as 30000/1001 */
+    m_fps = (double)m_param->fpsNum / m_param->fpsDenom;
if (m_param->rc.rateControlMode == X265_RC_CRF)
{
m_param->rc.qp = (int)m_param->rc.rfConstant;
@@ -397,7 +421,174 @@
const char *fileName = m_param->rc.statFileName;
if (!fileName)
fileName = s_defaultStatFileName;
+ /* Load stat file and init 2pass algo */
+ if (m_param->rc.bStatRead)
+ {
+ char *p, *statsIn, *statsBuf;
+ /* read 1st pass stats */
+ statsIn = statsBuf = x265_slurp_file(fileName);
+ if (!statsBuf)
+ return false;
+        if (m_param->rc.cuTree)
+        {
+            /* open the companion "<stats file>.cutree" file for reading */
+            char *tmpFile = strcatFilename(fileName, ".cutree");
+            if (!tmpFile)
+            {
+                X265_FREE(statsBuf);
+                return false;
+            }
+            m_cutreeStatFileIn = fopen(tmpFile, "rb");
+            if (!m_cutreeStatFileIn)
+            {
+                /* log first: the message needs tmpFile, so it must not be freed yet */
+                x265_log(m_param, X265_LOG_ERROR, "can't open stats file %s\n", tmpFile);
+                x265_free(tmpFile);
+                X265_FREE(statsBuf);
+                return false;
+            }
+            x265_free(tmpFile);
+        }
+ /* check whether 1st pass options were compatible with current options */
+ if (strncmp(statsBuf, "#options:", 9))
+ {
+ x265_log(m_param, X265_LOG_ERROR,"options list in stats file not valid\n");
+ return false;
+ }
+ {
+ int i, j;
+ uint32_t k , l;
+ bool bErr = false;
+ char *opts = statsBuf;
+ statsIn = strchr(statsBuf, '\n');
+ if (!statsIn)
+ return false;
+ *statsIn = '\0';
+ statsIn++;
+ if (sscanf(opts, "#options: %dx%d", &i, &j) != 2)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "Resolution specified in stats file not valid\n");
+ return false;
+ }
+ if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, " fps=%u/%u", &k, &l) != 2)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "fps specified in stats file not valid\n");
+ return false;
+ }
+ if (k != m_param->fpsNum || l != m_param->fpsDenom)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "fps mismatch with 1st pass (%u/%u vs %u/%u)\n",
+ m_param->fpsNum, m_param->fpsDenom, k, l);
+ return false;
+ }
+ CMP_OPT_FIRST_PASS("bitdepth", m_param->internalBitDepth);
+ CMP_OPT_FIRST_PASS("weightp", m_param->bEnableWeightedPred);
+ CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
+ CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
+ CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
+ CMP_OPT_FIRST_PASS("keyint", m_param->keyframeMax);
+ CMP_OPT_FIRST_PASS("wpp", m_param->bEnableWavefront);
+
+ if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p, "b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
+ {
+ m_param->bFrameAdaptive = i;
+ }
+ else if (m_param->bframes)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "b-adapt method specified in stats file not valid\n");
+ return false;
+ }
+
+ if ((m_param->rc.cuTree || m_param->rc.vbvBufferSize) && ((p = strstr(opts, "rc-lookahead=")) != 0) && sscanf(p, "rc-lookahead=%d", &i))
+ m_param->lookaheadDepth = i;
+ }
+ /* find number of pics */
+ p = statsIn;
+ int numEntries;
+ for (numEntries = -1; p; numEntries++)
+ p = strchr(p + 1, ';');
+ if (!numEntries)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "empty stats file\n");
+ return false;
+ }
+ m_numEntries = numEntries;
+
+ if (m_param->totalFrames < m_numEntries && m_param->totalFrames > 0)
+ {
+ x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
+ m_param->totalFrames, m_numEntries);
+ }
+ if (m_param->totalFrames > m_numEntries)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
+ m_param->totalFrames, m_numEntries);
+ return false;
+ }
+
+ m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
+ if (!m_rce2Pass)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2 pass cannot be allocated\n");
+ return false;
+ }
+ /* init all to skipped p frames */
+ for (int i = 0; i < m_numEntries; i++)
+ {
+ RateControlEntry *rce = &m_rce2Pass[i];
+ rce->sliceType = P_SLICE;
+ rce->qScale = rce->newQScale = x265_qp2qScale(20);
+ rce->miscBits = m_ncu + 10;
+ rce->newQp = 0;
+ }
+        /* read stats: one ';'-terminated record per frame; each record is stored at
+         * the index given by its own "in:" frame number */
+        p = statsIn;
+        double totalQpAq = 0;
+        for (int i = 0; i < m_numEntries; i++)
+        {
+            RateControlEntry *rce;
+            int frameNumber = -1; /* stays out of range if "in:" cannot be parsed */
+            char picType = 0;     /* stays invalid if the record ends before "type:" */
+            int e;
+            char *next;
+            double qpRc, qpAq;
+            next = strstr(p, ";");
+            if (next)
+                *next++ = 0;
+            e = sscanf(p, " in:%d ", &frameNumber);
+
+            if (frameNumber < 0 || frameNumber >= m_numEntries)
+            {
+                x265_log(m_param, X265_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frameNumber, i);
+                X265_FREE(statsBuf);
+                return false;
+            }
+            rce = &m_rce2Pass[frameNumber];
+            e += sscanf(p, " in:%*d out:%*d type:%c dur:%lf q:%lf q-aq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf scu:%lf",
+                        &picType, &rce->frameDuration, &qpRc, &qpAq, &rce->coeffBits,
+                        &rce->mvBits, &rce->miscBits, &rce->iCuCount, &rce->pCuCount,
+                        &rce->skipCuCount);
+
+            /* the stats writer uses lower-case 'p' / 'b' for unreferenced P / B frames;
+             * every other type was kept as a reference */
+            rce->keptAsRef = picType != 'b' && picType != 'p';
+            if (picType == 'I' || picType == 'i')
+                rce->sliceType = I_SLICE;
+            else if (picType == 'P' || picType == 'p')
+                rce->sliceType = P_SLICE;
+            else if (picType == 'B' || picType == 'b')
+                rce->sliceType = B_SLICE;
+            else
+                e = -1;
+            /* 1 field from the first scan + 10 from the second */
+            if (e < 11)
+            {
+                x265_log(m_param, X265_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
+                X265_FREE(statsBuf);
+                return false;
+            }
+            rce->qScale = x265_qp2qScale(qpRc);
+            totalQpAq += qpAq;
+            p = next;
+        }
+ X265_FREE(statsBuf);
+
+ if (m_param->rc.rateControlMode == X265_RC_ABR)
+ {
+ if (!initPass2())
+ return false;
+ } /* else we're using constant quant, so no need to run the bitrate allocation */
+ }
/* Open output file */
/* If input and output files are the same, output to a temp file
* and move it to the real name only when it's complete */
@@ -411,12 +602,12 @@
x265_free(statFileTmpname);
if (!m_statFileOut)
{
- x265_log(m_param, X265_LOG_ERROR, "RateControl Init: can't open stats file\n");
+            /* statFileTmpname has already been freed just above, so name the stats file
+             * the temporary output belongs to instead of touching the freed string */
+            x265_log(m_param, X265_LOG_ERROR, "can't open temporary stats file for %s\n", fileName);
return false;
}
p = x265_param2string(m_param);
if (p)
- fprintf(m_statFileOut, "#options: %s\n", p);
+ fprintf(m_statFileOut, "#options: %s \n", p);
X265_FREE(p);
if (m_param->rc.cuTree && !m_param->rc.bStatRead)
{
@@ -427,7 +618,7 @@
x265_free(statFileTmpname);
if (!m_cutreeStatFileOut)
{
- x265_log(m_param, X265_LOG_ERROR, "RateControl Init: can't open mbtree stats file\n");
+                /* statFileTmpname has already been freed just above, so name the stats
+                 * file this output belongs to instead of touching the freed string */
+                x265_log(m_param, X265_LOG_ERROR, "can't open mbtree stats file for %s\n", fileName);
return false;
}
}
@@ -440,16 +631,13 @@
m_cuTreeStats.qpBufPos = -1;
}
}
-
if (m_isVbv && !m_2pass)
{
- double fps = (double)m_param->fpsNum / m_param->fpsDenom;
-
/* We don't support changing the ABR bitrate right now,
* so if the stream starts as CBR, keep it CBR. */
- if (m_param->rc.vbvBufferSize < (int)(m_param->rc.vbvMaxBitrate / fps))
+ if (m_param->rc.vbvBufferSize < (int)(m_param->rc.vbvMaxBitrate / m_fps))
{
- m_param->rc.vbvBufferSize = (int)(m_param->rc.vbvMaxBitrate / fps);
+ m_param->rc.vbvBufferSize = (int)(m_param->rc.vbvMaxBitrate / m_fps);
x265_log(m_param, X265_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n",
m_param->rc.vbvBufferSize);
}
@@ -465,8 +653,7 @@
vbvMaxBitrate = (hrd->getBitRateValueMinus1(0, 0, 0) + 1) << (hrd->getBitRateScale() + BR_SHIFT);
}
}
-
- m_bufferRate = vbvMaxBitrate / fps;
+ m_bufferRate = vbvMaxBitrate / m_fps;
m_vbvMaxRate = vbvMaxBitrate;
m_bufferSize = vbvBufferSize;
m_singleFrameVbv = m_bufferRate * 1.1 > m_bufferSize;
@@ -537,6 +724,261 @@
#undef MAX_DURATION
}
+/* Second-pass bit allocation. Using the per-frame first-pass statistics held in
+ * m_rce2Pass, choose a new quantizer scale (newQScale) for every frame such that
+ * the predicted stream size matches the bit budget derived from the target
+ * bitrate and the summed frame durations.
+ * Returns false when the budget cannot cover the constant (misc) bits, when a
+ * work buffer cannot be allocated, or when the VBV refinement fails. */
+bool RateControl::initPass2()
+{
+    uint64_t allConstBits = 0;
+    double duration = 0;
+    for (int i = 0; i < m_numEntries; i++)
+        duration += m_rce2Pass[i].frameDuration;
+    /* rc.bitrate is in kbit/s (see the log message further down) */
+    uint64_t allAvailableBits = uint64_t(m_param->rc.bitrate * 1000. * duration);
+    double rateFactor, stepMult;
+    double qBlur = m_param->rc.qblur;
+    double cplxBlur = m_param->rc.complexityBlur;
+    const int filterSize = (int)(qBlur * 4) | 1;
+    double expectedBits;
+    /* initialised so that the fail path can release them unconditionally */
+    double *qScale = NULL, *blurredQscale = NULL;
+    double baseCplx = m_ncu * (m_param->bframes ? 120 : 80);
+
+    /* find total/average complexity & const_bits */
+    for (int i = 0; i < m_numEntries; i++)
+    {
+        allConstBits += m_rce2Pass[i].miscBits;
+    }
+
+    if (allAvailableBits < allConstBits)
+    {
+        /* bits per frame * frames per second, scaled down from bit/s to kbit/s */
+        x265_log(m_param, X265_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
+                 (int)(allConstBits * m_fps / (m_numEntries * 1000.)));
+        return false;
+    }
+
+    /* Blur complexities, to reduce local fluctuation of QP.
+     * We don't blur the QPs directly, because then one very simple frame
+     * could drag down the QP of a nearby complex frame and give it more
+     * bits than intended. */
+    for (int i = 0; i < m_numEntries; i++)
+    {
+        double weightSum = 0;
+        double cplxSum = 0;
+        double weight = 1.0;
+        double gaussianWeight;
+        /* weighted average of cplx of future frames */
+        for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++)
+        {
+            RateControlEntry *rcj = &m_rce2Pass[i + j];
+            double frameDuration = CLIP_DURATION(rcj->frameDuration) / BASE_FRAME_DURATION;
+            weight *= 1 - pow(rcj->iCuCount / m_ncu, 2);
+            if (weight < 0.0001)
+                break;
+            gaussianWeight = weight * exp(-j * j / 200.0);
+            weightSum += gaussianWeight;
+            cplxSum += gaussianWeight * (qScale2bits(rcj, 1) - rcj->miscBits) / frameDuration;
+        }
+        /* weighted average of cplx of past frames; j == 0 is the frame itself, so
+         * weightSum is never zero below */
+        weight = 1.0;
+        for (int j = 0; j <= cplxBlur * 2 && j <= i; j++)
+        {
+            RateControlEntry *rcj = &m_rce2Pass[i - j];
+            double frameDuration = CLIP_DURATION(rcj->frameDuration) / BASE_FRAME_DURATION;
+            gaussianWeight = weight * exp(-j * j / 200.0);
+            weightSum += gaussianWeight;
+            cplxSum += gaussianWeight * (qScale2bits(rcj, 1) - rcj->miscBits) / frameDuration;
+            weight *= 1 - pow(rcj->iCuCount / m_ncu, 2);
+            if (weight < .0001)
+                break;
+        }
+        m_rce2Pass[i].blurredComplexity = cplxSum / weightSum;
+    }
+
+    CHECKED_MALLOC(qScale, double, m_numEntries);
+    if (filterSize > 1)
+    {
+        CHECKED_MALLOC(blurredQscale, double, m_numEntries);
+    }
+    else
+        blurredQscale = qScale; /* no smoothing: both names share one buffer */
+
+    /* Search for a factor which, when multiplied by the RCEQ values from
+     * each frame, adds up to the desired total size.
+     * There is no exact closed-form solution because of VBV constraints and
+     * because qscale2bits is not invertible, but we can start with the simple
+     * approximation of scaling the 1st pass by the ratio of bitrates.
+     * The search range is probably overkill, but speed doesn't matter here. */
+
+    expectedBits = 1;
+    for (int i = 0; i < m_numEntries; i++)
+    {
+        RateControlEntry* rce = &m_rce2Pass[i];
+        double q = getQScale(rce, 1.0);
+        expectedBits += qScale2bits(rce, q);
+        m_lastQScaleFor[rce->sliceType] = q;
+    }
+    stepMult = allAvailableBits / expectedBits;
+
+    /* successive approximation: try adding each halved step to rateFactor and
+     * undo it again whenever the result overshoots the budget */
+    rateFactor = 0;
+    for (double step = 1E4 * stepMult; step > 1E-7 * stepMult; step *= 0.5)
+    {
+        expectedBits = 0;
+        rateFactor += step;
+
+        m_lastNonBPictType = -1;
+        m_lastAccumPNorm = 1;
+        m_accumPNorm = 0;
+
+        m_lastQScaleFor[0] = m_lastQScaleFor[1] =
+        m_lastQScaleFor[2] = pow(baseCplx, 1 - m_qCompress) / rateFactor;
+
+        /* find qscale */
+        for (int i = 0; i < m_numEntries; i++)
+        {
+            RateControlEntry *rce = &m_rce2Pass[i];
+            qScale[i] = getQScale(rce, rateFactor);
+            m_lastQScaleFor[rce->sliceType] = qScale[i];
+        }
+
+        /* fixed I/B qscale relative to P */
+        for (int i = m_numEntries - 1; i >= 0; i--)
+        {
+            qScale[i] = getDiffLimitedQScale(&m_rce2Pass[i], qScale[i]);
+            assert(qScale[i] >= 0);
+        }
+
+        /* smooth curve */
+        if (filterSize > 1)
+        {
+            assert(filterSize % 2 == 1);
+            for (int i = 0; i < m_numEntries; i++)
+            {
+                double q = 0.0, sum = 0.0;
+
+                for (int j = 0; j < filterSize; j++)
+                {
+                    int idx = i + j - filterSize / 2;
+                    double d = idx - i;
+                    double coeff = qBlur == 0 ? 1.0 : exp(-d * d / (qBlur * qBlur));
+                    if (idx < 0 || idx >= m_numEntries)
+                        continue;
+                    /* only frames of the same slice type take part in the average */
+                    if (m_rce2Pass[i].sliceType != m_rce2Pass[idx].sliceType)
+                        continue;
+                    q += qScale[idx] * coeff;
+                    sum += coeff;
+                }
+                blurredQscale[i] = q / sum;
+            }
+        }
+
+        /* find expected bits */
+        for (int i = 0; i < m_numEntries; i++)
+        {
+            RateControlEntry *rce = &m_rce2Pass[i];
+            rce->newQScale = clipQscale(NULL, blurredQscale[i]); // check if needed
+            assert(rce->newQScale >= 0);
+            expectedBits += qScale2bits(rce, rce->newQScale);
+        }
+
+        if (expectedBits > allAvailableBits)
+            rateFactor -= step;
+    }
+
+    X265_FREE(qScale);
+    if (filterSize > 1)
+        X265_FREE(blurredQscale);
+
+    /* vbv2Pass() returns true on success, so only a false result is an error */
+    if (m_isVbv && !vbv2Pass(allAvailableBits))
+        return false;
+    expectedBits = countExpectedBits();
+
+    if (fabs(expectedBits / allAvailableBits - 1.0) > 0.01)
+    {
+        double avgq = 0;
+        for (int i = 0; i < m_numEntries; i++)
+            avgq += m_rce2Pass[i].newQScale;
+        avgq = x265_qScale2qp(avgq / m_numEntries);
+
+        if (expectedBits > allAvailableBits || !m_isVbv)
+            x265_log(m_param, X265_LOG_WARNING, "Error: 2pass curve failed to converge\n");
+        x265_log(m_param, X265_LOG_WARNING, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
+                 (double)m_param->rc.bitrate,
+                 expectedBits * m_fps / (m_numEntries * 1000.),
+                 avgq);
+        if (expectedBits < allAvailableBits && avgq < MIN_QP + 2)
+        {
+            x265_log(m_param, X265_LOG_WARNING, "try reducing target bitrate\n");
+        }
+        else if (expectedBits > allAvailableBits && avgq > MAX_QP - 2)
+        {
+            x265_log(m_param, X265_LOG_WARNING, "try increasing target bitrate\n");
+        }
+        else if (!(m_2pass && m_isVbv))
+            x265_log(m_param, X265_LOG_WARNING, "internal error\n");
+    }
+
+    return true;
+fail:
+    /* reached only from a failed CHECKED_MALLOC (assumed to jump here): qScale may
+     * already be allocated, while blurredQscale cannot own memory yet */
+    X265_FREE(qScale);
+    return false;
+}
+
+/* Adapt the planned per-frame quantizers to the VBV buffer model.
+ * For each interval of bufferFull .. underflow, uniformly increase the qp of all
+ * frames in the interval until either the buffer is full at some intermediate frame
+ * or the last frame in the interval no longer underflows. Recompute intervals and
+ * repeat. Then do the converse to put bits back into overflow areas until the target
+ * size is met. Returns true on success and false if the work buffer cannot be
+ * allocated. */
+bool RateControl::vbv2Pass(uint64_t allAvailableBits)
+{
+    double *fills = NULL;
+    double plannedBits = 0;
+    double previousBits = 0;
+    int first, last;
+    bool raised = true; /* result of the latest underflow fix; reported after the loop */
+
+    CHECKED_MALLOC(fills, double, m_numEntries + 1);
+    fills++; /* fills[-1] carries the buffer state ahead of frame 0 */
+
+    /* adjust overall stream size */
+    do
+    {
+        previousBits = plannedBits;
+
+        /* skipped on the first iteration, when no size estimate exists yet */
+        if (plannedBits)
+        {
+            const double shrink = X265_MAX(X265_MIN(plannedBits / allAvailableBits, 0.999), 0.9);
+            bool lowered = true;
+
+            fills[-1] = m_bufferSize * m_param->rc.vbvBufferInit;
+            first = 0;
+            /* fix overflows */
+            while (lowered && findUnderflow(fills, &first, &last, 1))
+            {
+                lowered = fixUnderflow(first, last, shrink, MIN_QPSCALE, MAX_MAX_QPSCALE);
+                first = last;
+            }
+        }
+
+        fills[-1] = m_bufferSize * (1. - m_param->rc.vbvBufferInit);
+        first = 0;
+        /* fix underflows -- should be done after overflow, as we'd better undersize target than underflowing VBV */
+        raised = true;
+        while (raised && findUnderflow(fills, &first, &last, 0))
+            raised = fixUnderflow(first, last, 1.001, MIN_QPSCALE, MAX_MAX_QPSCALE);
+
+        plannedBits = countExpectedBits();
+    }
+    while ((plannedBits < .995 * allAvailableBits) && ((int64_t)(plannedBits + .5) > (int64_t)(previousBits + .5)));
+
+    if (!raised)
+        x265_log(m_param, X265_LOG_WARNING, "vbv-maxrate issue, qpmax or vbv-maxrate too low\n");
+
+    /* store expected vbv filling values for tracking when encoding */
+    for (int idx = 0; idx < m_numEntries; idx++)
+        m_rce2Pass[idx].expectedVbv = m_bufferSize - fills[idx];
+
+    X265_FREE(fills - 1); /* undo the ++ applied after allocation */
+    return true;
+fail:
+    return false;
+}
void RateControl::rateControlStart(Frame* pic, Lookahead *l, RateControlEntry* rce, Encoder* enc)
{
@@ -620,6 +1062,127 @@
else
m_accumPQp += m_qp;
}
+/* Derive the final qscale of one 2-pass entry from the candidate q: I and B frames
+ * are tied to the surrounding P-frame quantizers through ipFactor / pbFactor, and
+ * the change against the previous frame of the same type is limited to a factor of
+ * m_lstep. Updates the running state (m_lastQScaleFor, m_lastNonBPictType,
+ * m_accumPQp, m_accumPNorm, m_lastAccumPNorm) and returns the adjusted qscale. */
+double RateControl::getDiffLimitedQScale(RateControlEntry *rce, double q)
+{
+    // force I/B quants as a function of P quants
+    const double lastPqScale = m_lastQScaleFor[P_SLICE];
+    /* m_lastNonBPictType is -1 until the first non-B frame has been processed; fall
+     * back to the P qscale then rather than indexing the table with -1 */
+    const double lastNonBqScale = m_lastNonBPictType >= 0 ? m_lastQScaleFor[m_lastNonBPictType] : lastPqScale;
+    if (rce->sliceType == I_SLICE)
+    {
+        double iq = q;
+        double pq = x265_qp2qScale(m_accumPQp / m_accumPNorm);
+        double ipFactor = fabs(m_param->rc.ipFactor);
+        /* don't apply ipFactor if the following frame is also I */
+        if (m_accumPNorm <= 0)
+            q = iq;
+        else if (m_param->rc.ipFactor < 0)
+            q = iq / ipFactor;
+        else if (m_accumPNorm >= 1)
+            q = pq / ipFactor;
+        else
+            q = m_accumPNorm * pq / ipFactor + (1 - m_accumPNorm) * iq;
+    }
+    else if (rce->sliceType == B_SLICE)
+    {
+        if (m_param->rc.pbFactor > 0)
+            q = lastNonBqScale;
+        if (!rce->keptAsRef)
+            q *= fabs(m_param->rc.pbFactor);
+    }
+    else if (rce->sliceType == P_SLICE
+             && m_lastNonBPictType == P_SLICE
+             && rce->coeffBits == 0)
+    {
+        /* a P frame without coefficient bits reuses the previous P qscale */
+        q = lastPqScale;
+    }
+
+    /* last qscale / qdiff stuff */
+    if (m_lastNonBPictType == rce->sliceType &&
+        (rce->sliceType != I_SLICE || m_lastAccumPNorm < 1))
+    {
+        double maxQscale = m_lastQScaleFor[rce->sliceType] * m_lstep;
+        double minQscale = m_lastQScaleFor[rce->sliceType] / m_lstep;
+        q = Clip3(minQscale, maxQscale, q);
+    }
+
+    m_lastQScaleFor[rce->sliceType] = q;
+    if (rce->sliceType != B_SLICE)
+        m_lastNonBPictType = rce->sliceType;
+    if (rce->sliceType == I_SLICE)
+    {
+        /* an I frame restarts the accumulation of P-frame QPs */
+        m_lastAccumPNorm = m_accumPNorm;
+        m_accumPNorm = 0;
+        m_accumPQp = 0;
+    }
+    if (rce->sliceType == P_SLICE)
+    {
+        /* the larger the share of intra CUs in this P frame, the more the
+         * accumulated P history is discounted */
+        double mask = 1 - pow(rce->iCuCount / m_ncu, 2);
+        m_accumPQp = mask * (x265_qScale2qp(q) + m_accumPQp);
+        m_accumPNorm = mask * (1 + m_accumPNorm);
+    }
+
+    return q;
+}
+
+/* Sum the predicted size of every 2-pass entry at its newQScale. While walking the
+ * entries, each one's expectedBits receives the running total of all frames before
+ * it (the frame itself excluded). Returns the total for the whole stream. */
+double RateControl::countExpectedBits()
+{
+    double runningTotal = 0;
+    RateControlEntry *entry = m_rce2Pass;
+    for (int remaining = m_numEntries; remaining > 0; remaining--, entry++)
+    {
+        entry->expectedBits = (uint64_t)runningTotal;
+        runningTotal += qScale2bits(entry, entry->newQScale);
+    }
+    return runningTotal;
+}
+
+/* Find an interval ending on an overflow or underflow (depending on whether we're
+ * adding or removing bits), and starting on the earliest frame that can influence
+ * the buffer fill of that end frame.
+ * The simulation starts at frame *t0 from the level stored in fills[*t0 - 1] and
+ * records the level after every visited frame in fills[]. `over` selects the sign
+ * applied to each frame's buffer change. On return *t0 / *t1 hold the interval
+ * bounds (-1 where none was found); the result is true when both are valid. */
+bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int over)
+{
+    const double lowMark = .1 * m_bufferSize;
+    const double highMark = .9 * m_bufferSize;
+    const double sign = over ? 1. : -1.;
+    double level = fills[*t0 - 1];
+    int first = -1;
+    int last = -1;
+
+    for (int idx = *t0; idx < m_numEntries; idx++)
+    {
+        RateControlEntry *entry = &m_rce2Pass[idx];
+        level += (entry->frameDuration * m_vbvMaxRate -
+                  qScale2bits(entry, entry->newQScale)) * sign;
+        level = Clip3(0.0, m_bufferSize, level);
+        fills[idx] = level;
+
+        if (level <= lowMark || idx == 0)
+        {
+            /* a low point after the interval end has been seen closes the search */
+            if (last >= 0)
+                break;
+            first = idx;
+        }
+        else if (level >= highMark && first >= 0)
+            last = idx;
+    }
+
+    *t0 = first;
+    *t1 = last;
+    return first >= 0 && last >= 0;
+}
+
+/* Multiply newQScale of the 2-pass entries in the interval by `adjustment`, keeping
+ * every value inside [qscaleMin, qscaleMax]. The interval runs from t0 to t1
+ * inclusive, except that a non-zero t0 is itself skipped. Returns true if at least
+ * one entry ended up different from its clipped starting value. */
+bool RateControl::fixUnderflow(int t0, int t1, double adjustment, double qscaleMin, double qscaleMax)
+{
+    bool changed = false;
+    const int first = t0 > 0 ? t0 + 1 : t0;
+
+    for (int idx = first; idx <= t1; idx++)
+    {
+        const double before = Clip3(qscaleMin, qscaleMax, m_rce2Pass[idx].newQScale);
+        const double after = Clip3(qscaleMin, qscaleMax, before * adjustment);
+        m_rce2Pass[idx].newQScale = after;
+        if (after != before)
+            changed = true;
+    }
+    return changed;
+}
double RateControl::rateEstimateQscale(Frame* pic, RateControlEntry *rce)
{
@@ -694,7 +1257,7 @@
m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
m_shortTermCplxCount++;
/* coeffBits to be used in 2-pass */
- rce->coeffBits = m_currentSatd;
+ rce->coeffBits = (int)m_currentSatd;
rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
rce->mvBits = 0;
rce->sliceType = m_sliceType;
@@ -759,15 +1322,10 @@
double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
q = X265_MIN(lqmax, q);
}
-
q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
rce->qpNoVbv = x265_qScale2qp(q);
-
- if (m_isVbv && m_currentSatd > 0)
- q = clipQscale(pic, q);
-
+ q = clipQscale(pic, q);
m_lastQScaleFor[m_sliceType] = q;
-
if (m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q)
m_lastQScaleFor[P_SLICE] = q * fabs(m_param->rc.ipFactor);
@@ -845,128 +1403,134 @@
// B-frames are not directly subject to VBV,
// since they are controlled by referenced P-frames' QPs.
double q0 = q;
+ if (m_isVbv && m_currentSatd > 0)
+ {
+ if (m_param->lookaheadDepth || m_param->rc.cuTree ||
+ m_param->scenecutThreshold ||
+ (m_param->bFrameAdaptive && m_param->bframes))
+ {
+ /* Lookahead VBV: If lookahead is done, raise the quantizer as necessary
+ * such that no frames in the lookahead overflow and such that the buffer
+ * is in a reasonable state by the end of the lookahead. */
+ int terminate = 0;
+ /* Avoid an infinite loop. */
+ for (int iterations = 0; iterations < 1000 && terminate != 3; iterations++)
+ {
+ double frameQ[3];
+ double curBits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
+ double bufferFillCur = m_bufferFill - curBits;
+ double targetFill;
+ double totalDuration = 0;
+ frameQ[P_SLICE] = m_sliceType == I_SLICE ? q * m_param->rc.ipFactor : q;
+ frameQ[B_SLICE] = frameQ[P_SLICE] * m_param->rc.pbFactor;
+ frameQ[I_SLICE] = frameQ[P_SLICE] / m_param->rc.ipFactor;
+ /* Loop over the planned future frames. */
+ for (int j = 0; bufferFillCur >= 0 && bufferFillCur <= m_bufferSize; j++)
+ {
+ totalDuration += m_frameDuration;
+ bufferFillCur += m_vbvMaxRate * m_frameDuration;
+ int type = pic->m_lowres.plannedType[j];
+ int64_t satd = pic->m_lowres.plannedSatd[j] >> (X265_DEPTH - 8);
+ if (type == X265_TYPE_AUTO)
+ break;
+ type = IS_X265_TYPE_I(type) ? I_SLICE : IS_X265_TYPE_B(type) ? B_SLICE : P_SLICE;
+ curBits = predictSize(&m_pred[type], frameQ[type], (double)satd);
+ bufferFillCur -= curBits;
+ }
- if (m_param->lookaheadDepth || m_param->rc.cuTree ||
- m_param->scenecutThreshold ||
- (m_param->bFrameAdaptive && m_param->bframes))
- {
- /* Lookahead VBV: If lookahead is done, raise the quantizer as necessary
- * such that no frames in the lookahead overflow and such that the buffer
- * is in a reasonable state by the end of the lookahead. */
+ /* Try to get the buffer at least 50% filled, but don't set an impossible goal. */
+ targetFill = X265_MIN(m_bufferFill + totalDuration * m_vbvMaxRate * 0.5, m_bufferSize * 0.5);
+ if (bufferFillCur < targetFill)
+ {
+ q *= 1.01;
+ terminate |= 1;
+ continue;
+ }
+ /* Try to get the buffer no more than 80% filled, but don't set an impossible goal. */
+ targetFill = Clip3(m_bufferSize * 0.8, m_bufferSize, m_bufferFill - totalDuration * m_vbvMaxRate * 0.5);
+ if (m_isCbr && bufferFillCur > targetFill)
+ {
+ q /= 1.01;
+ terminate |= 2;
+ continue;
+ }
+ break;
+ }
+ }
+ else
+ {
+ /* Fallback to old purely-reactive algorithm: no lookahead. */
+ if ((m_sliceType == P_SLICE ||
+ (m_sliceType == I_SLICE && m_lastNonBPictType == I_SLICE)) &&
+ m_bufferFill / m_bufferSize < 0.5)
+ {
+ q /= Clip3(0.5, 1.0, 2.0 * m_bufferFill / m_bufferSize);
+ }
+ // Now a hard threshold to make sure the frame fits in VBV.
+ // This one is mostly for I-frames.
+ double bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
- int terminate = 0;
+ // For small VBVs, allow the frame to use up the entire VBV.
+ double maxFillFactor;
+ maxFillFactor = m_bufferSize >= 5 * m_bufferRate ? 2 : 1;
+ // For single-frame VBVs, request that the frame use up the entire VBV.
+ double minFillFactor = m_singleFrameVbv ? 1 : 2;
- /* Avoid an infinite loop. */
- for (int iterations = 0; iterations < 1000 && terminate != 3; iterations++)
- {
- double frameQ[3];
- double curBits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
- double bufferFillCur = m_bufferFill - curBits;
- double targetFill;
- double totalDuration = 0;
- frameQ[P_SLICE] = m_sliceType == I_SLICE ? q * m_param->rc.ipFactor : q;
- frameQ[B_SLICE] = frameQ[P_SLICE] * m_param->rc.pbFactor;
- frameQ[I_SLICE] = frameQ[P_SLICE] / m_param->rc.ipFactor;
- /* Loop over the planned future frames. */
- for (int j = 0; bufferFillCur >= 0 && bufferFillCur <= m_bufferSize; j++)
+ for (int iterations = 0; iterations < 10; iterations++)
{
- totalDuration += m_frameDuration;
- bufferFillCur += m_vbvMaxRate * m_frameDuration;
- int type = pic->m_lowres.plannedType[j];
- int64_t satd = pic->m_lowres.plannedSatd[j] >> (X265_DEPTH - 8);
- if (type == X265_TYPE_AUTO)
- break;
- type = IS_X265_TYPE_I(type) ? I_SLICE : IS_X265_TYPE_B(type) ? B_SLICE : P_SLICE;
- curBits = predictSize(&m_pred[type], frameQ[type], (double)satd);
- bufferFillCur -= curBits;
+ double qf = 1.0;
+ if (bits > m_bufferFill / maxFillFactor)
+ qf = Clip3(0.2, 1.0, m_bufferFill / (maxFillFactor * bits));
+ q /= qf;
+ bits *= qf;
+ if (bits < m_bufferRate / minFillFactor)
+ q *= bits * minFillFactor / m_bufferRate;
+ bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
}
- /* Try to get the buffer at least 50% filled, but don't set an impossible goal. */
- targetFill = X265_MIN(m_bufferFill + totalDuration * m_vbvMaxRate * 0.5, m_bufferSize * 0.5);
- if (bufferFillCur < targetFill)
+ q = X265_MAX(q0, q);
+ }
+
+ // Check B-frame complexity, and use up any bits that would
+ // overflow before the next P-frame.
+ if (m_sliceType == P_SLICE && !m_singleFrameVbv)
+ {
+ int nb = m_bframes;
+ double bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
+ double bbits = predictSize(&m_predBfromP, q * m_param->rc.pbFactor, (double)m_currentSatd);
+ double space;
+ if (bbits > m_bufferRate)
+ nb = 0;
+ double pbbits = nb * bbits;
+
+ space = m_bufferFill + (1 + nb) * m_bufferRate - m_bufferSize;
+ if (pbbits < space)
{
- q *= 1.01;
- terminate |= 1;
- continue;
+ q *= X265_MAX(pbbits / space, bits / (0.5 * m_bufferSize));
}
- /* Try to get the buffer no more than 80% filled, but don't set an impossible goal. */
- targetFill = Clip3(m_bufferSize * 0.8, m_bufferSize, m_bufferFill - totalDuration * m_vbvMaxRate * 0.5);
- if (m_isCbr && bufferFillCur > targetFill)
- {
- q /= 1.01;
- terminate |= 2;
- continue;
- }
- break;
+ q = X265_MAX(q0 / 2, q);
+ }
+ if (!m_isCbr)
+ q = X265_MAX(q0, q);
+
+ if (m_rateFactorMaxIncrement)
+ {
+ double qpNoVbv = x265_qScale2qp(q0);
+ double qmax = X265_MIN(MAX_MAX_QPSCALE,x265_qp2qScale(qpNoVbv + m_rateFactorMaxIncrement));
+ return Clip3(MIN_QPSCALE, qmax, q);
}
}
- else
+ if (m_2pass)
{
- /* Fallback to old purely-reactive algorithm: no lookahead. */
- if ((m_sliceType == P_SLICE ||
- (m_sliceType == I_SLICE && m_lastNonBPictType == I_SLICE)) &&
- m_bufferFill / m_bufferSize < 0.5)
- {
- q /= Clip3(0.5, 1.0, 2.0 * m_bufferFill / m_bufferSize);
- }
-
- // Now a hard threshold to make sure the frame fits in VBV.
- // This one is mostly for I-frames.
- double bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
-
- // For small VBVs, allow the frame to use up the entire VBV.
- double maxFillFactor;
- maxFillFactor = m_bufferSize >= 5 * m_bufferRate ? 2 : 1;
- // For single-frame VBVs, request that the frame use up the entire VBV.
- double minFillFactor = m_singleFrameVbv ? 1 : 2;
-
- for (int iterations = 0; iterations < 10; iterations++)
- {
- double qf = 1.0;
- if (bits > m_bufferFill / maxFillFactor)
- qf = Clip3(0.2, 1.0, m_bufferFill / (maxFillFactor * bits));
- q /= qf;
- bits *= qf;
- if (bits < m_bufferRate / minFillFactor)
- q *= bits * minFillFactor / m_bufferRate;
- bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
- }
-
- q = X265_MAX(q0, q);
+ double min = log(MIN_QPSCALE);
+ double max = log(MAX_MAX_QPSCALE);
+ q = (log(q) - min) / (max - min) - 0.5;
+ q = 1.0 / (1.0 + exp(-4 * q));
+ q = q*(max - min) + min;
+ return exp(q);
}
-
- // Check B-frame complexity, and use up any bits that would
- // overflow before the next P-frame.
- if (m_sliceType == P_SLICE && !m_singleFrameVbv)
- {
- int nb = m_bframes;
- double bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
- double bbits = predictSize(&m_predBfromP, q * m_param->rc.pbFactor, (double)m_currentSatd);
- double space;
- if (bbits > m_bufferRate)
- nb = 0;
- double pbbits = nb * bbits;
-
- space = m_bufferFill + (1 + nb) * m_bufferRate - m_bufferSize;
- if (pbbits < space)
- {
- q *= X265_MAX(pbbits / space, bits / (0.5 * m_bufferSize));
- }
- q = X265_MAX(q0 / 2, q);
- }
- if (!m_isCbr)
- q = X265_MAX(q0, q);
-
- if (m_rateFactorMaxIncrement)
- {
- double qpNoVbv = x265_qScale2qp(q0);
- double qmax = X265_MIN(MAX_MAX_QPSCALE,x265_qp2qScale(qpNoVbv + m_rateFactorMaxIncrement));
- return Clip3(MIN_QPSCALE, qmax, q);
- }
-
return Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
}
-
double RateControl::predictRowsSizeSum(Frame* pic, RateControlEntry* rce, double qpVbv, int32_t & encodedBitsSoFar)
{
uint32_t rowSatdCostSoFar = 0, totalSatdBits = 0;
@@ -1179,10 +1743,14 @@
}
else
q = pow(rce->blurredComplexity, 1 - m_param->rc.qCompress);
-
- m_lastRceq = q;
- q /= rateFactor;
-
+ // avoid NaN's in the Rceq
+ if (rce->coeffBits + rce->mvBits == 0)
+ q = m_lastQScaleFor[rce->sliceType];
+ else
+ {
+ m_lastRceq = q;
+ q /= rateFactor;
+ }
return q;
}
@@ -1281,16 +1849,16 @@
: rce->sliceType == P_SLICE ? (pic->getSlice()->isReferenced()? 'P' : 'p')
: pic->getSlice()->isReferenced()? 'B' : 'b';
if (fprintf(m_statFileOut,
- "in:%d out:%d type:%c dur:%.3f q:%.2f q-aq:%.2f tex:%d mv:%d misc:%d imb:%.2f pmb:%.2f smb:%.2f ",
+ "in:%d out:%d type:%c dur:%.3f q:%.2f q-aq:%.2f tex:%d mv:%d misc:%d icu:%.2f pcu:%.2f scu:%.2f ",
rce->poc, rce->encodeOrder,
cType, m_frameDuration,
pic->m_avgQpRc, pic->m_avgQpAq,
stats->coeffBits,
stats->mvBits,
stats->miscBits,
- stats->cuCount_i,
- stats->cuCount_p,
- stats->cuCount_skip) < 0)
+ stats->cuCount_i * m_ncu,
+ stats->cuCount_p * m_ncu,
+ stats->cuCount_skip * m_ncu) < 0)
goto writeFailure;
if (fprintf(m_statFileOut, ";\n") < 0)
goto writeFailure;
@@ -1308,22 +1876,23 @@
}
/* amortize part of each I slice over the next several frames, up to
* keyint-max, to avoid over-compensating for the large I slice cost */
- if (rce->sliceType == I_SLICE)
+ if (!m_param->rc.bStatWrite && !m_param->rc.bStatRead)
{
- /* previous I still had a residual; roll it into the new loan */
- if (m_residualFrames)
- bits += m_residualCost * m_residualFrames;
-
- m_residualFrames = X265_MIN(s_amortizeFrames, m_param->keyframeMax);
- m_residualCost = (int)((bits * s_amortizeFraction) / m_residualFrames);
- bits -= m_residualCost * m_residualFrames;
+ if (rce->sliceType == I_SLICE)
+ {
+ /* previous I still had a residual; roll it into the new loan */
+ if (m_residualFrames)
+ bits += m_residualCost * m_residualFrames;
+ m_residualFrames = X265_MIN(s_amortizeFrames, m_param->keyframeMax);
+ m_residualCost = (int)((bits * s_amortizeFraction) / m_residualFrames);
+ bits -= m_residualCost * m_residualFrames;
+ }
+ else if (m_residualFrames)
+ {
+ bits += m_residualCost;
+ m_residualFrames--;
+ }
}
- else if (m_residualFrames)
- {
- bits += m_residualCost;
- m_residualFrames--;
- }
-
if (rce->sliceType != B_SLICE)
/* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
* to improve short term compensation for next frame. */
@@ -1431,10 +2000,10 @@
unlink(newFileName);
bError = rename(tmpFileName, newFileName);
}
- if (!bError)
+ if (bError)
{
x265_log(m_param, X265_LOG_ERROR, "failed to rename cutree output stats file to \"%s\"\n",
- m_param->rc.statFileName);
+ newFileName);
}
X265_FREE(tmpFileName);
X265_FREE(newFileName);
@@ -1446,3 +2015,4 @@
for (int i = 0; i < 2; i++)
X265_FREE(m_cuTreeStats.qpBuffer[i]);
}
+
diff -r fc4c54e9f211 -r 33d55e581aac source/encoder/ratecontrol.h
--- a/source/encoder/ratecontrol.h Fri Jul 11 16:32:41 2014 +0530
+++ b/source/encoder/ratecontrol.h Fri Jul 11 16:35:29 2014 +0530
@@ -50,13 +50,10 @@
double decay;
double offset;
};
-
struct RateControlEntry
{
- int64_t coeffBits; /* Required in 2-pass rate control */
int64_t lastSatd; /* Contains the picture cost of the previous frame, required for resetAbr and VBV */
int sliceType;
- int mvBits;
int bframes;
int poc;
int encodeOrder;
@@ -64,21 +61,34 @@
bool bLastMiniGopBFrame;
double blurredComplexity;
double qpaRc;
+ double qpAq;
double qRceq;
double frameSizePlanned; /* frame Size decided by RateCotrol before encoding the frame */
double bufferRate;
double movingAvgSum;
double qpNoVbv;
double bufferFill;
+ double frameDuration;
Predictor rowPreds[3][2];
Predictor* rowPred[2];
double frameSizeEstimated; /* hold frameSize, updated from cu level vbv rc */
bool isActive;
-
SEIPictureTiming *picTimingSEI;
HRDTiming *hrdTiming;
+ /* Required in 2-pass rate control */
+ double iCuCount;
+ double pCuCount;
+ double skipCuCount;
+ bool keptAsRef;
+ double expectedVbv;
+ double qScale;
+ double newQScale;
+ double newQp;
+ int mvBits;
+ int miscBits;
+ int coeffBits;
+ uint64_t expectedBits; /*total expected bits up to the current frame (current one excluded)*/
};
-
class RateControl
{
public:
@@ -131,10 +141,9 @@
double m_shortTermCplxCount;
double m_lastRceq;
double m_qCompress;
-
int64_t m_totalBits; /* total bits used for already encoded frames */
int m_framesDone; /* # of frames passed through RateCotrol already */
-
+ double m_fps;
/* hrd stuff */
SEIBufferingPeriod m_bufPeriodSEI;
double m_nominalRemovalTime;
@@ -145,7 +154,9 @@
FILE* m_statFileOut;
FILE* m_cutreeStatFileOut;
FILE* m_cutreeStatFileIn;
-
+ int m_numEntries;
+ RateControlEntry *m_rce2Pass;
+ double m_lastAccumPNorm;
struct
{
uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree quantizer data. */
@@ -184,6 +195,12 @@
double predictSize(Predictor *p, double q, double var);
void checkAndResetABR(RateControlEntry* rce, bool isFrameDone);
double predictRowsSizeSum(Frame* pic, RateControlEntry* rce, double qpm, int32_t& encodedBits);
+    /* 2-pass helpers. Members are declared unqualified: repeating the class name
+     * inside the class body ("RateControl::vbv2Pass") is rejected by GCC and Clang. */
+    bool initPass2();
+    double getDiffLimitedQScale(RateControlEntry *rce, double q);
+    double countExpectedBits();
+    bool vbv2Pass(uint64_t allAvailableBits);
+    bool findUnderflow(double *fills, int *t0, int *t1, int over);
+    bool fixUnderflow(int t0, int t1, double adjustment, double qscaleMin, double qscaleMax);
};
}
#endif // ifndef X265_RATECONTROL_H
More information about the x265-devel
mailing list