[x265] [PATCH 5 of 5] rc: init RC data for second pass in a multi pass encode

aarthi at multicorewareinc.com aarthi at multicorewareinc.com
Fri Jul 11 13:13:00 CEST 2014


# HG changeset patch
# User Aarthi Thirumalai
# Date 1405076729 -19800
#      Fri Jul 11 16:35:29 2014 +0530
# Node ID 33d55e581aacbfcb91958a005a239760bd8b9163
# Parent  fc4c54e9f2115d49348e104d962c9073f323987e
rc: init RC data for second pass in a multi pass encode

diff -r fc4c54e9f211 -r 33d55e581aac source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Jul 11 16:32:41 2014 +0530
+++ b/source/encoder/encoder.cpp	Fri Jul 11 16:35:29 2014 +0530
@@ -1327,8 +1327,7 @@
         p->rc.aqMode = X265_AQ_VARIANCE;
         p->rc.aqStrength = 0.0;
     }
-
-    if (p->lookaheadDepth == 0 && p->rc.cuTree)
+    if (p->lookaheadDepth == 0 && p->rc.cuTree && !p->rc.bStatRead)
     {
         x265_log(p, X265_LOG_WARNING, "cuTree disabled, requires lookahead to be enabled\n");
         p->rc.cuTree = 0;
diff -r fc4c54e9f211 -r 33d55e581aac source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Fri Jul 11 16:32:41 2014 +0530
+++ b/source/encoder/ratecontrol.cpp	Fri Jul 11 16:35:29 2014 +0530
@@ -42,8 +42,24 @@
 const double RateControl::s_amortizeFraction = 0.85;
 const int RateControl::s_amortizeFrames = 75;
 const char *RateControl::s_defaultStatFileName = "x265_2pass.log";
-
 namespace {
+/* Make the enclosing function return false when option `opt` in the first-pass "#options:" line
+ * disagrees with param_val. Uses opts, p, i and bErr from the caller's scope ("opt=N", "opt", "no-opt"). */
+#define CMP_OPT_FIRST_PASS(opt, param_val)\
+{\
+    bErr = 0;\
+    p = strstr(opts, opt "=");\
+    char* q = strstr(opts, "no-" opt);\
+    if (p && sscanf(p, opt "=%d" , &i) && param_val != i)\
+        bErr = 1;\
+    else if ((!param_val && !q && !p) || (param_val && (q || !strstr(opts, opt))))\
+        bErr = 1;\
+    if (bErr)\
+    {\
+        x265_log(m_param, X265_LOG_ERROR, "different " opt " setting than first pass (%d vs %d)\n", param_val, i);\
+        return false;\
+    }\
+}
 
 inline int calcScale(uint32_t x)
 {
@@ -96,6 +112,14 @@
     strcat(output, suffix);
     return output;
 }
+/* Predict a frame's size at qScale: rescale first-pass texture and mv bits, keep misc bits as recorded. */
+inline double qScale2bits(RateControlEntry *rce, double qScale)
+{
+    const double q = qScale < 0.1 ? 0.1 : qScale; /* floor the requested qscale at 0.1 */
+    const double textureBits = (rce->coeffBits + .1) * pow(rce->qScale / q, 1.1);
+    const double motionBits = rce->mvBits * pow(X265_MAX(rce->qScale, 1) / X265_MAX(q, 1), 0.5);
+    return textureBits + motionBits + rce->miscBits;
+}
 
 }  // end anonymous namespace
 /* Compute variance to derive AC energy of each block */
@@ -274,7 +298,7 @@
     m_residualCost = 0;
     m_rateFactorMaxIncrement = 0;
     m_rateFactorMaxDecrement = 0;
-
+    m_fps = (double)m_param->fpsNum / m_param->fpsDenom; /* floating-point divide keeps fractional rates such as 30000/1001 */
     if (m_param->rc.rateControlMode == X265_RC_CRF)
     {
         m_param->rc.qp = (int)m_param->rc.rfConstant;
@@ -397,7 +421,174 @@
         const char *fileName = m_param->rc.statFileName;
         if (!fileName)
             fileName = s_defaultStatFileName;
+       /* Load stat file and init 2pass algo */
+        if (m_param->rc.bStatRead)
+        {
+            char *p, *statsIn, *statsBuf;
+            /* read 1st pass stats */
+            statsIn = statsBuf = x265_slurp_file(fileName);
+            if (!statsBuf)
+                return false;
+            if (m_param->rc.cuTree)
+            {
+                /* the cutree stats live in a companion file named after the stats file */
+                char *tmpFile = strcatFilename(fileName, ".cutree");
+                if (!tmpFile)
+                    return false;
+                m_cutreeStatFileIn = fopen(tmpFile, "rb");
+                if (!m_cutreeStatFileIn) /* log while tmpFile is still valid */
+                    x265_log(m_param, X265_LOG_ERROR, "can't open stats file %s\n", tmpFile);
+                x265_free(tmpFile);
+                if (!m_cutreeStatFileIn)
+                    return false;
+            }
 
+            /* check whether 1st pass options were compatible with current options */
+            if (strncmp(statsBuf, "#options:", 9))
+            {
+                x265_log(m_param, X265_LOG_ERROR,"options list in stats file not valid\n");
+                return false;
+            }
+            {
+                int i, j;
+                uint32_t k , l;
+                bool bErr = false;
+                char *opts = statsBuf;
+                statsIn = strchr(statsBuf, '\n');
+                if (!statsIn)
+                    return false;
+                *statsIn = '\0';
+                statsIn++;
+                if (sscanf(opts, "#options: %dx%d", &i, &j) != 2)
+                {
+                    x265_log(m_param, X265_LOG_ERROR, "Resolution specified in stats file not valid\n");
+                    return false;
+                }
+                if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, " fps=%u/%u", &k, &l) != 2)
+                {
+                    x265_log(m_param, X265_LOG_ERROR, "fps specified in stats file not valid\n");
+                    return false;
+                }
+                if (k != m_param->fpsNum || l != m_param->fpsDenom)
+                {
+                    x265_log(m_param, X265_LOG_ERROR, "fps mismatch with 1st pass (%u/%u vs %u/%u)\n",
+                              m_param->fpsNum, m_param->fpsDenom, k, l);
+                    return false;
+                }
+                CMP_OPT_FIRST_PASS("bitdepth", m_param->internalBitDepth);
+                CMP_OPT_FIRST_PASS("weightp", m_param->bEnableWeightedPred);
+                CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
+                CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
+                CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
+                CMP_OPT_FIRST_PASS("keyint", m_param->keyframeMax);
+                CMP_OPT_FIRST_PASS("wpp", m_param->bEnableWavefront);
+
+                if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p, "b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
+                {
+                    m_param->bFrameAdaptive = i;
+                }
+                else if (m_param->bframes)
+                {
+                    x265_log(m_param, X265_LOG_ERROR, "b-adapt method specified in stats file not valid\n");
+                    return false;
+                }
+
+                if ((m_param->rc.cuTree || m_param->rc.vbvBufferSize) && ((p = strstr(opts, "rc-lookahead=")) != 0) && sscanf(p, "rc-lookahead=%d", &i))
+                    m_param->lookaheadDepth = i;
+            }
+            /* find number of pics */
+            p = statsIn;
+            int numEntries;
+            for (numEntries = -1; p; numEntries++)
+                p = strchr(p + 1, ';');
+            if (!numEntries)
+            {
+                x265_log(m_param, X265_LOG_ERROR, "empty stats file\n");
+                return false;
+            }
+            m_numEntries = numEntries;
+
+            if (m_param->totalFrames < m_numEntries && m_param->totalFrames > 0)
+            {
+                x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
+                         m_param->totalFrames, m_numEntries);
+            }
+            if (m_param->totalFrames > m_numEntries)
+            {
+                x265_log(m_param, X265_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
+                         m_param->totalFrames, m_numEntries);
+                return false;
+            }
+
+            m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
+            if (!m_rce2Pass)
+            {
+                 x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2 pass cannot be allocated\n");
+                 return false;
+            }
+            /* init all to skipped p frames */
+            for (int i = 0; i < m_numEntries; i++)
+            {
+                RateControlEntry *rce = &m_rce2Pass[i];
+                rce->sliceType = P_SLICE;
+                rce->qScale = rce->newQScale = x265_qp2qScale(20);
+                rce->miscBits = m_ncu + 10;
+                rce->newQp = 0;
+            }
+            /* read stats */
+            p = statsIn;
+            double totalQpAq = 0;
+            for (int i = 0; i < m_numEntries; i++)
+            {
+                RateControlEntry *rce;
+                int frameNumber = -1; /* stays out of range if the "in:" field cannot be parsed */
+                char picType = 0;     /* stays unknown if the "type:" field cannot be parsed */
+                int e;
+                char *next;
+                double qpRc, qpAq;
+                next = strstr(p, ";");
+                if (next)
+                    *next++ = 0;
+                e = sscanf(p, " in:%d ", &frameNumber);
+                /* e counts converted fields: 1 for "in:" plus 10 from the scan below, so a full entry gives 11 */
+                if (frameNumber < 0 || frameNumber >= m_numEntries)
+                {
+                    x265_log(m_param, X265_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frameNumber, i);
+                    return false;
+                }
+                rce = &m_rce2Pass[frameNumber];
+                e += sscanf(p, " in:%*d out:%*d type:%c dur:%lf q:%lf q-aq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf scu:%lf",
+                       &picType, &rce->frameDuration, &qpRc, &qpAq, &rce->coeffBits,
+                       &rce->mvBits, &rce->miscBits, &rce->iCuCount, &rce->pCuCount,
+                       &rce->skipCuCount);
+
+                /* every type except lower-case 'b'/'p' is kept as reference; assign both outcomes explicitly */
+                rce->keptAsRef = picType != 'b' && picType != 'p';
+                if (picType == 'I' || picType == 'i')
+                    rce->sliceType = I_SLICE;
+                else if (picType == 'P' || picType == 'p')
+                    rce->sliceType = P_SLICE;
+                else if (picType == 'B' || picType == 'b')
+                    rce->sliceType = B_SLICE;
+                else
+                    e = -1;
+                if (e < 11)
+                {
+                    x265_log(m_param, X265_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
+                    return false;
+                }
+                rce->qScale = x265_qp2qScale(qpRc);
+                totalQpAq += qpAq;
+                p = next;
+            }
+             X265_FREE(statsBuf);
+
+            if (m_param->rc.rateControlMode == X265_RC_ABR)
+            {
+                if (!initPass2())
+                    return false;
+            } /* else we're using constant quant, so no need to run the bitrate allocation */
+        }
         /* Open output file */
         /* If input and output files are the same, output to a temp file
          * and move it to the real name only when it's complete */
@@ -411,12 +602,12 @@
             x265_free(statFileTmpname);
             if (!m_statFileOut)
             {
-                x265_log(m_param, X265_LOG_ERROR, "RateControl Init: can't open stats file\n");
+                x265_log(m_param, X265_LOG_ERROR, "can't open temp stats file for %s\n", fileName); /* statFileTmpname is already freed */
                 return false;
             }
             p = x265_param2string(m_param);
             if (p)
-                fprintf(m_statFileOut, "#options: %s\n", p);
+                fprintf(m_statFileOut, "#options: %s \n", p);
             X265_FREE(p);
             if (m_param->rc.cuTree && !m_param->rc.bStatRead)
             {
@@ -427,7 +618,7 @@
                 x265_free(statFileTmpname);
                 if (!m_cutreeStatFileOut)
                 {
-                    x265_log(m_param, X265_LOG_ERROR, "RateControl Init: can't open mbtree stats file\n");
+                    x265_log(m_param, X265_LOG_ERROR, "can't open mbtree temp stats file for %s\n", fileName); /* statFileTmpname is already freed */
                     return false;
                 }
             }
@@ -440,16 +631,13 @@
             m_cuTreeStats.qpBufPos = -1;
         }
     }
-
     if (m_isVbv && !m_2pass)
     {
-        double fps = (double)m_param->fpsNum / m_param->fpsDenom;
-
         /* We don't support changing the ABR bitrate right now,
          * so if the stream starts as CBR, keep it CBR. */
-        if (m_param->rc.vbvBufferSize < (int)(m_param->rc.vbvMaxBitrate / fps))
+        if (m_param->rc.vbvBufferSize < (int)(m_param->rc.vbvMaxBitrate / m_fps))
         {
-            m_param->rc.vbvBufferSize = (int)(m_param->rc.vbvMaxBitrate / fps);
+            m_param->rc.vbvBufferSize = (int)(m_param->rc.vbvMaxBitrate / m_fps);
             x265_log(m_param, X265_LOG_WARNING, "VBV buffer size cannot be smaller than one frame, using %d kbit\n",
                      m_param->rc.vbvBufferSize);
         }
@@ -465,8 +653,7 @@
                 vbvMaxBitrate = (hrd->getBitRateValueMinus1(0, 0, 0) + 1) << (hrd->getBitRateScale() + BR_SHIFT);
             }
         }
-
-        m_bufferRate = vbvMaxBitrate / fps;
+        m_bufferRate = vbvMaxBitrate / m_fps;
         m_vbvMaxRate = vbvMaxBitrate;
         m_bufferSize = vbvBufferSize;
         m_singleFrameVbv = m_bufferRate * 1.1 > m_bufferSize;
@@ -537,6 +724,261 @@
 
     #undef MAX_DURATION
 }
+/* Second-pass setup: derive newQScale for every entry of m_rce2Pass from the first-pass stats so that
+ * the predicted stream size matches the target bitrate. Returns false when the bitrate cannot cover
+ * the constant (misc) bits, when an allocation fails, or when the VBV pass fails. */
+bool RateControl::initPass2()
+{
+    uint64_t allConstBits = 0;
+    double duration = 0;
+    for (int i = 0; i < m_numEntries; i++)
+        duration += m_rce2Pass[i].frameDuration;
+    uint64_t allAvailableBits = uint64_t(m_param->rc.bitrate * 1000. * duration);
+    double rateFactor, stepMult;
+    double qBlur = m_param->rc.qblur;
+    double cplxBlur = m_param->rc.complexityBlur;
+    const int filterSize = (int)(qBlur * 4) | 1;
+    double expectedBits;
+    double *qScale = NULL, *blurredQscale = NULL;
+    double baseCplx = m_ncu * (m_param->bframes ? 120 : 80);
+
+    /* find total/average complexity & const_bits */
+    for (int i = 0; i < m_numEntries; i++)
+        allConstBits += m_rce2Pass[i].miscBits;
+
+    if (allAvailableBits < allConstBits)
+    {
+        /* bits per frame times frames per second, reported in kbit/s */
+        x265_log(m_param, X265_LOG_ERROR, "requested bitrate is too low. estimated minimum is %d kbps\n",
+                 (int)(allConstBits * m_fps / (m_numEntries * 1000.)));
+        return false;
+    }
+
+    /* Blur complexities, to reduce local fluctuation of QP.
+     * We don't blur the QPs directly, because then one very simple frame
+     * could drag down the QP of a nearby complex frame and give it more
+     * bits than intended. */
+    for (int i = 0; i < m_numEntries; i++)
+    {
+        double weightSum = 0;
+        double cplxSum = 0;
+        double weight = 1.0;
+        double gaussianWeight;
+        /* weighted average of cplx of future frames */
+        for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++)
+        {
+            RateControlEntry *rcj = &m_rce2Pass[i + j];
+            double frameDuration = CLIP_DURATION(rcj->frameDuration) / BASE_FRAME_DURATION;
+            weight *= 1 - pow(rcj->iCuCount / m_ncu, 2);
+            if (weight < 0.0001)
+                break;
+            gaussianWeight = weight * exp(-j * j / 200.0);
+            weightSum += gaussianWeight;
+            cplxSum += gaussianWeight * (qScale2bits(rcj, 1) - rcj->miscBits) / frameDuration;
+        }
+        /* weighted average of cplx of past frames */
+        weight = 1.0;
+        for (int j = 0; j <= cplxBlur * 2 && j <= i; j++)
+        {
+            RateControlEntry *rcj = &m_rce2Pass[i - j];
+            double frameDuration = CLIP_DURATION(rcj->frameDuration) / BASE_FRAME_DURATION;
+            gaussianWeight = weight * exp(-j * j / 200.0);
+            weightSum += gaussianWeight;
+            cplxSum += gaussianWeight * (qScale2bits(rcj, 1) - rcj->miscBits) / frameDuration;
+            weight *= 1 - pow(rcj->iCuCount / m_ncu, 2);
+            if (weight < .0001)
+                break;
+        }
+        m_rce2Pass[i].blurredComplexity = cplxSum / weightSum;
+    }
+
+    CHECKED_MALLOC(qScale, double, m_numEntries);
+    if (filterSize > 1)
+    {
+        CHECKED_MALLOC(blurredQscale, double, m_numEntries);
+    }
+    else
+        blurredQscale = qScale;
+
+    /* Search for a factor which, when multiplied by the RCEQ values from
+     * each frame, adds up to the desired total size.
+     * There is no exact closed-form solution because of VBV constraints and
+     * because qscale2bits is not invertible, but we can start with the simple
+     * approximation of scaling the 1st pass by the ratio of bitrates.
+     * The search range is probably overkill, but speed doesn't matter here. */
+
+    expectedBits = 1;
+    for (int i = 0; i < m_numEntries; i++)
+    {
+        RateControlEntry* rce = &m_rce2Pass[i];
+        double q = getQScale(rce, 1.0);
+        expectedBits += qScale2bits(rce, q);
+        m_lastQScaleFor[rce->sliceType] = q;
+    }
+    stepMult = allAvailableBits / expectedBits;
+
+    rateFactor = 0;
+    for (double step = 1E4 * stepMult; step > 1E-7 * stepMult; step *= 0.5)
+    {
+        expectedBits = 0;
+        rateFactor += step;
+
+        m_lastNonBPictType = -1;
+        m_lastAccumPNorm = 1;
+        m_accumPNorm = 0;
+
+        m_lastQScaleFor[0] = m_lastQScaleFor[1] =
+        m_lastQScaleFor[2] = pow(baseCplx, 1 - m_qCompress) / rateFactor;
+
+        /* find qscale */
+        for (int i = 0; i < m_numEntries; i++)
+        {
+            RateControlEntry *rce = &m_rce2Pass[i];
+            qScale[i] = getQScale(rce, rateFactor);
+            m_lastQScaleFor[rce->sliceType] = qScale[i];
+        }
+
+        /* fixed I/B qscale relative to P */
+        for (int i = m_numEntries - 1; i >= 0; i--)
+        {
+            qScale[i] = getDiffLimitedQScale(&m_rce2Pass[i], qScale[i]);
+            assert(qScale[i] >= 0);
+        }
+
+        /* smooth curve */
+        if (filterSize > 1)
+        {
+            assert(filterSize % 2 == 1);
+            for (int i = 0; i < m_numEntries; i++)
+            {
+                double q = 0.0, sum = 0.0;
+
+                for (int j = 0; j < filterSize; j++)
+                {
+                    int idx = i + j - filterSize / 2;
+                    double d = idx - i;
+                    double coeff = qBlur == 0 ? 1.0 : exp(-d * d / (qBlur * qBlur));
+                    if (idx < 0 || idx >= m_numEntries)
+                        continue;
+                    if (m_rce2Pass[i].sliceType != m_rce2Pass[idx].sliceType)
+                        continue;
+                    q += qScale[idx] * coeff;
+                    sum += coeff;
+                }
+                blurredQscale[i] = q / sum;
+            }
+        }
+
+        /* find expected bits */
+        for (int i = 0; i < m_numEntries; i++)
+        {
+            RateControlEntry *rce = &m_rce2Pass[i];
+            rce->newQScale = clipQscale(NULL, blurredQscale[i]); // check if needed
+            assert(rce->newQScale >= 0);
+            expectedBits += qScale2bits(rce, rce->newQScale);
+        }
+
+        if (expectedBits > allAvailableBits)
+            rateFactor -= step;
+    }
+
+    X265_FREE(qScale);
+    if (filterSize > 1)
+        X265_FREE(blurredQscale);
+
+    /* vbv2Pass() returns true on success, so only a false result aborts the init */
+    if (m_isVbv && !vbv2Pass(allAvailableBits))
+        return false;
+    expectedBits = countExpectedBits();
+
+    if (fabs(expectedBits / allAvailableBits - 1.0) > 0.01)
+    {
+        double avgq = 0;
+        for (int i = 0; i < m_numEntries; i++)
+            avgq += m_rce2Pass[i].newQScale;
+        avgq = x265_qScale2qp(avgq / m_numEntries);
+
+        if (expectedBits > allAvailableBits || !m_isVbv)
+            x265_log(m_param, X265_LOG_WARNING, "Error: 2pass curve failed to converge\n");
+        x265_log(m_param, X265_LOG_WARNING, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n",
+                 (double)m_param->rc.bitrate,
+                 expectedBits * m_fps / (m_numEntries * 1000.),
+                 avgq);
+        if (expectedBits < allAvailableBits && avgq < MIN_QP + 2)
+            x265_log(m_param, X265_LOG_WARNING, "try reducing target bitrate\n");
+        else if (expectedBits > allAvailableBits && avgq > MAX_QP - 2)
+            x265_log(m_param, X265_LOG_WARNING, "try increasing target bitrate\n");
+        else if (!(m_2pass && m_isVbv))
+            x265_log(m_param, X265_LOG_WARNING, "internal error\n");
+    }
+
+    return true;
+fail:
+    /* presumably reached from CHECKED_MALLOC: qScale may already be allocated when blurredQscale fails */
+    if (qScale) X265_FREE(qScale);
+    return false;
+}
+
+bool RateControl::vbv2Pass(uint64_t allAvailableBits)
+{
+    /* for each interval of bufferFull .. underflow, uniformly increase the qp of all
+     * frames in the interval until either buffer is full at some intermediate frame or the
+     * last frame in the interval no longer underflows.  Recompute intervals and repeat.
+     * Then do the converse to put bits back into overflow areas until target size is met.
+     * Returns true once the loop below finishes; the fail label at the end returns false. */
+    double *fills;
+    double expectedBits = 0;
+    double adjustment;
+    double prevBits = 0;
+    int t0, t1;
+    int iterations = 0 , adjMin, adjMax; /* iterations is only incremented, never read here */
+    CHECKED_MALLOC(fills, double, m_numEntries + 1);
+    fills++; /* shift by one so fills[-1] can hold the initial buffer fill */
+
+    /* adjust overall stream size */
+    do
+    {
+        iterations++;
+        prevBits = expectedBits;
+
+        if (expectedBits)
+        {   /* not first iteration */
+            adjustment = X265_MAX(X265_MIN(expectedBits / allAvailableBits, 0.999), 0.9);
+            fills[-1] = m_bufferSize * m_param->rc.vbvBufferInit;
+            t0 = 0;
+            /* fix overflows */
+            adjMin = 1;
+            while (adjMin && findUnderflow(fills, &t0, &t1, 1))
+            {
+                adjMin = fixUnderflow(t0, t1, adjustment, MIN_QPSCALE, MAX_MAX_QPSCALE);
+                t0 = t1;
+            }
+        }
+
+        fills[-1] = m_bufferSize * (1. - m_param->rc.vbvBufferInit);
+        t0 = 0;
+        /* fix underflows -- should be done after overflow, as we'd better undersize target than underflowing VBV */
+        adjMax = 1;
+        while (adjMax && findUnderflow(fills, &t0, &t1, 0))
+        {
+            adjMax = fixUnderflow(t0, t1, 1.001, MIN_QPSCALE, MAX_MAX_QPSCALE );
+        }
+        /* repeat while the estimate is under 99.5% of the budget and still grew since the last round */
+        expectedBits = countExpectedBits();
+    } while ((expectedBits < .995 * allAvailableBits) && ((int64_t)(expectedBits+.5) > (int64_t)(prevBits+.5)));
+
+    if (!adjMax)
+        x265_log(m_param, X265_LOG_WARNING, "vbv-maxrate issue, qpmax or vbv-maxrate too low\n");
+
+    /* store expected vbv filling values for tracking when encoding */
+    for (int i = 0; i < m_numEntries; i++)
+        m_rce2Pass[i].expectedVbv = m_bufferSize - fills[i];
+
+    X265_FREE(fills - 1); /* undo the fills++ above before releasing */
+    return true;
+fail:
+    return false;
+}
 
 void RateControl::rateControlStart(Frame* pic, Lookahead *l, RateControlEntry* rce, Encoder* enc)
 {
@@ -620,6 +1062,127 @@
     else
         m_accumPQp += m_qp;
 }
+/* Second-pass helper: force I/B quantizers as a function of P quantizers, limit the change from the last
+ * qscale of the same slice type to a factor of m_lstep, update the running state and return the result. */
+double RateControl::getDiffLimitedQScale(RateControlEntry *rce, double q)
+{
+    // m_lastNonBPictType is -1 until a non-B frame has been processed; never index the array with it
+    const double lastPqScale    = m_lastQScaleFor[P_SLICE];
+    const double lastNonBqScale = m_lastNonBPictType >= 0 ? m_lastQScaleFor[m_lastNonBPictType] : q;
+    if (rce->sliceType == I_SLICE)
+    {
+        double iq = q;
+        double pq = x265_qp2qScale(m_accumPQp / m_accumPNorm);
+        double ipFactor = fabs(m_param->rc.ipFactor);
+        /* don't apply ipFactor if the following frame is also I */
+        if (m_accumPNorm <= 0)
+            q = iq;
+        else if (m_param->rc.ipFactor < 0)
+            q = iq / ipFactor;
+        else if (m_accumPNorm >= 1)
+            q = pq / ipFactor;
+        else
+            q = m_accumPNorm * pq / ipFactor + (1 - m_accumPNorm) * iq;
+    }
+    else if (rce->sliceType == B_SLICE)
+    {
+        if (m_param->rc.pbFactor > 0)
+            q = lastNonBqScale;
+        if (!rce->keptAsRef)
+            q *= fabs(m_param->rc.pbFactor);
+    }
+    else if (rce->sliceType == P_SLICE
+             && m_lastNonBPictType == P_SLICE
+             && rce->coeffBits == 0)
+    {
+        q = lastPqScale;
+    }
+    /* last qscale / qdiff stuff */
+    if (m_lastNonBPictType == rce->sliceType &&
+        (rce->sliceType != I_SLICE || m_lastAccumPNorm < 1))
+    {
+        double maxQscale = m_lastQScaleFor[rce->sliceType] * m_lstep;
+        double minQscale = m_lastQScaleFor[rce->sliceType] / m_lstep;
+        q = Clip3(minQscale, maxQscale, q);
+    }
+    m_lastQScaleFor[rce->sliceType] = q;
+    if (rce->sliceType != B_SLICE)
+        m_lastNonBPictType = rce->sliceType;
+    if (rce->sliceType == I_SLICE)
+    {
+        m_lastAccumPNorm = m_accumPNorm;
+        m_accumPNorm = 0;
+        m_accumPQp = 0;
+    }
+    if (rce->sliceType == P_SLICE)
+    {
+        double mask = 1 - pow(rce->iCuCount / m_ncu, 2);
+        m_accumPQp   = mask * (x265_qScale2qp(q) + m_accumPQp);
+        m_accumPNorm = mask * (1 + m_accumPNorm);
+    }
+
+    return q;
+}
+
+/* Sum the predicted size of every second-pass entry at its newQScale and return the total. */
+double RateControl::countExpectedBits()
+{
+    double runningBits = 0;
+    for (int idx = 0; idx < m_numEntries; idx++)
+    {
+        m_rce2Pass[idx].expectedBits = (uint64_t)runningBits; /* total accumulated before this entry */
+        runningBits += qScale2bits(&m_rce2Pass[idx], m_rce2Pass[idx].newQScale);
+    }
+    return runningBits;
+}
+
+bool RateControl::findUnderflow(double *fills, int *t0, int *t1, int over)
+{
+    /* Scan from *t0 for an interval that starts at a frame whose simulated fill is at or below 10% of
+     * m_bufferSize (or at frame 0) and ends at the last following frame at or above 90%. 'over' flips the
+     * sign of the per-frame fill change. Writes fills[i], *t0, *t1; returns true if both ends were found. */
+    const double bufferMin = .1 * m_bufferSize;
+    const double bufferMax = .9 * m_bufferSize;
+    double fill = fills[*t0 - 1]; /* fill recorded for the slot just before *t0 */
+    double parity = over ? 1. : -1.;
+    int start = -1, end = -1;
+    for (int i = *t0; i < m_numEntries; i++)
+    {
+        fill += (m_rce2Pass[i].frameDuration * m_vbvMaxRate -
+                 qScale2bits(&m_rce2Pass[i], m_rce2Pass[i].newQScale)) * parity;
+        fill = Clip3(0.0, m_bufferSize, fill);
+        fills[i] = fill;
+        if (fill <= bufferMin || i == 0)
+        {
+            if (end >= 0) /* a low point after an end was recorded closes the interval */
+                break;
+            start = i;
+        }
+        else if (fill >= bufferMax && start >= 0)
+            end = i;
+    }
+    *t0 = start;
+    *t1 = end;
+    return start >= 0 && end >= 0;
+}
+
+/* Multiply newQScale of frames t0..t1 by adjustment, keeping each value inside [qscaleMin, qscaleMax].
+ * A non-zero t0 is itself skipped; returns true if any frame's clamped qscale changed. */
+bool RateControl::fixUnderflow(int t0, int t1, double adjustment, double qscaleMin, double qscaleMax)
+{
+    bool changed = false;
+    const int first = t0 > 0 ? t0 + 1 : t0;
+    for (int idx = first; idx <= t1; idx++)
+    {
+        const double raw = m_rce2Pass[idx].newQScale;
+        const double before = Clip3(qscaleMin, qscaleMax, raw);
+        const double after = Clip3(qscaleMin, qscaleMax, before * adjustment);
+        m_rce2Pass[idx].newQScale = after;
+        if (after != before)
+            changed = true;
+    }
+    return changed;
+}
 
 double RateControl::rateEstimateQscale(Frame* pic, RateControlEntry *rce)
 {
@@ -694,7 +1257,7 @@
         m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
         m_shortTermCplxCount++;
         /* coeffBits to be used in 2-pass */
-        rce->coeffBits = m_currentSatd;
+        rce->coeffBits = (int)m_currentSatd;
         rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
         rce->mvBits = 0;
         rce->sliceType = m_sliceType;
@@ -759,15 +1322,10 @@
             double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
             q = X265_MIN(lqmax, q);
         }
-
         q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
         rce->qpNoVbv = x265_qScale2qp(q);
-
-        if (m_isVbv && m_currentSatd > 0)
-            q = clipQscale(pic, q);
-
+        q = clipQscale(pic, q);
         m_lastQScaleFor[m_sliceType] = q;
-
         if (m_curSlice->getPOC() == 0 || m_lastQScaleFor[P_SLICE] < q)
             m_lastQScaleFor[P_SLICE] = q * fabs(m_param->rc.ipFactor);
 
@@ -845,128 +1403,134 @@
     // B-frames are not directly subject to VBV,
     // since they are controlled by referenced P-frames' QPs.
     double q0 = q;
+    if (m_isVbv && m_currentSatd > 0)
+    {
+        if (m_param->lookaheadDepth || m_param->rc.cuTree ||
+            m_param->scenecutThreshold ||
+            (m_param->bFrameAdaptive && m_param->bframes))
+        {
+           /* Lookahead VBV: If lookahead is done, raise the quantizer as necessary
+            * such that no frames in the lookahead overflow and such that the buffer
+            * is in a reasonable state by the end of the lookahead. */
+            int terminate = 0;
+            /* Avoid an infinite loop. */
+            for (int iterations = 0; iterations < 1000 && terminate != 3; iterations++)
+            {
+                double frameQ[3];
+                double curBits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
+                double bufferFillCur = m_bufferFill - curBits;
+                double targetFill;
+                double totalDuration = 0;
+                frameQ[P_SLICE] = m_sliceType == I_SLICE ? q * m_param->rc.ipFactor : q;
+                frameQ[B_SLICE] = frameQ[P_SLICE] * m_param->rc.pbFactor;
+                frameQ[I_SLICE] = frameQ[P_SLICE] / m_param->rc.ipFactor;
+                /* Loop over the planned future frames. */
+                for (int j = 0; bufferFillCur >= 0 && bufferFillCur <= m_bufferSize; j++)
+                {
+                    totalDuration += m_frameDuration;
+                    bufferFillCur += m_vbvMaxRate * m_frameDuration;
+                    int type = pic->m_lowres.plannedType[j];
+                    int64_t satd = pic->m_lowres.plannedSatd[j] >> (X265_DEPTH - 8);
+                    if (type == X265_TYPE_AUTO)
+                        break;
+                    type = IS_X265_TYPE_I(type) ? I_SLICE : IS_X265_TYPE_B(type) ? B_SLICE : P_SLICE;
+                    curBits = predictSize(&m_pred[type], frameQ[type], (double)satd);
+                    bufferFillCur -= curBits;
+                }
 
-    if (m_param->lookaheadDepth || m_param->rc.cuTree ||
-       m_param->scenecutThreshold ||
-       (m_param->bFrameAdaptive && m_param->bframes))
-    {
-       /* Lookahead VBV: If lookahead is done, raise the quantizer as necessary
-        * such that no frames in the lookahead overflow and such that the buffer
-        * is in a reasonable state by the end of the lookahead. */
+                /* Try to get the buffer at least 50% filled, but don't set an impossible goal. */
+                targetFill = X265_MIN(m_bufferFill + totalDuration * m_vbvMaxRate * 0.5, m_bufferSize * 0.5);
+                if (bufferFillCur < targetFill)
+                {
+                    q *= 1.01;
+                    terminate |= 1;
+                    continue;
+                }
+                /* Try to get the buffer no more than 80% filled, but don't set an impossible goal. */
+                targetFill = Clip3(m_bufferSize * 0.8, m_bufferSize, m_bufferFill - totalDuration * m_vbvMaxRate * 0.5);
+                if (m_isCbr && bufferFillCur > targetFill)
+                {
+                    q /= 1.01;
+                    terminate |= 2;
+                    continue;
+                }
+                break;
+            }
+        }
+        else
+        {
+            /* Fallback to old purely-reactive algorithm: no lookahead. */
+            if ((m_sliceType == P_SLICE ||
+                    (m_sliceType == I_SLICE && m_lastNonBPictType == I_SLICE)) &&
+                m_bufferFill / m_bufferSize < 0.5)
+            {
+                q /= Clip3(0.5, 1.0, 2.0 * m_bufferFill / m_bufferSize);
+            }
+            // Now a hard threshold to make sure the frame fits in VBV.
+            // This one is mostly for I-frames.
+            double bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
 
-        int terminate = 0;
+            // For small VBVs, allow the frame to use up the entire VBV.
+            double maxFillFactor;
+            maxFillFactor = m_bufferSize >= 5 * m_bufferRate ? 2 : 1;
+            // For single-frame VBVs, request that the frame use up the entire VBV.
+            double minFillFactor = m_singleFrameVbv ? 1 : 2;
 
-        /* Avoid an infinite loop. */
-        for (int iterations = 0; iterations < 1000 && terminate != 3; iterations++)
-        {
-            double frameQ[3];
-            double curBits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
-            double bufferFillCur = m_bufferFill - curBits;
-            double targetFill;
-            double totalDuration = 0;
-            frameQ[P_SLICE] = m_sliceType == I_SLICE ? q * m_param->rc.ipFactor : q;
-            frameQ[B_SLICE] = frameQ[P_SLICE] * m_param->rc.pbFactor;
-            frameQ[I_SLICE] = frameQ[P_SLICE] / m_param->rc.ipFactor;
-            /* Loop over the planned future frames. */
-            for (int j = 0; bufferFillCur >= 0 && bufferFillCur <= m_bufferSize; j++)
+            for (int iterations = 0; iterations < 10; iterations++)
             {
-                totalDuration += m_frameDuration;
-                bufferFillCur += m_vbvMaxRate * m_frameDuration;
-                int type = pic->m_lowres.plannedType[j];
-                int64_t satd = pic->m_lowres.plannedSatd[j] >> (X265_DEPTH - 8);
-                if (type == X265_TYPE_AUTO)
-                    break;
-                type = IS_X265_TYPE_I(type) ? I_SLICE : IS_X265_TYPE_B(type) ? B_SLICE : P_SLICE;
-                curBits = predictSize(&m_pred[type], frameQ[type], (double)satd);
-                bufferFillCur -= curBits;
+                double qf = 1.0;
+                if (bits > m_bufferFill / maxFillFactor)
+                    qf = Clip3(0.2, 1.0, m_bufferFill / (maxFillFactor * bits));
+                q /= qf;
+                bits *= qf;
+                if (bits < m_bufferRate / minFillFactor)
+                    q *= bits * minFillFactor / m_bufferRate;
+                bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
             }
 
-            /* Try to get the buffer at least 50% filled, but don't set an impossible goal. */
-            targetFill = X265_MIN(m_bufferFill + totalDuration * m_vbvMaxRate * 0.5, m_bufferSize * 0.5);
-            if (bufferFillCur < targetFill)
+            q = X265_MAX(q0, q);
+        }
+
+        // Check B-frame complexity, and use up any bits that would
+        // overflow before the next P-frame.
+        if (m_sliceType == P_SLICE && !m_singleFrameVbv)
+        {
+            int nb = m_bframes;
+            double bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
+            double bbits = predictSize(&m_predBfromP, q * m_param->rc.pbFactor, (double)m_currentSatd);
+            double space;
+            if (bbits > m_bufferRate)
+                nb = 0;
+            double pbbits = nb * bbits;
+
+            space = m_bufferFill + (1 + nb) * m_bufferRate - m_bufferSize;
+            if (pbbits < space)
             {
-                q *= 1.01;
-                terminate |= 1;
-                continue;
+                q *= X265_MAX(pbbits / space, bits / (0.5 * m_bufferSize));
             }
-            /* Try to get the buffer no more than 80% filled, but don't set an impossible goal. */
-            targetFill = Clip3(m_bufferSize * 0.8, m_bufferSize, m_bufferFill - totalDuration * m_vbvMaxRate * 0.5);
-            if (m_isCbr && bufferFillCur > targetFill)
-            {
-                q /= 1.01;
-                terminate |= 2;
-                continue;
-            }
-            break;
+            q = X265_MAX(q0 / 2, q);
+        }
+        if (!m_isCbr)
+            q = X265_MAX(q0, q);
+
+        if (m_rateFactorMaxIncrement)
+        {
+            double qpNoVbv = x265_qScale2qp(q0);
+            double qmax = X265_MIN(MAX_MAX_QPSCALE,x265_qp2qScale(qpNoVbv + m_rateFactorMaxIncrement));
+            return Clip3(MIN_QPSCALE, qmax, q);
         }
     }
-    else
+    if (m_2pass)
     {
-        /* Fallback to old purely-reactive algorithm: no lookahead. */
-        if ((m_sliceType == P_SLICE ||
-                (m_sliceType == I_SLICE && m_lastNonBPictType == I_SLICE)) &&
-            m_bufferFill / m_bufferSize < 0.5)
-        {
-            q /= Clip3(0.5, 1.0, 2.0 * m_bufferFill / m_bufferSize);
-        }
-
-        // Now a hard threshold to make sure the frame fits in VBV.
-        // This one is mostly for I-frames.
-        double bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
-
-        // For small VBVs, allow the frame to use up the entire VBV.
-        double maxFillFactor;
-        maxFillFactor = m_bufferSize >= 5 * m_bufferRate ? 2 : 1;
-        // For single-frame VBVs, request that the frame use up the entire VBV.
-        double minFillFactor = m_singleFrameVbv ? 1 : 2;
-
-        for (int iterations = 0; iterations < 10; iterations++)
-        {
-            double qf = 1.0;
-            if (bits > m_bufferFill / maxFillFactor)
-                qf = Clip3(0.2, 1.0, m_bufferFill / (maxFillFactor * bits));
-            q /= qf;
-            bits *= qf;
-            if (bits < m_bufferRate / minFillFactor)
-                q *= bits * minFillFactor / m_bufferRate;
-            bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
-        }
-
-        q = X265_MAX(q0, q);
+        double min = log(MIN_QPSCALE);
+        double max = log(MAX_MAX_QPSCALE);
+        q = (log(q) - min) / (max - min) - 0.5;
+        q = 1.0 / (1.0 + exp(-4 * q));
+        q = q*(max - min) + min;
+        return exp(q);
     }
-
-    // Check B-frame complexity, and use up any bits that would
-    // overflow before the next P-frame.
-    if (m_sliceType == P_SLICE && !m_singleFrameVbv)
-    {
-        int nb = m_bframes;
-        double bits = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
-        double bbits = predictSize(&m_predBfromP, q * m_param->rc.pbFactor, (double)m_currentSatd);
-        double space;
-        if (bbits > m_bufferRate)
-            nb = 0;
-        double pbbits = nb * bbits;
-
-        space = m_bufferFill + (1 + nb) * m_bufferRate - m_bufferSize;
-        if (pbbits < space)
-        {
-            q *= X265_MAX(pbbits / space, bits / (0.5 * m_bufferSize));
-        }
-        q = X265_MAX(q0 / 2, q);
-    }
-    if (!m_isCbr)
-        q = X265_MAX(q0, q);
-
-    if (m_rateFactorMaxIncrement)
-    {
-        double qpNoVbv = x265_qScale2qp(q0);
-        double qmax = X265_MIN(MAX_MAX_QPSCALE,x265_qp2qScale(qpNoVbv + m_rateFactorMaxIncrement));
-        return Clip3(MIN_QPSCALE, qmax, q);
-    }
-
     return Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
 }
-
 double RateControl::predictRowsSizeSum(Frame* pic, RateControlEntry* rce, double qpVbv, int32_t & encodedBitsSoFar)
 {
     uint32_t rowSatdCostSoFar = 0, totalSatdBits = 0;
@@ -1179,10 +1743,14 @@
     }
     else
         q = pow(rce->blurredComplexity, 1 - m_param->rc.qCompress);
-
-    m_lastRceq = q;
-    q /= rateFactor;
-
+    // avoid NaN's in the Rceq
+    if (rce->coeffBits + rce->mvBits == 0)
+        q = m_lastQScaleFor[rce->sliceType];
+    else
+    {
+        m_lastRceq = q;
+        q /= rateFactor;
+    }
     return q;
 }
 
@@ -1281,16 +1849,16 @@
                             : rce->sliceType == P_SLICE ? (pic->getSlice()->isReferenced()? 'P' : 'p')
                             : pic->getSlice()->isReferenced()? 'B' : 'b';
                 if (fprintf(m_statFileOut,
-                         "in:%d out:%d type:%c dur:%.3f q:%.2f q-aq:%.2f tex:%d mv:%d misc:%d imb:%.2f pmb:%.2f smb:%.2f ",
+                         "in:%d out:%d type:%c dur:%.3f q:%.2f q-aq:%.2f tex:%d mv:%d misc:%d icu:%.2f pcu:%.2f scu:%.2f ",
                          rce->poc, rce->encodeOrder,
                          cType, m_frameDuration,
                          pic->m_avgQpRc, pic->m_avgQpAq,
                          stats->coeffBits,
                          stats->mvBits,
                          stats->miscBits,
-                         stats->cuCount_i,
-                         stats->cuCount_p,
-                         stats->cuCount_skip) < 0)
+                         stats->cuCount_i * m_ncu,
+                         stats->cuCount_p * m_ncu,
+                         stats->cuCount_skip * m_ncu) < 0)
                     goto writeFailure;
                 if (fprintf(m_statFileOut, ";\n") < 0)
                     goto writeFailure;
@@ -1308,22 +1876,23 @@
             }
             /* amortize part of each I slice over the next several frames, up to
              * keyint-max, to avoid over-compensating for the large I slice cost */
-            if (rce->sliceType == I_SLICE)
+            if (!m_param->rc.bStatWrite && !m_param->rc.bStatRead)
             {
-                /* previous I still had a residual; roll it into the new loan */
-                if (m_residualFrames)
-                    bits += m_residualCost * m_residualFrames;
-
-                m_residualFrames = X265_MIN(s_amortizeFrames, m_param->keyframeMax);
-                m_residualCost = (int)((bits * s_amortizeFraction) / m_residualFrames);
-                bits -= m_residualCost * m_residualFrames;
+                if (rce->sliceType == I_SLICE)
+                {
+                    /* previous I still had a residual; roll it into the new loan */
+                    if (m_residualFrames)
+                        bits += m_residualCost * m_residualFrames;
+                    m_residualFrames = X265_MIN(s_amortizeFrames, m_param->keyframeMax);
+                    m_residualCost = (int)((bits * s_amortizeFraction) / m_residualFrames);
+                    bits -= m_residualCost * m_residualFrames;
+                }
+                else if (m_residualFrames)
+                {
+                    bits += m_residualCost;
+                    m_residualFrames--;
+                }
             }
-            else if (m_residualFrames)
-            {
-                bits += m_residualCost;
-                m_residualFrames--;
-            }
-
             if (rce->sliceType != B_SLICE)
                 /* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low
                  * to improve short term compensation for next frame. */
@@ -1431,10 +2000,10 @@
            unlink(newFileName);
            bError = rename(tmpFileName, newFileName);
         }
-        if (!bError)
+        if (bError)
         {
             x265_log(m_param, X265_LOG_ERROR, "failed to rename cutree output stats file to \"%s\"\n",
-                     m_param->rc.statFileName);
+                     newFileName);
         }
         X265_FREE(tmpFileName);
         X265_FREE(newFileName);
@@ -1446,3 +2015,4 @@
     for (int i = 0; i < 2; i++)
         X265_FREE(m_cuTreeStats.qpBuffer[i]);
 }
+
diff -r fc4c54e9f211 -r 33d55e581aac source/encoder/ratecontrol.h
--- a/source/encoder/ratecontrol.h	Fri Jul 11 16:32:41 2014 +0530
+++ b/source/encoder/ratecontrol.h	Fri Jul 11 16:35:29 2014 +0530
@@ -50,13 +50,10 @@
     double decay;
     double offset;
 };
-
 struct RateControlEntry
 {
-    int64_t coeffBits;  /* Required in 2-pass rate control */
     int64_t lastSatd; /* Contains the picture cost of the previous frame, required for resetAbr and VBV */
     int sliceType;
-    int mvBits;
     int bframes;
     int poc;
     int encodeOrder;
@@ -64,21 +61,34 @@
     bool bLastMiniGopBFrame;
     double blurredComplexity;
     double qpaRc;
+    double qpAq;
     double qRceq;
     double frameSizePlanned;  /* frame Size decided by RateCotrol before encoding the frame */
     double bufferRate;
     double movingAvgSum;
     double qpNoVbv;
     double bufferFill;
+    double frameDuration;
     Predictor rowPreds[3][2];
     Predictor* rowPred[2];
     double frameSizeEstimated;  /* hold frameSize, updated from cu level vbv rc */
     bool isActive;
-
     SEIPictureTiming *picTimingSEI;
     HRDTiming        *hrdTiming;
+    /* Required in 2-pass rate control */
+    double iCuCount;
+    double pCuCount;
+    double skipCuCount;
+    bool keptAsRef;
+    double expectedVbv;
+    double qScale;
+    double newQScale;
+    double newQp;
+    int mvBits;
+    int miscBits;
+    int coeffBits;
+    uint64_t expectedBits; /*total expected bits up to the current frame (current one excluded)*/
 };
-
 class RateControl
 {
 public:
@@ -131,10 +141,9 @@
     double   m_shortTermCplxCount;
     double   m_lastRceq;
     double   m_qCompress;
-
     int64_t  m_totalBits;        /* total bits used for already encoded frames */
     int      m_framesDone;       /* # of frames passed through RateCotrol already */
-
+    double   m_fps;
     /* hrd stuff */
     SEIBufferingPeriod m_bufPeriodSEI;
     double   m_nominalRemovalTime;
@@ -145,7 +154,9 @@
     FILE*    m_statFileOut;
     FILE*    m_cutreeStatFileOut;
     FILE*    m_cutreeStatFileIn;
-
+    int      m_numEntries;
+    RateControlEntry *m_rce2Pass;
+    double   m_lastAccumPNorm;
     struct
     {
         uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree quantizer data. */
@@ -184,6 +195,12 @@
     double predictSize(Predictor *p, double q, double var);
     void checkAndResetABR(RateControlEntry* rce, bool isFrameDone);
     double predictRowsSizeSum(Frame* pic, RateControlEntry* rce, double qpm, int32_t& encodedBits);
+    bool initPass2();
+    double getDiffLimitedQScale(RateControlEntry *rce, double q);
+    double countExpectedBits();
+    bool vbv2Pass(uint64_t allAvailableBits);
+    bool findUnderflow(double *fills, int *t0, int *t1, int over);
+    bool fixUnderflow(int t0, int t1, double adjustment, double qscaleMin, double qscaleMax);
 };
 }
 #endif // ifndef X265_RATECONTROL_H



More information about the x265-devel mailing list