[x265] [PATCH 1 of 4] rc: add 2 pass logic in rateEstimateQscale

aarthi at multicorewareinc.com aarthi at multicorewareinc.com
Mon Jul 21 09:32:46 CEST 2014


# HG changeset patch
# User Aarthi Thirumalai
# Date 1405684160 -19800
#      Fri Jul 18 17:19:20 2014 +0530
# Node ID d9296f2aad7210305efd49cb949407a23ef8ded3
# Parent  eb983d29c11acc03b91e07fe93c31503fa3a4732
rc: add 2 pass logic in rateEstimateQscale

diff -r eb983d29c11a -r d9296f2aad72 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Jul 17 09:29:39 2014 +0200
+++ b/source/encoder/encoder.cpp	Fri Jul 18 17:19:20 2014 +0530
@@ -41,6 +41,10 @@
 
 #include "x265.h"
 
+namespace x265 {
+const char g_sliceTypeToChar[] = {'B', 'P', 'I'}; /* display letter per slice-type index: 0 -> 'B', 1 -> 'P', 2 -> 'I' */
+}
+
 static const char *summaryCSVHeader =
     "Command, Date/Time, Elapsed Time, FPS, Bitrate, "
     "Y PSNR, U PSNR, V PSNR, Global PSNR, SSIM, SSIM (dB), "
@@ -230,6 +234,8 @@
             rc->m_bufferFill = X265_MAX(rc->m_bufferFill, 0);
             rc->m_bufferFill += encoder->m_rce.bufferRate;
             rc->m_bufferFill = X265_MIN(rc->m_bufferFill, rc->m_bufferSize);
+            if (rc->m_2pass)
+                rc->m_predictedBits += (int64_t)encoder->m_rce.frameSizeEstimated;
         }
         encIdx = (encIdx + 1) % m_param->frameNumThreads;
     }
diff -r eb983d29c11a -r d9296f2aad72 source/encoder/encoder.h
--- a/source/encoder/encoder.h	Thu Jul 17 09:29:39 2014 +0200
+++ b/source/encoder/encoder.h	Fri Jul 18 17:19:20 2014 +0530
@@ -34,6 +34,7 @@
 
 namespace x265 {
 // private namespace
+extern const char g_sliceTypeToChar[3];
 
 class SBac;
 
diff -r eb983d29c11a -r d9296f2aad72 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Thu Jul 17 09:29:39 2014 +0200
+++ b/source/encoder/ratecontrol.cpp	Fri Jul 18 17:19:20 2014 +0530
@@ -122,6 +122,23 @@
            + rce->miscBits;
 }
 
+inline void copyRceData(RateControlEntry* rce, RateControlEntry* rce2Pass) /* copy the fields listed below from rce2Pass into rce; rce2Pass is only read */
+{ /* fields of rce that are not assigned here keep their current values */
+    rce->coeffBits = rce2Pass->coeffBits;
+    rce->mvBits = rce2Pass->mvBits;
+    rce->miscBits = rce2Pass->miscBits;
+    rce->iCuCount = rce2Pass->iCuCount;
+    rce->pCuCount = rce2Pass->pCuCount;
+    rce->skipCuCount = rce2Pass->skipCuCount;
+    rce->keptAsRef = rce2Pass->keptAsRef;
+    rce->qScale = rce2Pass->qScale;
+    rce->newQScale = rce2Pass->newQScale;
+    rce->expectedBits = rce2Pass->expectedBits;
+    rce->expectedVbv = rce2Pass->expectedVbv;
+    rce->blurredComplexity = rce2Pass->blurredComplexity;
+    rce->sliceType = rce2Pass->sliceType; /* replaces any slice type already stored in rce */
+}
+
 }  // end anonymous namespace
 /* Compute variance to derive AC energy of each block */
 static inline uint32_t acEnergyVar(Frame *pic, uint64_t sum_ssd, int shift, int i)
@@ -429,6 +446,7 @@
         /* Load stat file and init 2pass algo */
         if (m_param->rc.bStatRead)
         {
+            m_expectedBitsSum = 0;
             char *p, *statsIn, *statsBuf;
             /* read 1st pass stats */
             statsIn = statsBuf = x265_slurp_file(fileName);
@@ -555,7 +573,6 @@
                 if (next)
                     *next++ = 0;
                 e = sscanf(p, " in:%d ", &frameNumber);
-
                 if (frameNumber < 0 || frameNumber >= m_numEntries)
                 {
                     x265_log(m_param, X265_LOG_ERROR, "bad frame number (%d) at stats line %d\n", frameNumber, i);
@@ -767,7 +784,7 @@
         for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++)
         {
             RateControlEntry *rcj = &m_rce2Pass[i + j];
-            double frameDuration = CLIP_DURATION(rcj->frameDuration) / BASE_FRAME_DURATION;
+            double frameDuration = CLIP_DURATION(rcj->frameDuration)/ BASE_FRAME_DURATION;
             weight *= 1 - pow(rcj->iCuCount / m_ncu, 2);
             if (weight < 0.0001)
                 break;
@@ -999,6 +1016,11 @@
     m_curSlice = pic->m_picSym->m_slice;
     m_sliceType = m_curSlice->m_sliceType;
     rce->sliceType = m_sliceType;
+    if (m_param->rc.bStatRead)
+    {
+        assert(rce->encodeOrder >= 0 && rce->encodeOrder < m_numEntries);
+        copyRceData(rce, &m_rce2Pass[rce->encodeOrder]);
+    }
     rce->isActive = true;
     if (m_sliceType == B_SLICE)
         rce->bframes = m_bframes;
@@ -1027,6 +1049,7 @@
         }
         rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
         rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
+        m_predictedBits = m_totalBits;
         updateVbvPlan(enc);
         rce->bufferFill = m_bufferFill;
     }
@@ -1066,6 +1089,7 @@
     // Do not increment m_startEndOrder here. Make rateControlEnd of previous thread
     // to wait until rateControlUpdateStats of this frame is called
     m_framesDone++;
+    rce->newQp = m_qp;
     /* set the final QP to slice structure */
     m_curSlice->m_sliceQp = m_qp;
 }
@@ -1207,6 +1231,14 @@
 {
     double q;
 
+    if (m_2pass)
+    {
+        if (m_sliceType != rce->sliceType)
+        {
+            x265_log(m_param, X265_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
+                     g_sliceTypeToChar[m_sliceType], g_sliceTypeToChar[rce->sliceType]);
+        }
+    }
     if (m_sliceType == B_SLICE)
     {
         /* B-frames don't have independent rate control, but rather get the
@@ -1251,109 +1283,170 @@
         double qScale = x265_qp2qScale(q);
         rce->frameSizePlanned = predictSize(&m_predBfromP, qScale, (double)m_leadingNoBSatd);
         rce->frameSizeEstimated = rce->frameSizePlanned;
+        rce->newQScale = qScale;
         return qScale;
     }
     else
     {
         double abrBuffer = 2 * m_param->rc.rateTolerance * m_bitrate;
-
-        /* 1pass ABR */
-
-        /* Calculate the quantizer which would have produced the desired
-         * average bitrate if it had been applied to all frames so far.
-         * Then modulate that quant based on the current frame's complexity
-         * relative to the average complexity so far (using the 2pass RCEQ).
-         * Then bias the quant up or down if total size so far was far from
-         * the target.
-         * Result: Depending on the value of rate_tolerance, there is a
-         * tradeoff between quality and bitrate precision. But at large
-         * tolerances, the bit distribution approaches that of 2pass. */
-
-        double wantedBits, overflow = 1;
-        rce->movingAvgSum = m_shortTermCplxSum;
-        m_shortTermCplxSum *= 0.5;
-        m_shortTermCplxCount *= 0.5;
-        m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
-        m_shortTermCplxCount++;
-        /* coeffBits to be used in 2-pass */
-        rce->coeffBits = (int)m_currentSatd;
-        rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
-        rce->mvBits = 0;
-        rce->sliceType = m_sliceType;
-
-        if (m_param->rc.rateControlMode == X265_RC_CRF)
+        if (m_2pass)
         {
-            q = getQScale(rce, m_rateFactorConstant);
+            int64_t diff;
+            if (!m_isVbv)
+            {
+                m_predictedBits = m_totalBits;
+                if (rce->encodeOrder < m_param->frameNumThreads)
+                    m_predictedBits += (int64_t)(rce->encodeOrder * m_bitrate / m_fps);
+                else
+                    m_predictedBits += (int64_t)(m_param->frameNumThreads * m_bitrate / m_fps);
+            }
+            /* Adjust ABR buffer based on distance to the end of the video. */
+            if (m_numEntries > rce->encodeOrder)
+            {
+                uint64_t finalBits = m_rce2Pass[m_numEntries - 1].expectedBits;
+                double videoPos = (double)rce->expectedBits / finalBits;
+                double scaleFactor = sqrt((1 - videoPos) * m_numEntries);
+                abrBuffer *= 0.5 * X265_MAX(scaleFactor, 0.5);
+            }
+            diff = m_predictedBits - (int64_t)rce->expectedBits;
+            q = rce->newQScale;
+            q /= Clip3(0.5, 2.0, (double)(abrBuffer - diff) / abrBuffer);
+            if (((rce->encodeOrder + 1 - m_param->frameNumThreads) >= m_fps) &&
+                (m_expectedBitsSum > 0))
+            {
+                /* Adjust quant based on the difference between
+                 * achieved and expected bitrate so far */
+                double curTime = (double)rce->encodeOrder / m_numEntries;
+                double w = Clip3(0.0, 1.0, curTime * 100);
+                q *= pow((double)m_totalBits / m_expectedBitsSum, w);
+            }
+            rce->qpNoVbv = x265_qScale2qp(q);
+            if (m_isVbv)
+            {
+                /* Do not overflow vbv */
+                double expectedSize = qScale2bits(rce, q);
+                double expectedVbv = m_bufferFill + m_bufferRate - expectedSize;
+                double expectedFullness = rce->expectedVbv / m_bufferSize;
+                double qmax = q * (2 - expectedFullness);
+                double sizeConstraint = 1 + expectedFullness;
+                qmax = X265_MAX(qmax, rce->newQScale);
+                if (expectedFullness < .05)
+                    qmax = MAX_MAX_QPSCALE;
+                qmax = X265_MIN(qmax, MAX_MAX_QPSCALE);
+                while (((expectedVbv < rce->expectedVbv/sizeConstraint) && (q < qmax)) ||
+                        ((expectedVbv < 0) && (q < MAX_MAX_QPSCALE)))
+                {
+                    q *= 1.05;
+                    expectedSize = qScale2bits(rce, q);
+                    expectedVbv = m_bufferFill + m_bufferRate - expectedSize;
+                }
+            }
+            q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
         }
         else
-        {
-            if (!m_param->rc.bStatRead)
-                checkAndResetABR(rce, false);
-            q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
+            {
+            /* 1pass ABR */
 
-            /* ABR code can potentially be counterproductive in CBR, so just
-             * don't bother.  Don't run it if the frame complexity is zero
-             * either. */
-            if (!m_isCbr && m_currentSatd)
+            /* Calculate the quantizer which would have produced the desired
+             * average bitrate if it had been applied to all frames so far.
+             * Then modulate that quant based on the current frame's complexity
+             * relative to the average complexity so far (using the 2pass RCEQ).
+             * Then bias the quant up or down if total size so far was far from
+             * the target.
+             * Result: Depending on the value of rate_tolerance, there is a
+             * tradeoff between quality and bitrate precision. But at large
+             * tolerances, the bit distribution approaches that of 2pass. */
+
+            double wantedBits, overflow = 1;
+            rce->movingAvgSum = m_shortTermCplxSum;
+            m_shortTermCplxSum *= 0.5;
+            m_shortTermCplxCount *= 0.5;
+            m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
+            m_shortTermCplxCount++;
+            /* coeffBits to be used in 2-pass */
+            rce->coeffBits = (int)m_currentSatd;
+            rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
+            rce->mvBits = 0;
+            rce->sliceType = m_sliceType;
+
+            if (m_param->rc.rateControlMode == X265_RC_CRF)
             {
-                /* use framesDone instead of POC as poc count is not serial with bframes enabled */
-                double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
-                wantedBits = timeDone * m_bitrate;
-                if (wantedBits > 0 && m_totalBits > 0 && !m_partialResidualFrames)
+                q = getQScale(rce, m_rateFactorConstant);
+            }
+            else
+            {
+                if (!m_param->rc.bStatRead)
+                    checkAndResetABR(rce, false);
+                q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
+
+                /* ABR code can potentially be counterproductive in CBR, so just
+                 * don't bother.  Don't run it if the frame complexity is zero
+                 * either. */
+                if (!m_isCbr && m_currentSatd)
                 {
-                    abrBuffer *= X265_MAX(1, sqrt(timeDone));
-                    overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
-                    q *= overflow;
+                    /* use framesDone instead of POC as poc count is not serial with bframes enabled */
+                    double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
+                    wantedBits = timeDone * m_bitrate;
+                    if (wantedBits > 0 && m_totalBits > 0 && !m_partialResidualFrames)
+                    {
+                        abrBuffer *= X265_MAX(1, sqrt(timeDone));
+                        overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
+                        q *= overflow;
+                    }
                 }
             }
+
+            if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
+                && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
+            {
+                q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
+                q /= fabs(m_param->rc.ipFactor);
+            }
+            else if (m_framesDone > 0)
+            {
+                if (m_param->rc.rateControlMode != X265_RC_CRF)
+                {
+                    double lqmin = 0, lqmax = 0;
+                    lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
+                    lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
+                    if (!m_partialResidualFrames)
+                    {
+                        if (overflow > 1.1 && m_framesDone > 3)
+                            lqmax *= m_lstep;
+                        else if (overflow < 0.9)
+                            lqmin /= m_lstep;
+                    }
+                    q = Clip3(lqmin, lqmax, q);
+                }
+            }
+            else if (m_qCompress != 1 && m_param->rc.rateControlMode == X265_RC_CRF)
+            {
+                q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
+            }
+            else if (m_framesDone == 0 && !m_isVbv)
+            {
+                /* for ABR alone, clip the first I frame qp */
+                double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
+                q = X265_MIN(lqmax, q);
+            }
+            q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
         }
-
-        if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
-            && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
-        {
-            q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
-            q /= fabs(m_param->rc.ipFactor);
-        }
-        else if (m_framesDone > 0)
-        {
-            if (m_param->rc.rateControlMode != X265_RC_CRF)
-            {
-                double lqmin = 0, lqmax = 0;
-                lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
-                lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
-                if (!m_partialResidualFrames)
-                {
-                    if (overflow > 1.1 && m_framesDone > 3)
-                        lqmax *= m_lstep;
-                    else if (overflow < 0.9)
-                        lqmin /= m_lstep;
-                }
-                q = Clip3(lqmin, lqmax, q);
-            }
-        }
-        else if (m_qCompress != 1 && m_param->rc.rateControlMode == X265_RC_CRF)
-        {
-            q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
-        }
-        else if (m_framesDone == 0 && !m_isVbv)
-        {
-            /* for ABR alone, clip the first I frame qp */
-            double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
-            q = X265_MIN(lqmax, q);
-        }
-        q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
         rce->qpNoVbv = x265_qScale2qp(q);
         q = clipQscale(pic, q);
         m_lastQScaleFor[m_sliceType] = q;
-        if (m_curSlice->m_poc == 0 || m_lastQScaleFor[P_SLICE] < q)
+        if ((m_curSlice->m_poc == 0 || m_lastQScaleFor[P_SLICE] < q) && !(m_2pass && !m_isVbv))
             m_lastQScaleFor[P_SLICE] = q * fabs(m_param->rc.ipFactor);
 
-        rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
+        if (m_2pass && m_isVbv)
+            rce->frameSizePlanned = qScale2bits(rce, q);
+        else
+            rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
         rce->frameSizeEstimated = rce->frameSizePlanned;
         /* Always use up the whole VBV in this case. */
         if (m_singleFrameVbv)
             rce->frameSizePlanned = m_bufferRate;
 
+        rce->newQScale = q;
         return q;
     }
 }
@@ -1967,6 +2060,11 @@
             m_totalBits += bits - rce->rowTotalBits;
         }
     }
+    if (m_2pass)
+    {
+        m_expectedBitsSum += qScale2bits(rce, x265_qp2qScale(rce->newQp));
+        m_totalBits += bits;
+    }
 
     if (m_isVbv)
     {
diff -r eb983d29c11a -r d9296f2aad72 source/encoder/ratecontrol.h
--- a/source/encoder/ratecontrol.h	Thu Jul 17 09:29:39 2014 +0200
+++ b/source/encoder/ratecontrol.h	Fri Jul 18 17:19:20 2014 +0530
@@ -185,6 +185,8 @@
     int      m_numEntries;
     RateControlEntry *m_rce2Pass;
     double   m_lastAccumPNorm;
+    int64_t  m_predictedBits;
+    double   m_expectedBitsSum;   /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
     struct
     {
         uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree quantizer data. */


More information about the x265-devel mailing list