[x265] [PATCH] rc: add 2 pass logic in rateEstimateQscale

aarthi at multicorewareinc.com aarthi at multicorewareinc.com
Fri Jul 18 06:50:08 CEST 2014


# HG changeset patch
# User Aarthi Thirumalai
# Date 1405595020 -19800
#      Thu Jul 17 16:33:40 2014 +0530
# Node ID 8772e37e101af0d727932edc0025bc590a6da12b
# Parent  93ab6ed75b01449b7cbfbec518d6974134291852
rc: add 2 pass logic in rateEstimateQscale

adjust qscale of each frame based on distance to end of the video and
the difference between achieved and expected bits so far in the final pass.

diff -r 93ab6ed75b01 -r 8772e37e101a source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/encoder.cpp	Thu Jul 17 16:33:40 2014 +0530
@@ -41,7 +41,8 @@
 
 #include "x265.h"
 
-static const char *summaryCSVHeader =
+namespace x265 {
+const char *g_summaryCSVHeader =
     "Command, Date/Time, Elapsed Time, FPS, Bitrate, "
     "Y PSNR, U PSNR, V PSNR, Global PSNR, SSIM, SSIM (dB), "
     "I count, I ave-QP, I kpbs, I-PSNR Y, I-PSNR U, I-PSNR V, I-SSIM (dB), "
@@ -49,6 +50,9 @@
     "B count, B ave-QP, B kpbs, B-PSNR Y, B-PSNR U, B-PSNR V, B-SSIM (dB), "
     "Version\n";
 
+const char g_sliceTypeToChar[] = {'B', 'P', 'I'};
+}
+
 using namespace x265;
 
 Encoder::Encoder()
@@ -139,7 +143,7 @@
                                       "Encoding time, Elapsed time, List 0, List 1\n");
                 }
                 else
-                    fputs(summaryCSVHeader, m_csvfpt);
+                    fputs(g_summaryCSVHeader, m_csvfpt);
             }
         }
     }
@@ -230,6 +234,8 @@
             rc->m_bufferFill = X265_MAX(rc->m_bufferFill, 0);
             rc->m_bufferFill += encoder->m_rce.bufferRate;
             rc->m_bufferFill = X265_MIN(rc->m_bufferFill, rc->m_bufferSize);
+            if (rc->m_2pass)
+                rc->m_predictedBits += (int64_t)encoder->m_rce.frameSizeEstimated;
         }
         encIdx = (encIdx + 1) % m_param->frameNumThreads;
     }
@@ -793,7 +799,7 @@
         {
             // adding summary to a per-frame csv log file needs a summary header
             fprintf(m_csvfpt, "\nSummary\n");
-            fputs(summaryCSVHeader, m_csvfpt);
+            fputs(g_summaryCSVHeader, m_csvfpt);
         }
         // CLI arguments or other
         for (int i = 1; i < argc; i++)
diff -r 93ab6ed75b01 -r 8772e37e101a source/encoder/encoder.h
--- a/source/encoder/encoder.h	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/encoder.h	Thu Jul 17 16:33:40 2014 +0530
@@ -33,6 +33,9 @@
 namespace x265 {
 // private namespace
 
+extern const char *g_summaryCSVHeader;
+extern const char g_sliceTypeToChar[3];
+
 class SBac;
 
 struct EncStats
diff -r 93ab6ed75b01 -r 8772e37e101a source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/ratecontrol.cpp	Thu Jul 17 16:33:40 2014 +0530
@@ -122,6 +122,23 @@
            + rce->miscBits;
 }
 
+inline void copyRceData(RateControlEntry* rce, RateControlEntry* rce2Pass)
+{
+    rce->coeffBits = rce2Pass->coeffBits;
+    rce->mvBits = rce2Pass->mvBits;
+    rce->miscBits = rce2Pass->miscBits;
+    rce->iCuCount = rce2Pass->iCuCount;
+    rce->pCuCount = rce2Pass->pCuCount;
+    rce->skipCuCount = rce2Pass->skipCuCount;
+    rce->keptAsRef = rce2Pass->keptAsRef;
+    rce->qScale = rce2Pass->qScale;
+    rce->newQScale = rce2Pass->newQScale;
+    rce->expectedBits = rce2Pass->expectedBits;
+    rce->expectedVbv = rce2Pass->expectedVbv;
+    rce->blurredComplexity = rce2Pass->blurredComplexity;
+    rce->sliceType = rce2Pass->sliceType;
+}
+
 }  // end anonymous namespace
 /* Compute variance to derive AC energy of each block */
 static inline uint32_t acEnergyVar(Frame *pic, uint64_t sum_ssd, int shift, int i)
@@ -982,6 +999,11 @@
     m_curSlice = pic->getSlice();
     m_sliceType = m_curSlice->m_sliceType;
     rce->sliceType = m_sliceType;
+    if (m_param->rc.bStatRead)
+    {
+        assert(rce->encodeOrder >= 0 && rce->encodeOrder < m_numEntries);
+        copyRceData(rce, &m_rce2Pass[rce->encodeOrder]);
+    }
     rce->isActive = true;
     if (m_sliceType == B_SLICE)
         rce->bframes = m_bframes;
@@ -1008,6 +1030,7 @@
         }
         rce->rowPred[0] = &rce->rowPreds[m_sliceType][0];
         rce->rowPred[1] = &rce->rowPreds[m_sliceType][1];
+        m_predictedBits = m_totalBits;
         updateVbvPlan(enc);
         rce->bufferFill = m_bufferFill;
     }
@@ -1045,6 +1068,7 @@
         rce->qpaRc = pic->m_avgQpRc = pic->m_avgQpAq = m_qp;
     }
     m_framesDone++;
+    rce->newQp = m_qp;
     /* set the final QP to slice structure */
     m_curSlice->m_sliceQp = m_qp;
 }
@@ -1186,6 +1210,14 @@
 {
     double q;
 
+    if (m_2pass)
+    {
+        if (m_sliceType != rce->sliceType)
+        {
+            x265_log(m_param, X265_LOG_ERROR, "slice=%c but 2pass stats say %c\n",
+                     g_sliceTypeToChar[m_sliceType], g_sliceTypeToChar[rce->sliceType]);
+        }
+    }
     if (m_sliceType == B_SLICE)
     {
         /* B-frames don't have independent rate control, but rather get the
@@ -1230,109 +1262,170 @@
         double qScale = x265_qp2qScale(q);
         rce->frameSizePlanned = predictSize(&m_predBfromP, qScale, (double)m_leadingNoBSatd);
         rce->frameSizeEstimated = rce->frameSizePlanned;
+        rce->newQScale = qScale;
         return qScale;
     }
     else
     {
         double abrBuffer = 2 * m_param->rc.rateTolerance * m_bitrate;
-
-        /* 1pass ABR */
-
-        /* Calculate the quantizer which would have produced the desired
-         * average bitrate if it had been applied to all frames so far.
-         * Then modulate that quant based on the current frame's complexity
-         * relative to the average complexity so far (using the 2pass RCEQ).
-         * Then bias the quant up or down if total size so far was far from
-         * the target.
-         * Result: Depending on the value of rate_tolerance, there is a
-         * tradeoff between quality and bitrate precision. But at large
-         * tolerances, the bit distribution approaches that of 2pass. */
-
-        double wantedBits, overflow = 1;
-        rce->movingAvgSum = m_shortTermCplxSum;
-        m_shortTermCplxSum *= 0.5;
-        m_shortTermCplxCount *= 0.5;
-        m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
-        m_shortTermCplxCount++;
-        /* coeffBits to be used in 2-pass */
-        rce->coeffBits = (int)m_currentSatd;
-        rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
-        rce->mvBits = 0;
-        rce->sliceType = m_sliceType;
-
-        if (m_param->rc.rateControlMode == X265_RC_CRF)
+        if (m_2pass)
         {
-            q = getQScale(rce, m_rateFactorConstant);
+            int64_t diff;
+            if (!m_isVbv)
+            {
+                m_predictedBits = m_totalBits;
+                if (rce->encodeOrder < m_param->frameNumThreads)
+                    m_predictedBits += (int64_t)(rce->encodeOrder * m_bitrate / m_fps);
+                else
+                    m_predictedBits += (int64_t)(m_param->frameNumThreads * m_bitrate / m_fps);
+            }
+            /* Adjust ABR buffer based on distance to the end of the video. */
+            if (m_numEntries > rce->encodeOrder)
+            {
+                uint64_t finalBits = m_rce2Pass[m_numEntries - 1].expectedBits;
+                double videoPos = (double)rce->expectedBits / finalBits;
+                double scaleFactor = sqrt((1 - videoPos) * m_numEntries);
+                abrBuffer *= 0.5 * X265_MAX(scaleFactor, 0.5);
+            }
+            diff = m_predictedBits - (int64_t)rce->expectedBits;
+            q = rce->newQScale;
+            q /= Clip3(0.5, 2.0, (double)(abrBuffer - diff) / abrBuffer);
+            if (((rce->encodeOrder + 1 - m_param->frameNumThreads) >= m_fps) &&
+                (m_expectedBitsSum > 0))
+            {
+                /* Adjust quant based on the difference between
+                 * achieved and expected bitrate so far */
+                double curTime = (double)rce->encodeOrder / m_numEntries;
+                double w = Clip3(0.0, 1.0, curTime * 100);
+                q *= pow((double)m_totalBits / m_expectedBitsSum, w);
+            }
+            rce->qpNoVbv = x265_qScale2qp(q);
+            if (m_isVbv)
+            {
+                /* Do not overflow vbv */
+                double expectedSize = qScale2bits(rce, q);
+                double expectedVbv = m_bufferFill + m_bufferRate - expectedSize;
+                double expectedFullness = rce->expectedVbv / m_bufferSize;
+                double qmax = q * (2 - expectedFullness);
+                double sizeConstraint = 1 + expectedFullness;
+                qmax = X265_MAX(qmax, rce->newQScale);
+                if (expectedFullness < .05)
+                    qmax = MAX_MAX_QPSCALE;
+                qmax = X265_MIN(qmax, MAX_MAX_QPSCALE);
+                while (((expectedVbv < rce->expectedVbv/sizeConstraint) && (q < qmax)) ||
+                        ((expectedVbv < 0) && (q < MAX_MAX_QPSCALE)))
+                {
+                    q *= 1.05;
+                    expectedSize = qScale2bits(rce, q);
+                    expectedVbv = m_bufferFill + m_bufferRate - expectedSize;
+                }
+            }
+            q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
         }
         else
         {
-            if (!m_param->rc.bStatRead)
-                checkAndResetABR(rce, false);
-            q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
+            /* 1pass ABR */
 
-            /* ABR code can potentially be counterproductive in CBR, so just
-             * don't bother.  Don't run it if the frame complexity is zero
-             * either. */
-            if (!m_isCbr && m_currentSatd)
+            /* Calculate the quantizer which would have produced the desired
+             * average bitrate if it had been applied to all frames so far.
+             * Then modulate that quant based on the current frame's complexity
+             * relative to the average complexity so far (using the 2pass RCEQ).
+             * Then bias the quant up or down if total size so far was far from
+             * the target.
+             * Result: Depending on the value of rate_tolerance, there is a
+             * tradeoff between quality and bitrate precision. But at large
+             * tolerances, the bit distribution approaches that of 2pass. */
+
+            double wantedBits, overflow = 1;
+            rce->movingAvgSum = m_shortTermCplxSum;
+            m_shortTermCplxSum *= 0.5;
+            m_shortTermCplxCount *= 0.5;
+            m_shortTermCplxSum += m_currentSatd / (CLIP_DURATION(m_frameDuration) / BASE_FRAME_DURATION);
+            m_shortTermCplxCount++;
+            /* coeffBits to be used in 2-pass */
+            rce->coeffBits = (int)m_currentSatd;
+            rce->blurredComplexity = m_shortTermCplxSum / m_shortTermCplxCount;
+            rce->mvBits = 0;
+            rce->sliceType = m_sliceType;
+
+            if (m_param->rc.rateControlMode == X265_RC_CRF)
             {
-                /* use framesDone instead of POC as poc count is not serial with bframes enabled */
-                double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
-                wantedBits = timeDone * m_bitrate;
-                if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
+                q = getQScale(rce, m_rateFactorConstant);
+            }
+            else
+            {
+                if (!m_param->rc.bStatRead)
+                    checkAndResetABR(rce, false);
+                q = getQScale(rce, m_wantedBitsWindow / m_cplxrSum);
+
+                /* ABR code can potentially be counterproductive in CBR, so just
+                 * don't bother.  Don't run it if the frame complexity is zero
+                 * either. */
+                if (!m_isCbr && m_currentSatd)
                 {
-                    abrBuffer *= X265_MAX(1, sqrt(timeDone));
-                    overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
-                    q *= overflow;
+                    /* use framesDone instead of POC as poc count is not serial with bframes enabled */
+                    double timeDone = (double)(m_framesDone - m_param->frameNumThreads + 1) * m_frameDuration;
+                    wantedBits = timeDone * m_bitrate;
+                    if (wantedBits > 0 && m_totalBits > 0 && !m_residualFrames)
+                    {
+                        abrBuffer *= X265_MAX(1, sqrt(timeDone));
+                        overflow = Clip3(.5, 2.0, 1.0 + (m_totalBits - wantedBits) / abrBuffer);
+                        q *= overflow;
+                    }
                 }
             }
+            if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
+                && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
+            {
+                q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
+                q /= fabs(m_param->rc.ipFactor);
+            }
+            else if (m_framesDone > 0)
+            {
+                if (m_param->rc.rateControlMode != X265_RC_CRF)
+                {
+                    double lqmin = 0, lqmax = 0;
+                    lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
+                    lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
+                    if (!m_residualFrames)
+                    {
+                        if (overflow > 1.1 && m_framesDone > 3)
+                            lqmax *= m_lstep;
+                        else if (overflow < 0.9)
+                            lqmin /= m_lstep;
+                    }
+                    q = Clip3(lqmin, lqmax, q);
+                }
+            }
+            else if (m_qCompress != 1 && m_param->rc.rateControlMode == X265_RC_CRF)
+            {
+                q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
+            }
+            else if (m_framesDone == 0 && !m_isVbv)
+            {
+                /* for ABR alone, clip the first I frame qp */
+                double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
+                q = X265_MIN(lqmax, q);
+            }
+            q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
+            rce->qpNoVbv = x265_qScale2qp(q);
+            q = clipQscale(pic, q);
         }
 
-        if (m_sliceType == I_SLICE && m_param->keyframeMax > 1
-            && m_lastNonBPictType != I_SLICE && !m_isAbrReset)
-        {
-            q = x265_qp2qScale(m_accumPQp / m_accumPNorm);
-            q /= fabs(m_param->rc.ipFactor);
-        }
-        else if (m_framesDone > 0)
-        {
-            if (m_param->rc.rateControlMode != X265_RC_CRF)
-            {
-                double lqmin = 0, lqmax = 0;
-                lqmin = m_lastQScaleFor[m_sliceType] / m_lstep;
-                lqmax = m_lastQScaleFor[m_sliceType] * m_lstep;
-                if (!m_residualFrames)
-                {
-                    if (overflow > 1.1 && m_framesDone > 3)
-                        lqmax *= m_lstep;
-                    else if (overflow < 0.9)
-                        lqmin /= m_lstep;
-                }
-                q = Clip3(lqmin, lqmax, q);
-            }
-        }
-        else if (m_qCompress != 1 && m_param->rc.rateControlMode == X265_RC_CRF)
-        {
-            q = x265_qp2qScale(CRF_INIT_QP) / fabs(m_param->rc.ipFactor);
-        }
-        else if (m_framesDone == 0 && !m_isVbv)
-        {
-            /* for ABR alone, clip the first I frame qp */
-            double lqmax = x265_qp2qScale(ABR_INIT_QP_MAX) * m_lstep;
-            q = X265_MIN(lqmax, q);
-        }
-        q = Clip3(MIN_QPSCALE, MAX_MAX_QPSCALE, q);
-        rce->qpNoVbv = x265_qScale2qp(q);
-        q = clipQscale(pic, q);
         m_lastQScaleFor[m_sliceType] = q;
-        if (m_curSlice->m_poc == 0 || m_lastQScaleFor[P_SLICE] < q)
+        if ((m_curSlice->m_poc == 0 || m_lastQScaleFor[P_SLICE] < q) && !(m_2pass && !m_isVbv))
             m_lastQScaleFor[P_SLICE] = q * fabs(m_param->rc.ipFactor);
 
-        rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
+        if (m_2pass && m_isVbv)
+            rce->frameSizePlanned = qScale2bits(rce, q);
+        else
+            rce->frameSizePlanned = predictSize(&m_pred[m_sliceType], q, (double)m_currentSatd);
         rce->frameSizeEstimated = rce->frameSizePlanned;
         /* Always use up the whole VBV in this case. */
         if (m_singleFrameVbv)
             rce->frameSizePlanned = m_bufferRate;
 
+        rce->newQScale = q;
         return q;
     }
 }
@@ -1931,6 +2024,8 @@
             m_totalBits += bits;
         }
     }
+    if (m_2pass)
+        m_expectedBitsSum += qScale2bits(rce, x265_qp2qScale(rce->newQp));
 
     if (m_isVbv)
     {
diff -r 93ab6ed75b01 -r 8772e37e101a source/encoder/ratecontrol.h
--- a/source/encoder/ratecontrol.h	Thu Jul 17 20:54:19 2014 -0500
+++ b/source/encoder/ratecontrol.h	Thu Jul 17 16:33:40 2014 +0530
@@ -160,6 +160,8 @@
     int      m_numEntries;
     RateControlEntry *m_rce2Pass;
     double   m_lastAccumPNorm;
+    int64_t  m_predictedBits;
+    double   m_expectedBitsSum;   /* sum of qscale2bits after rceq, ratefactor, and overflow, only includes finished frames */
     struct
     {
         uint16_t *qpBuffer[2]; /* Global buffers for converting MB-tree quantizer data. */


More information about the x265-devel mailing list