[x265] [PATCH RFC] vbv: enable row resets during vbv when mid-frame qp adjustments are too high/low

Steve Borho steve at borho.org
Tue Mar 18 04:19:24 CET 2014


# HG changeset patch
# User Aarthi Thirumalai
# Date 1394482958 -19800
#      Tue Mar 11 01:52:38 2014 +0530
# Node ID 079c5454ed3285a30c5be3ae0f998df7b25002a2
# Parent  7b86d42683be4c2727aa8d602b12a424806e1a49
vbv: enable row resets during vbv when mid-frame qp adjustments are too high/low.

diff -r 7b86d42683be -r 079c5454ed32 source/common/wavefront.cpp
--- a/source/common/wavefront.cpp	Sun Mar 16 23:37:56 2014 +0900
+++ b/source/common/wavefront.cpp	Tue Mar 11 01:52:38 2014 +0530
@@ -32,6 +32,7 @@
 bool WaveFront::init(int numRows)
 {
     m_numRows = numRows;
+    m_bAllRowsStop = false;
 
     if (m_pool)
     {
@@ -105,6 +106,9 @@
 
 bool WaveFront::findJob()
 {
+    if (m_bAllRowsStop)
+        return false;
+
     unsigned long id;
 
     // thread safe
diff -r 7b86d42683be -r 079c5454ed32 source/common/wavefront.h
--- a/source/common/wavefront.h	Sun Mar 16 23:37:56 2014 +0900
+++ b/source/common/wavefront.h	Tue Mar 11 01:52:38 2014 +0530
@@ -53,6 +53,9 @@
 
 public:
 
+    // temporarily pause findJob() distributing work
+    bool m_bAllRowsStop;
+
     WaveFront(ThreadPool *pool)
         : JobProvider(pool)
         , m_internalDependencyBitmap(0)
diff -r 7b86d42683be -r 079c5454ed32 source/encoder/cturow.h
--- a/source/encoder/cturow.h	Sun Mar 16 23:37:56 2014 +0900
+++ b/source/encoder/cturow.h	Tue Mar 11 01:52:38 2014 +0530
@@ -87,6 +87,9 @@
     void processCU(TComDataCU *cu, TComSlice *slice, TEncSbac *bufferSBac, bool bSaveCabac);
 
     /* Threading variables */
+
+    /* This row lock must be acquired when reading or writing m_active,
+     * m_completed, or m_busy */
     Lock                m_lock;
 
     /* row is ready to run, has no neighbor dependencies. The row may have
diff -r 7b86d42683be -r 079c5454ed32 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Sun Mar 16 23:37:56 2014 +0900
+++ b/source/encoder/frameencoder.cpp	Tue Mar 11 01:52:38 2014 +0530
@@ -908,7 +908,7 @@
         m_rows[i].m_completed = 0;
         m_rows[i].m_busy = false;
     }
-
+    m_vbvResetTriggerRow = -1;
     int range = m_cfg->param->searchRange; /* fpel search */
     range    += 1;                        /* diamond search range check lag */
     range    += 2;                        /* subpel refine */
@@ -998,80 +998,147 @@
     m_totalTime = 0;
 }
 
+// Called by worker threads
 void FrameEncoder::processRowEncoder(int row)
 {
     PPAScopeEvent(Thread_ProcessRow);
 
-    // Called by worker threads
+    CTURow& codeRow = m_rows[m_cfg->param->bEnableWavefront ? row : 0];
     CTURow& curRow  = m_rows[row];
+
     if (curRow.m_busy)
     {
         /* On multi-socket Windows servers, we have seen problems with
          * ATOMIC_CAS which resulted in multiple worker threads processing
          * the same CU row, which often resulted in bad pointer accesses. We
          * believe the problem is fixed, but are leaving this check in place
-         * to prevent crashes in case it is not. */
+         * to prevent crashes in case it is not */
         x265_log(m_cfg->param, X265_LOG_WARNING,
                  "internal error - simulaneous row access detected. Please report HW to x265-devel at videolan.org");
         return;
     }
+    if (row > 0)
+    {
+        ScopedLock self(curRow.m_lock);
+        if (m_bAllRowsStop)
+        {
+            curRow.m_active = false;
+            return;
+        }
+    }
     curRow.m_busy = true;
 
     int64_t startTime = x265_mdate();
-    CTURow& codeRow = m_rows[m_cfg->param->bEnableWavefront ? row : 0];
     const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
     const uint32_t lineStartCUAddr = row * numCols;
     double qpBase = m_pic->m_avgQpRc;
-    bool isVbv = m_cfg->param->rc.vbvBufferSize > 0 && m_cfg->param->rc.vbvMaxBitrate > 0;
-    for (uint32_t col = curRow.m_completed; col < numCols; col++)
+    bool bIsVbv = m_cfg->param->rc.vbvBufferSize > 0 && m_cfg->param->rc.vbvMaxBitrate > 0;
+
+    while (curRow.m_completed < numCols)
     {
+        uint32_t col = curRow.m_completed;
         const uint32_t cuAddr = lineStartCUAddr + col;
         TComDataCU* cu = m_pic->getCU(cuAddr);
         cu->initCU(m_pic, cuAddr);
 
         codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
         codeRow.m_entropyCoder.resetEntropy();
-        TEncSbac *bufSbac = (m_cfg->param->bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
 
-        if ((uint32_t)row >= col && (row != 0) && isVbv)
-            qpBase = m_pic->getCU(cuAddr - numCols + 1)->m_baseQp;
+        if (bIsVbv)
+        {
+            if (!row)
+                m_pic->m_rowDiagQp[row] = m_pic->m_avgQpRc;
 
-        if (m_cfg->param->rc.aqMode || isVbv)
+            if ((uint32_t)row >= col && (row != 0) && m_vbvResetTriggerRow != row)
+                cu->m_baseQp = m_pic->getCU(cuAddr - numCols + 1)->m_baseQp;
+            else 
+                cu->m_baseQp = m_pic->m_rowDiagQp[row];
+        }
+        else
+            cu->m_baseQp = m_pic->m_avgQpRc;
+
+        if (m_cfg->param->rc.aqMode || bIsVbv)
         {
-            int qp = calcQpForCu(m_pic, cuAddr, qpBase);
+            int qp = calcQpForCu(m_pic, cuAddr, cu->m_baseQp);
             setLambda(qp, row);
             qp = X265_MIN(qp, MAX_QP);
             cu->setQPSubParts(char(qp), 0, 0);
-            cu->m_baseQp = qpBase;
             if (m_cfg->param->rc.aqMode)
                 m_pic->m_qpaAq[row] += qp;
         }
+
+        TEncSbac *bufSbac = (m_cfg->param->bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
         codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param->bEnableWavefront && col == 1);
-        if (isVbv)
+        curRow.m_completed++;
+
+        // Completed CU processing
+
+        if (bIsVbv)
         {
             // Update encoded bits, satdCost, baseQP for each CU
             m_pic->m_rowDiagSatd[row] += m_pic->m_cuCostsForVbv[cuAddr];
+            m_pic->m_rowDiagIntraSatd[row] += m_pic->m_intraCuCostsForVbv[cuAddr];
             m_pic->m_rowEncodedBits[row] += cu->m_totalBits;
             m_pic->m_numEncodedCusPerRow[row] = cuAddr;
             m_pic->m_qpaRc[row] += cu->m_baseQp;
 
-            if ((uint32_t)row == col)
-                m_pic->m_rowDiagQp[row] = qpBase;
-
-            // If current block is at row diagonal checkpoint, call vbv ratecontrol.
+            // If current block is at row diagonal checkpoint, update vbv ratecontrol.
             if ((uint32_t)row == col && row != 0)
             {
-                m_top->m_rateControl->rowDiagonalVbvRateControl(m_pic, row, &m_rce, qpBase);
+                qpBase = cu->m_baseQp;
+                int reEncode = m_top->m_rateControl->rowDiagonalVbvRateControl(m_pic, row, &m_rce, qpBase);
                 qpBase = Clip3((double)MIN_QP, (double)MAX_MAX_QP, qpBase);
+                m_pic->m_rowDiagQp[row] = qpBase;
+                m_pic->m_rowDiagQScale[row] =  x265_qp2qScale(qpBase);
+
+                if (reEncode < 0)
+                {
+                    x265_log(m_cfg->param, X265_LOG_INFO, "POC %d row %d - encode restart required for VBV\n", m_pic->getPOC(), row);
+
+                    // prevent the WaveFront::findJob() method from providing new jobs
+                    m_bAllRowsStop = true;
+                    m_vbvResetTriggerRow = row;
+
+                    curRow.m_completed = 0;
+
+                    for (int r = m_numRows - 1; r >= row ; r--)
+                    {
+                        CTURow& stopRow = m_rows[r];
+
+                        if (r != row)
+                        {
+                            bool bRowBusy = true;
+                            while (bRowBusy)
+                            {
+                                // Determine idle, hold lock while examining busy flags
+                                stopRow.m_lock.acquire();
+                                bRowBusy = stopRow.m_active && stopRow.m_busy;
+                                stopRow.m_lock.release();
+
+                                if (bRowBusy)
+                                {
+                                    GIVE_UP_TIME();
+                                }
+                            }
+                        }
+
+                        stopRow.m_completed = 0;
+                        m_pic->m_qpaAq[r] = 0;
+                        m_pic->m_rowEncodedBits[r] = 0;
+                        m_pic->m_qpaRc[r] = 0;
+                        m_pic->m_numEncodedCusPerRow[r] = 0;
+                    }
+
+                    m_bAllRowsStop = false;
+                }
             }
         }
-        // Completed CU processing
-        m_rows[row].m_completed++;
-        if (m_rows[row].m_completed >= 2 && row < m_numRows - 1)
+
+        if (curRow.m_completed >= 2 && row < m_numRows - 1)
         {
             ScopedLock below(m_rows[row + 1].m_lock);
-            if (m_rows[row + 1].m_active == false &&
-                m_rows[row + 1].m_completed + 2 <= m_rows[row].m_completed)
+            if (m_rows[row + 1].m_active == false && !m_bAllRowsStop &&
+                m_rows[row + 1].m_completed + 2 <= curRow.m_completed)
             {
                 m_rows[row + 1].m_active = true;
                 enqueueRowEncoder(row + 1);
@@ -1079,18 +1146,19 @@
         }
 
         ScopedLock self(curRow.m_lock);
-        if (row > 0 && m_rows[row].m_completed < numCols - 1 && m_rows[row - 1].m_completed < m_rows[row].m_completed + 2)
+        if (m_bAllRowsStop || 
+            (row > 0 && curRow.m_completed < numCols - 1 && m_rows[row - 1].m_completed < m_rows[row].m_completed + 2))
         {
             curRow.m_active = false;
             curRow.m_busy = false;
-            m_totalTime = m_totalTime + (x265_mdate() - startTime);
+            m_totalTime += x265_mdate() - startTime;
             return;
         }
     }
 
     // this row of CTUs has been encoded
 
-    // Run row-wise loop filters
+    // trigger row-wise loop filters
     if (row >= m_filterRowDelay)
     {
         enableRowFilter(row - m_filterRowDelay);
@@ -1106,7 +1174,7 @@
             enableRowFilter(i);
         }
     }
-    m_totalTime = m_totalTime + (x265_mdate() - startTime);
+    m_totalTime += x265_mdate() - startTime;
     curRow.m_busy = false;
 }
 
diff -r 7b86d42683be -r 079c5454ed32 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Sun Mar 16 23:37:56 2014 +0900
+++ b/source/encoder/frameencoder.h	Tue Mar 11 01:52:38 2014 +0530
@@ -165,6 +165,7 @@
     TComPPS                  m_pps;
     RateControlEntry         m_rce;
     SEIDecodedPictureHash    m_seiReconPictureDigest;
+    int                      m_vbvResetTriggerRow;
 
 protected:
 
diff -r 7b86d42683be -r 079c5454ed32 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Sun Mar 16 23:37:56 2014 +0900
+++ b/source/encoder/ratecontrol.cpp	Tue Mar 11 01:52:38 2014 +0530
@@ -792,8 +792,8 @@
                 }
 
                 refRowSatdCost >>= X265_DEPTH - 8;
-                refQScale = row == maxRows - 1 ? refPic->m_rowDiagQScale[row] : refPic->m_rowDiagQScale[row + 1];
-            }
+                refQScale = refPic->m_rowDiagQScale[row];
+             }
 
             if (picType == I_SLICE || qScale >= refQScale)
             {
@@ -829,8 +829,6 @@
 {
     double qScaleVbv = x265_qp2qScale(qpVbv);
 
-    pic->m_rowDiagQp[row] = qpVbv;
-    pic->m_rowDiagQScale[row] = qScaleVbv;
     uint64_t rowSatdCost = pic->m_rowDiagSatd[row];
     double encodedBits = pic->m_rowEncodedBits[row];
     if (row == 1)


More information about the x265-devel mailing list