[x265] [PATCH] more partial framework for frame parallelism

Min Chen chenm003 at 163.com
Sun Sep 1 07:35:27 CEST 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1378013717 -28800
# Node ID e9f5ad12c16862fa8d1c5faa7e4a5688e4d8ef70
# Parent  c31b254a4bfca27a754dc7ba8eb85023cd23bb3e
more partial framework for frame parallelism

diff -r c31b254a4bfc -r e9f5ad12c168 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Sun Sep 01 13:35:17 2013 +0800
@@ -2715,14 +2715,17 @@
     return false;
 }
 
-Void TComDataCU::clipMv(MV& outMV)
+Void TComDataCU::clipMv(MV& outMV, int /*rowsAvailable*/)
 {
     Int mvshift = 2;
     Int offset = 8;
     Int xmax = (m_slice->getSPS()->getPicWidthInLumaSamples() + offset - m_cuPelX - 1) << mvshift;
     Int xmin = (-(Int)g_maxCUWidth - offset - (Int)m_cuPelX + 1) << mvshift;
 
-    Int ymax = (m_slice->getSPS()->getPicHeightInLumaSamples() + offset - m_cuPelY - 1) << mvshift;
+    int ylimit = m_slice->getSPS()->getPicHeightInLumaSamples();
+    //if (rowsAvailable)
+    //    ylimit = X265_MIN(rowsAvailable * g_maxCUHeight, ylimit);
+    Int ymax = (ylimit + offset - m_cuPelY - 1) << mvshift;
     Int ymin = (-(Int)g_maxCUHeight - offset - (Int)m_cuPelY + 1) << mvshift;
 
     outMV.x = min(xmax, max(xmin, (Int)outMV.x));
diff -r c31b254a4bfc -r e9f5ad12c168 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h	Sun Sep 01 13:35:17 2013 +0800
@@ -448,7 +448,7 @@
     Void          setMVPIdxSubParts(Int mvpIdx, RefPicList picList, UInt absPartIdx, UInt partIdx, UInt depth);
     Void          setMVPNumSubParts(Int iMVPNum, RefPicList picList, UInt absPartIdx, UInt partIdx, UInt depth);
 
-    Void          clipMv(x265::MV& outMV);
+    Void          clipMv(x265::MV& outMV, int rowsAvailable = 0);
 
     Void          getMvPredLeft(x265::MV& mvPred)       { mvPred = m_mvFieldA.mv; }
 
diff -r c31b254a4bfc -r e9f5ad12c168 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Sun Sep 01 13:35:17 2013 +0800
@@ -2946,6 +2946,7 @@
 UInt TEncSearch::xGetTemplateCost(TComDataCU* cu, UInt partAddr, TComYuv* templateCand, MV mvCand, Int mvpIdx,
                                   Int mvpCandCount, RefPicList picList, Int refIdx, Int sizex, Int sizey)
 {
+    // TODO: does it clip with m_referenceRowsAvailable?
     cu->clipMv(mvCand);
 
     // prediction pattern
@@ -3013,8 +3014,8 @@
     mvmin = mvp - dist;
     mvmax = mvp + dist;
 
-    cu->clipMv(mvmin);
-    cu->clipMv(mvmax);
+    cu->clipMv(mvmin, m_referenceRowsAvailable);
+    cu->clipMv(mvmax, m_referenceRowsAvailable);
 
     mvmin >>= 2;
     mvmax >>= 2;
diff -r c31b254a4bfc -r e9f5ad12c168 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Sun Sep 01 13:35:17 2013 +0800
@@ -68,6 +68,7 @@
 public:
 
     x265::MotionEstimate m_me;
+    int             m_referenceRowsAvailable;
 
 protected:
 
diff -r c31b254a4bfc -r e9f5ad12c168 source/common/wavefront.cpp
--- a/source/common/wavefront.cpp	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/common/wavefront.cpp	Sun Sep 01 13:35:17 2013 +0800
@@ -41,6 +41,11 @@
         m_queuedBitmap = new uint64_t[m_numWords];
         if (m_queuedBitmap)
             memset((void*)m_queuedBitmap, 0, sizeof(uint64_t) * m_numWords);
+
+        m_enableBitmap = new uint64_t[m_numWords];
+        if (m_enableBitmap)
+            memset((void*)m_enableBitmap, 0, sizeof(uint64_t) * m_numWords);
+        
         return m_queuedBitmap != NULL;
     }
 
@@ -54,6 +59,11 @@
         delete[] m_queuedBitmap;
         m_queuedBitmap = NULL;
     }
+    if (m_enableBitmap)
+    {
+        delete[] m_enableBitmap;
+        m_enableBitmap = NULL;
+    }
 }
 
 void WaveFront::enqueueRow(int row)
@@ -66,6 +76,15 @@
     m_pool->pokeIdleThread();
 }
 
+void WaveFront::enableRow(int row)
+{
+    // thread safe: ATOMIC_OR sets bit (row & 63) of word (row >> 6) in m_enableBitmap
+    uint64_t bit = 1LL << (row & 63);
+
+    assert(row < m_numRows);
+    ATOMIC_OR(&m_enableBitmap[row >> 6], bit);
+}
+
 bool WaveFront::checkHigherPriorityRow(int curRow)
 {
     int fullwords = curRow >> 6;
@@ -95,8 +114,14 @@
                 break;
 
             CTZ64(id, oldval);
+
+            // NOTE: if the lowest queued row is not enabled yet, we don't check higher rows
+            if (!(m_enableBitmap[w] & (1LL << id)))
+            {
+                return false;
+            }
+
             uint64_t newval = oldval & ~(1LL << id);
-
             if (ATOMIC_CAS(&m_queuedBitmap[w], oldval, newval) == oldval)
             {
                 // we cleared the bit, process row
diff -r c31b254a4bfc -r e9f5ad12c168 source/common/wavefront.h
--- a/source/common/wavefront.h	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/common/wavefront.h	Sun Sep 01 13:35:17 2013 +0800
@@ -40,6 +40,7 @@
 
     // bitmap of rows queued for processing, uses atomic intrinsics
     uint64_t volatile *m_queuedBitmap;
+    uint64_t volatile *m_enableBitmap;
 
     // number of words in the bitmap
     int m_numWords;
@@ -64,6 +65,8 @@
     // This provider must be enqueued in the pool before enqueuing a row
     void enqueueRow(int row);
 
+    void enableRow(int row);
+
     // Returns true if a row above curRow is available for processing.  The processRow()
     // method may call this function periodically and voluntarily exit
     bool checkHigherPriorityRow(int curRow);
diff -r c31b254a4bfc -r e9f5ad12c168 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/encoder/frameencoder.cpp	Sun Sep 01 13:35:17 2013 +0800
@@ -897,22 +897,15 @@
                 for (Int ref = 0; ref < slice->getNumRefIdx(list); ref++)
                 {
                     TComPic *refpic = slice->getRefPic(list, ref);
-                    while (refpic->m_reconRowCount <= (UInt) row)
+                    while (refpic->m_reconRowCount < (UInt) row + 1)
                         refpic->m_reconRowWait.wait();
                     min = X265_MIN(min, refpic->m_reconRowCount);
                 }
             }
 
             m_referenceRowsAvailable = min;
-            row = min;
 
-#if 0 // incomplete signaling of available recon reference rows
-            if (row > 0)
-            {
-                if (!m_rows[row + 1].m_active && (row == 1 || m_pic->m_complete_enc[row - 2] > 1))
-                    WaveFront::enqueueRow(row - 1);
-            }
-#endif
+            WaveFront::enableRow(row);
         }
 
         WaveFront::enqueueRow(0);
@@ -959,6 +952,7 @@
         codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
         codeRow.m_entropyCoder.resetEntropy();
 
+        codeRow.m_search.m_referenceRowsAvailable = m_referenceRowsAvailable;
         TEncSbac *bufSbac = (m_cfg->param.bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
         codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param.bEnableWavefront && col == 1);
 
diff -r c31b254a4bfc -r e9f5ad12c168 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/encoder/frameencoder.h	Sun Sep 01 13:35:17 2013 +0800
@@ -199,7 +199,7 @@
     /* Picture being encoded, and its output NAL list */
     TComPic*                 m_pic;
     AccessUnit               m_accessUnit;
-    int                      m_referenceRowsAvailable;
+    volatile int             m_referenceRowsAvailable;
 
     int                      m_numRows;
     int                      row_delay;
diff -r c31b254a4bfc -r e9f5ad12c168 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Sun Sep 01 08:19:42 2013 +0530
+++ b/source/encoder/framefilter.cpp	Sun Sep 01 13:35:17 2013 +0800
@@ -224,13 +224,6 @@
 
     // this row of CTUs has been encoded
 
-    // TODO: extend margins for motion reference
-
-    // Notify other FrameEncoders that this row of reconstructed pixels is available
-    m_pic->m_reconRowCount++;
-    for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
-        m_pic->m_reconRowWait.trigger();
-
     if (row == m_numRows - 1)
     {
         m_sao.rdoSaoUnitRowEnd(saoParam, m_pic->getNumCUsInFrame());
@@ -248,4 +241,11 @@
 
         m_completionEvent.trigger();
     }
+
+    // TODO: extend margins for motion reference
+
+    // Notify other FrameEncoders that this row of reconstructed pixels is available
+    m_pic->m_reconRowCount++;
+    for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
+        m_pic->m_reconRowWait.trigger();
 }



More information about the x265-devel mailing list