[x265] [PATCH] more partial framework for frame parallelism
Min Chen
chenm003 at 163.com
Sun Sep 1 07:35:27 CEST 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1378013717 -28800
# Node ID e9f5ad12c16862fa8d1c5faa7e4a5688e4d8ef70
# Parent c31b254a4bfca27a754dc7ba8eb85023cd23bb3e
more partial framework for frame parallelism
diff -r c31b254a4bfc -r e9f5ad12c168 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp Sun Sep 01 08:19:42 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp Sun Sep 01 13:35:17 2013 +0800
@@ -2715,14 +2715,17 @@
return false;
}
-Void TComDataCU::clipMv(MV& outMV)
+Void TComDataCU::clipMv(MV& outMV, int /*rowsAvailable*/)
{
Int mvshift = 2;
Int offset = 8;
Int xmax = (m_slice->getSPS()->getPicWidthInLumaSamples() + offset - m_cuPelX - 1) << mvshift;
Int xmin = (-(Int)g_maxCUWidth - offset - (Int)m_cuPelX + 1) << mvshift;
- Int ymax = (m_slice->getSPS()->getPicHeightInLumaSamples() + offset - m_cuPelY - 1) << mvshift;
+ int ylimit = m_slice->getSPS()->getPicHeightInLumaSamples();
+ //if (rowsAvailable)
+ // ylimit = X265_MIN(rowsAvailable * g_maxCUHeight, ylimit);
+ Int ymax = (ylimit + offset - m_cuPelY - 1) << mvshift;
Int ymin = (-(Int)g_maxCUHeight - offset - (Int)m_cuPelY + 1) << mvshift;
outMV.x = min(xmax, max(xmin, (Int)outMV.x));
diff -r c31b254a4bfc -r e9f5ad12c168 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h Sun Sep 01 08:19:42 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h Sun Sep 01 13:35:17 2013 +0800
@@ -448,7 +448,7 @@
Void setMVPIdxSubParts(Int mvpIdx, RefPicList picList, UInt absPartIdx, UInt partIdx, UInt depth);
Void setMVPNumSubParts(Int iMVPNum, RefPicList picList, UInt absPartIdx, UInt partIdx, UInt depth);
- Void clipMv(x265::MV& outMV);
+ Void clipMv(x265::MV& outMV, int rowsAvailable = 0);
Void getMvPredLeft(x265::MV& mvPred) { mvPred = m_mvFieldA.mv; }
diff -r c31b254a4bfc -r e9f5ad12c168 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Sun Sep 01 08:19:42 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Sun Sep 01 13:35:17 2013 +0800
@@ -2946,6 +2946,7 @@
UInt TEncSearch::xGetTemplateCost(TComDataCU* cu, UInt partAddr, TComYuv* templateCand, MV mvCand, Int mvpIdx,
Int mvpCandCount, RefPicList picList, Int refIdx, Int sizex, Int sizey)
{
+ // TODO: does it clip with m_referenceRowsAvailable?
cu->clipMv(mvCand);
// prediction pattern
@@ -3013,8 +3014,8 @@
mvmin = mvp - dist;
mvmax = mvp + dist;
- cu->clipMv(mvmin);
- cu->clipMv(mvmax);
+ cu->clipMv(mvmin, m_referenceRowsAvailable);
+ cu->clipMv(mvmax, m_referenceRowsAvailable);
mvmin >>= 2;
mvmax >>= 2;
diff -r c31b254a4bfc -r e9f5ad12c168 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Sun Sep 01 08:19:42 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h Sun Sep 01 13:35:17 2013 +0800
@@ -68,6 +68,7 @@
public:
x265::MotionEstimate m_me;
+ int m_referenceRowsAvailable;
protected:
diff -r c31b254a4bfc -r e9f5ad12c168 source/common/wavefront.cpp
--- a/source/common/wavefront.cpp Sun Sep 01 08:19:42 2013 +0530
+++ b/source/common/wavefront.cpp Sun Sep 01 13:35:17 2013 +0800
@@ -41,6 +41,11 @@
m_queuedBitmap = new uint64_t[m_numWords];
if (m_queuedBitmap)
memset((void*)m_queuedBitmap, 0, sizeof(uint64_t) * m_numWords);
+
+ m_enableBitmap = new uint64_t[m_numWords];
+ if (m_enableBitmap)
+ memset((void*)m_enableBitmap, 0, sizeof(uint64_t) * m_numWords);
+
return m_queuedBitmap != NULL;
}
@@ -54,6 +59,11 @@
delete[] m_queuedBitmap;
m_queuedBitmap = NULL;
}
+ if (m_enableBitmap)
+ {
+ delete[] m_enableBitmap;
+ m_enableBitmap = NULL;
+ }
}
void WaveFront::enqueueRow(int row)
@@ -66,6 +76,15 @@
m_pool->pokeIdleThread();
}
+/* Mark `row` as enabled by setting its bit in m_enableBitmap.
+ * Each 64-bit word of the bitmap covers 64 rows: the word index is
+ * row >> 6 and the bit index within that word is row & 63. */
+void WaveFront::enableRow(int row)
+{
+    // a negative row would produce a negative word index below
+    assert(row >= 0 && row < m_numRows);
+
+    // Build the mask in an unsigned 64-bit type. The previous 1LL << 63
+    // (row & 63 == 63) shifted a signed value into its sign bit.
+    uint64_t bit = (uint64_t)1 << (row & 63);
+
+    // thread safe: the bit is merged into the word with ATOMIC_OR
+    ATOMIC_OR(&m_enableBitmap[row >> 6], bit);
+}
+
bool WaveFront::checkHigherPriorityRow(int curRow)
{
int fullwords = curRow >> 6;
@@ -95,8 +114,14 @@
break;
CTZ64(id, oldval);
+
+ // NOTE: if the lowest queued row in this word is not enabled yet, stop here and do not check higher rows
+ if (!(m_enableBitmap[w] & (1LL << id)))
+ {
+ return false;
+ }
+
uint64_t newval = oldval & ~(1LL << id);
-
if (ATOMIC_CAS(&m_queuedBitmap[w], oldval, newval) == oldval)
{
// we cleared the bit, process row
diff -r c31b254a4bfc -r e9f5ad12c168 source/common/wavefront.h
--- a/source/common/wavefront.h Sun Sep 01 08:19:42 2013 +0530
+++ b/source/common/wavefront.h Sun Sep 01 13:35:17 2013 +0800
@@ -40,6 +40,7 @@
// bitmap of rows queued for processing, uses atomic intrinsics
uint64_t volatile *m_queuedBitmap;
+ uint64_t volatile *m_enableBitmap;
// number of words in the bitmap
int m_numWords;
@@ -64,6 +65,8 @@
// This provider must be enqueued in the pool before enqueuing a row
void enqueueRow(int row);
+ void enableRow(int row);
+
// Returns true if a row above curRow is available for processing. The processRow()
// method may call this function periodically and voluntarily exit
bool checkHigherPriorityRow(int curRow);
diff -r c31b254a4bfc -r e9f5ad12c168 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Sun Sep 01 08:19:42 2013 +0530
+++ b/source/encoder/frameencoder.cpp Sun Sep 01 13:35:17 2013 +0800
@@ -897,22 +897,15 @@
for (Int ref = 0; ref < slice->getNumRefIdx(list); ref++)
{
TComPic *refpic = slice->getRefPic(list, ref);
- while (refpic->m_reconRowCount <= (UInt) row)
+ while (refpic->m_reconRowCount < (UInt) row + 1)
refpic->m_reconRowWait.wait();
min = X265_MIN(min, refpic->m_reconRowCount);
}
}
m_referenceRowsAvailable = min;
- row = min;
-#if 0 // incomplete signaling of available recon reference rows
- if (row > 0)
- {
- if (!m_rows[row + 1].m_active && (row == 1 || m_pic->m_complete_enc[row - 2] > 1))
- WaveFront::enqueueRow(row - 1);
- }
-#endif
+ WaveFront::enableRow(row);
}
WaveFront::enqueueRow(0);
@@ -959,6 +952,7 @@
codeRow.m_entropyCoder.setEntropyCoder(&m_sbacCoder, m_pic->getSlice());
codeRow.m_entropyCoder.resetEntropy();
+ codeRow.m_search.m_referenceRowsAvailable = m_referenceRowsAvailable;
TEncSbac *bufSbac = (m_cfg->param.bEnableWavefront && col == 0 && row > 0) ? &m_rows[row - 1].m_bufferSbacCoder : NULL;
codeRow.processCU(cu, m_pic->getSlice(), bufSbac, m_cfg->param.bEnableWavefront && col == 1);
diff -r c31b254a4bfc -r e9f5ad12c168 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Sun Sep 01 08:19:42 2013 +0530
+++ b/source/encoder/frameencoder.h Sun Sep 01 13:35:17 2013 +0800
@@ -199,7 +199,7 @@
/* Picture being encoded, and its output NAL list */
TComPic* m_pic;
AccessUnit m_accessUnit;
- int m_referenceRowsAvailable;
+ volatile int m_referenceRowsAvailable;
int m_numRows;
int row_delay;
diff -r c31b254a4bfc -r e9f5ad12c168 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Sun Sep 01 08:19:42 2013 +0530
+++ b/source/encoder/framefilter.cpp Sun Sep 01 13:35:17 2013 +0800
@@ -224,13 +224,6 @@
// this row of CTUs has been encoded
- // TODO: extend margins for motion reference
-
- // Notify other FrameEncoders that this row of reconstructed pixels is available
- m_pic->m_reconRowCount++;
- for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
- m_pic->m_reconRowWait.trigger();
-
if (row == m_numRows - 1)
{
m_sao.rdoSaoUnitRowEnd(saoParam, m_pic->getNumCUsInFrame());
@@ -248,4 +241,11 @@
m_completionEvent.trigger();
}
+
+ // TODO: extend margins for motion reference
+
+ // Notify other FrameEncoders that this row of reconstructed pixels is available
+ m_pic->m_reconRowCount++;
+ for (UInt i = 0; i < m_pic->m_countRefEncoders; i++)
+ m_pic->m_reconRowWait.trigger();
}
More information about the x265-devel
mailing list