<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Fri, May 25, 2018 at 4:31 PM, <span dir="ltr"><<a href="mailto:bhavna@multicorewareinc.com" target="_blank">bhavna@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class=""># HG changeset patch<br>
# User Bhavna Hariharan <<a href="mailto:bhavna@multicorewareinc.com">bhavna@multicorewareinc.com</a>><br>
</span># Date 1527165877 -19800<br>
# Thu May 24 18:14:37 2018 +0530<br>
# Node ID 77d698d854fab725682213c9a39ac9<wbr>1aa632095f<br>
<span class=""># Parent cc2c5e46f3c87d27e3602af30b06ba<wbr>6a0fbe2704<br>
Clean up dynamic refinement<br>
<br>
This patch does the following:<br>
1) Earlier, locks were used to avoid the possibility of race conditions while<br>
copying data from CTU level to frame level. Now, the data is collected for each<br>
row and when the entire frame completes analysis the row data is copied to the<br>
frame. This method eliminates the possibility of a race condition without<br>
having to employ locks.<br>
2) Allocate memory for the CTU information from the data pool; this will avoid<br>
fragmentation of data.<br>
<br>
</span>diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/common.h<br>
<span class="">--- a/source/common/common.h Mon May 21 18:42:29 2018 +0530<br>
</span>+++ b/source/common/common.h Thu May 24 18:14:37 2018 +0530<br>
<span class="">@@ -332,6 +332,8 @@<br>
#define START_CODE_OVERHEAD 3 <br>
#define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)<br>
<br>
</span>+#define MAX_NUM_DYN_REFINE (NUM_CU_DEPTH * X265_REFINE_INTER_LEVELS)<br>
<span class="">+<br>
namespace X265_NS {<br>
<br>
enum { SAO_NUM_OFFSET = 4 };<br>
</span>diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/cudata.cpp<br>
<span class="">--- a/source/common/cudata.cpp Mon May 21 18:42:29 2018 +0530<br>
</span>+++ b/source/common/cudata.cpp Thu May 24 18:14:37 2018 +0530<br>
@@ -317,16 +317,6 @@<br>
m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;<br>
<span class=""> m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;<br>
memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));<br>
-<br>
- if (m_encData->m_param-><wbr>bDynamicRefine)<br>
- {<br>
- int size = m_encData->m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;<br>
- CHECKED_MALLOC_ZERO(m_<wbr>collectCURd, uint64_t, size);<br>
- CHECKED_MALLOC_ZERO(m_<wbr>collectCUVariance, uint32_t, size);<br>
- CHECKED_MALLOC_ZERO(m_<wbr>collectCUCount, uint32_t, size);<br>
- }<br>
-fail:<br>
- return;<br>
}<br>
<br>
</span> // initialize Sub partition<br>
diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/cudata.h<br>
<span class="">--- a/source/common/cudata.h Mon May 21 18:42:29 2018 +0530<br>
</span>+++ b/source/common/cudata.h Thu May 24 18:14:37 2018 +0530<br>
<span class="">@@ -353,8 +353,12 @@<br>
coeff_t* trCoeffMemBlock;<br>
MV* mvMemBlock;<br>
sse_t* distortionMemBlock;<br>
+ uint64_t* dynRefineRdBlock;<br>
+ uint32_t* dynRefCntBlock;<br>
+ uint32_t* dynRefVarBlock;<br>
<br>
- CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; distortionMemBlock = NULL; }<br>
+ CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; distortionMemBlock = NULL; <br>
+ dynRefineRdBlock = NULL; dynRefCntBlock = NULL; dynRefVarBlock = NULL;}<br>
<br>
bool create(uint32_t depth, uint32_t csp, uint32_t numInstances, const x265_param& param)<br>
{<br>
</span>diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/framedata.cpp<br>
--- a/source/common/framedata.cpp Mon May 21 18:42:29 2018 +0530<br>
+++ b/source/common/framedata.cpp Thu May 24 18:14:37 2018 +0530<br>
@@ -41,9 +41,25 @@<br>
if (param.rc.bStatWrite)<br>
m_spsrps = const_cast<RPS*>(sps.spsrps);<br>
bool isallocated = m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame, param);<br>
+ if (m_param->bDynamicRefine)<br>
+ {<br>
+ CHECKED_MALLOC_ZERO(m_<wbr>cuMemPool.dynRefineRdBlock, uint64_t, MAX_NUM_DYN_REFINE * sps.numCUsInFrame);<br>
+ CHECKED_MALLOC_ZERO(m_<wbr>cuMemPool.dynRefCntBlock, uint32_t, MAX_NUM_DYN_REFINE * sps.numCUsInFrame);<br>
+ CHECKED_MALLOC_ZERO(m_<wbr>cuMemPool.dynRefVarBlock, uint32_t, MAX_NUM_DYN_REFINE * sps.numCUsInFrame);<br>
+ }<br>
if (isallocated)<br>
+ {<br>
for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)<br>
+ {<br>
+ if (m_param->bDynamicRefine)<br>
+ {<br>
+ m_picCTU[ctuAddr].m_<wbr>collectCURd = m_cuMemPool.dynRefineRdBlock + (ctuAddr * MAX_NUM_DYN_REFINE);<br>
+ m_picCTU[ctuAddr].m_<wbr>collectCUVariance = m_cuMemPool.dynRefVarBlock + (ctuAddr * MAX_NUM_DYN_REFINE);<br>
+ m_picCTU[ctuAddr].m_<wbr>collectCUCount = m_cuMemPool.dynRefCntBlock + (ctuAddr * MAX_NUM_DYN_REFINE);<br>
+ }<br>
m_picCTU[ctuAddr].initialize(<wbr>m_cuMemPool, 0, param, ctuAddr);<br>
+ }<br>
+ }<br>
else<br>
return false;<br>
CHECKED_MALLOC_ZERO(m_cuStat, RCStatCU, sps.numCUsInFrame);<br>
@@ -65,6 +81,12 @@<br>
{<br>
memset(m_cuStat, 0, sps.numCUsInFrame * sizeof(*m_cuStat));<br>
memset(m_rowStat, 0, sps.numCuInHeight * sizeof(*m_rowStat));<br>
+ if (m_param->bDynamicRefine)<br>
+ {<br>
+ memset(m_picCTU->m_<wbr>collectCURd, 0, MAX_NUM_DYN_REFINE * sizeof(uint64_t));<br>
+ memset(m_picCTU->m_<wbr>collectCUVariance, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));<br>
+ memset(m_picCTU->m_<wbr>collectCUCount, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));<br>
+ }<br>
}<br>
<br>
void FrameData::destroy()<br>
@@ -75,6 +97,12 @@<br>
<br>
m_cuMemPool.destroy();<br>
<br>
+ if (m_param->bDynamicRefine)<br>
+ {<br>
+ X265_FREE(m_cuMemPool.<wbr>dynRefineRdBlock);<br>
+ X265_FREE(m_cuMemPool.<wbr>dynRefCntBlock);<br>
+ X265_FREE(m_cuMemPool.<wbr>dynRefVarBlock);<br>
+ }<br>
X265_FREE(m_cuStat);<br>
X265_FREE(m_rowStat);<br>
for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)<br>
diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/framedata.h<br>
<span class="">--- a/source/common/framedata.h Mon May 21 18:42:29 2018 +0530<br>
</span>+++ b/source/common/framedata.h Thu May 24 18:14:37 2018 +0530<br>
<span class="">@@ -88,6 +88,11 @@<br>
uint64_t cntInterPu[NUM_CU_DEPTH][<wbr>INTER_MODES - 1];<br>
uint64_t cntMergePu[NUM_CU_DEPTH][<wbr>INTER_MODES - 1];<br>
<br>
+ /* Feature values per row for dynamic refinement */<br>
+ uint64_t rowRdDyn[MAX_NUM_DYN_REFINE];<br>
+ uint32_t rowVarDyn[MAX_NUM_DYN_REFINE];<br>
+ uint32_t rowCntDyn[MAX_NUM_DYN_REFINE];<br>
+<br>
FrameStats()<br>
{<br>
memset(this, 0, sizeof(FrameStats));<br>
</span>diff -r cc2c5e46f3c8 -r 77d698d854fa source/encoder/frameencoder.<wbr>cpp<br>
<span class="">--- a/source/encoder/frameencoder.<wbr>cpp Mon May 21 18:42:29 2018 +0530<br>
</span>+++ b/source/encoder/frameencoder.<wbr>cpp Thu May 24 18:14:37 2018 +0530<br>
<div><div class="h5">@@ -956,6 +956,9 @@<br>
} <br>
} // end of (m_param->maxSlices > 1)<br>
<br>
+ if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder) //Avoid collecting data that will not be used by future frames.<br>
+ collectDynDataFrame();<br>
+<br>
if (m_param->rc.bStatWrite)<br>
{<br>
int totalI = 0, totalP = 0, totalSkip = 0;<br>
@@ -1494,31 +1497,12 @@<br>
<br>
// Does all the CU analysis, returns best top level mode decision<br>
Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]<wbr>], rowCoder);<br>
- if (m_param->bDynamicRefine)<br>
- {<br>
- if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid collecting data that will not be used by future frames.<br>
- {<br>
- ScopedLock dynLock(m_top->m_<wbr>dynamicRefineLock);<br>
- for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)<br>
- {<br>
- for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)<br>
- {<br>
- int offset = (depth * X265_REFINE_INTER_LEVELS) + i;<br>
- int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;<br>
- int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;<br>
- if (ctu->m_collectCUCount[offset]<wbr>)<br>
- {<br>
- m_top->m_variance[index] += ctu->m_collectCUVariance[<wbr>offset];<br>
- m_top->m_rdCost[index] += ctu->m_collectCURd[offset];<br>
- m_top->m_trainingCount[index] += ctu->m_collectCUCount[offset];<br>
- }<br>
- }<br>
- }<br>
- }<br>
- X265_FREE_ZERO(ctu->m_<wbr>collectCUVariance);<br>
- X265_FREE_ZERO(ctu->m_<wbr>collectCURd);<br>
- X265_FREE_ZERO(ctu->m_<wbr>collectCUCount);<br>
- }<br>
+<br>
+ /* startPoint > encodeOrder is true when the start point changes for<br>
+ a new GOP but few frames from the previous GOP is still incomplete.<br>
+ The data of frames in this interval will not be used by any future frames. */<br>
+ if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder)<br>
+ collectDynDataRow(*ctu, &curRow.rowStats);<br>
<br>
// take a sample of the current active worker count<br>
ATOMIC_ADD(&m_<wbr>totalActiveWorkerCount, m_activeWorkerCount);<br>
@@ -1901,6 +1885,46 @@<br>
if (ATOMIC_INC(&m_<wbr>completionCount) == 2 * (int)m_numRows)<br>
m_completionEvent.trigger();<br>
}<br>
+<br>
+void FrameEncoder::<wbr>collectDynDataRow(CUData& ctu, FrameStats* rowStats)<br>
+{<br>
+ for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)<br>
+ {<br>
+ for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)<br>
+ {<br>
+ int offset = (depth * X265_REFINE_INTER_LEVELS) + i;<br>
+ if (ctu.m_collectCUCount[offset])<br>
+ {<br>
+ rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[<wbr>offset];<br>
+ rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];<br>
+ rowStats->rowCntDyn[offset] += ctu.m_collectCUCount[offset];<br>
+ }<br>
+ }<br>
+ }<br>
+}<br>
+<br>
+void FrameEncoder::<wbr>collectDynDataFrame()<br>
+{<br>
+ for (uint32_t row = 0; row < m_numRows; row++)<br>
+ {<br>
+ for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)<br>
+ {<br>
+ for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)<br>
+ {<br>
+ int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel;<br>
+ int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;<br>
+ int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;<br>
+ if (m_rows[row].rowStats.<wbr>rowCntDyn[offset])<br>
+ {<br>
+ m_top->m_variance[index] += m_rows[row].rowStats.<wbr>rowVarDyn[offset];<br>
+ m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[<wbr>offset];<br>
+ m_top->m_trainingCount[index] += m_rows[row].rowStats.<wbr>rowCntDyn[offset];<br>
+ }<br>
+ }<br>
+ }<br>
+ }<br>
+}<br>
+<br>
void FrameEncoder::<wbr>computeAvgTrainingData()<br>
{<br>
if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)<br>
</div></div>diff -r cc2c5e46f3c8 -r 77d698d854fa source/encoder/frameencoder.h<br>
<span class="">--- a/source/encoder/frameencoder.<wbr>h Mon May 21 18:42:29 2018 +0530<br>
</span>+++ b/source/encoder/frameencoder.<wbr>h Thu May 24 18:14:37 2018 +0530<br>
<div class="HOEnZb"><div class="h5">@@ -243,6 +243,8 @@<br>
#if ENABLE_LIBVMAF<br>
void vmafFrameLevelScore();<br>
#endif<br>
+ void collectDynDataRow(CUData& ctu, FrameStats* rowStats);<br>
+ void collectDynDataFrame();<br>
};<br>
}<br>
<br>
</div></div><br>______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
<br></blockquote></div><br></div><div class="gmail_extra"><br></div><div class="gmail_extra">Pushed.</div></div>