<div dir="ltr">Please ignore this patch. </div><div class="gmail_extra"><br clear="all"><div><div class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><br>Thanks,<br><br>Bhavna Hariharan<br></div></div></div></div></div></div></div>
<br><div class="gmail_quote">On Thu, Apr 12, 2018 at 3:52 PM, <span dir="ltr"><<a href="mailto:bhavna@multicorewareinc.com" target="_blank">bhavna@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Bhavna Hariharan &lt;<a href="mailto:bhavna@multicorewareinc.com">bhavna@multicorewareinc.com</a>&gt;<br>
# Date 1523528451 -19800<br>
# Thu Apr 12 15:50:51 2018 +0530<br>
# Node ID bff8e6d4407bebd8ff19be8323deac<wbr>c16be4875c<br>
# Parent 04a337abd70de269cef7d9655365f3<wbr>a3ebde02aa<br>
dynamic-refine: Remove lock while collecting CTU data<br>
<br>
Locks were previously used to prevent race conditions while copying data from<br>
the CTU level to the frame level. Now the data is accumulated per row, and once<br>
the entire frame has completed analysis the row data is copied to the frame.<br>
This eliminates the possibility of a race condition without having to employ<br>
locks.<br>
<br>
diff -r 04a337abd70d -r bff8e6d4407b source/common/common.h<br>
--- a/source/common/common.h Thu Apr 12 15:10:59 2018 +0530<br>
+++ b/source/common/common.h Thu Apr 12 15:50:51 2018 +0530<br>
@@ -332,6 +332,8 @@<br>
#define START_CODE_OVERHEAD 3<br>
#define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)<br>
<br>
+#define MAX_NUM_DYN_REFINE ((NUM_CU_DEPTH - 1) * X265_REFINE_INTER_LEVELS)<br>
+<br>
namespace X265_NS {<br>
<br>
enum { SAO_NUM_OFFSET = 4 };<br>
diff -r 04a337abd70d -r bff8e6d4407b source/common/framedata.h<br>
--- a/source/common/framedata.h Thu Apr 12 15:10:59 2018 +0530<br>
+++ b/source/common/framedata.h Thu Apr 12 15:50:51 2018 +0530<br>
@@ -88,6 +88,11 @@<br>
uint64_t cntInterPu[NUM_CU_DEPTH][<wbr>INTER_MODES - 1];<br>
uint64_t cntMergePu[NUM_CU_DEPTH][<wbr>INTER_MODES - 1];<br>
<br>
+ /* Feature values per row for dynamic refinement */<br>
+ uint64_t rowRdDyn[MAX_NUM_DYN_REFINE];<br>
+ uint32_t rowVarDyn[MAX_NUM_DYN_REFINE];<br>
+ uint32_t rowCntDyn[MAX_NUM_DYN_REFINE];<br>
+<br>
FrameStats()<br>
{<br>
memset(this, 0, sizeof(FrameStats));<br>
diff -r 04a337abd70d -r bff8e6d4407b source/encoder/frameencoder.<wbr>cpp<br>
--- a/source/encoder/frameencoder.<wbr>cpp Thu Apr 12 15:10:59 2018 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>cpp Thu Apr 12 15:50:51 2018 +0530<br>
@@ -935,6 +935,9 @@<br>
}<br>
} // end of (m_param->maxSlices > 1)<br>
<br>
+ if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder) //Avoid collecting data that will not be used by future frames.<br>
+ collectDynDataFrame();<br>
+<br>
if (m_param->rc.bStatWrite)<br>
{<br>
int totalI = 0, totalP = 0, totalSkip = 0;<br>
@@ -1473,27 +1476,13 @@<br>
<br>
// Does all the CU analysis, returns best top level mode decision<br>
Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]<wbr>], rowCoder);<br>
- if (m_param->bDynamicRefine)<br>
+<br>
+ /* startPoint > encodeOrder is true when the start point changes for<br>
+ a new GOP but frames of the previous GOP are still incomplete.<br>
+ The data from these frames will not be used by any future frames. */<br>
+ if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder)<br>
{<br>
- if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid collecting data that will not be used by future frames.<br>
- {<br>
- ScopedLock dynLock(m_top->m_<wbr>dynamicRefineLock);<br>
- for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)<br>
- {<br>
- for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)<br>
- {<br>
- int offset = (depth * X265_REFINE_INTER_LEVELS) + i;<br>
- int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;<br>
- int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;<br>
- if (ctu->m_collectCUCount[offset]<wbr>)<br>
- {<br>
- m_top->m_variance[index] += ctu->m_collectCUVariance[<wbr>offset];<br>
- m_top->m_rdCost[index] += ctu->m_collectCURd[offset];<br>
- m_top->m_trainingCount[index] += ctu->m_collectCUCount[offset];<br>
- }<br>
- }<br>
- }<br>
- }<br>
+ collectDynDataRow(*ctu, &curRow.rowStats);<br>
X265_FREE_ZERO(ctu->m_<wbr>collectCUVariance);<br>
X265_FREE_ZERO(ctu->m_<wbr>collectCURd);<br>
X265_FREE_ZERO(ctu->m_<wbr>collectCUCount);<br>
@@ -1880,6 +1869,46 @@<br>
if (ATOMIC_INC(&m_<wbr>completionCount) == 2 * (int)m_numRows)<br>
m_completionEvent.trigger();<br>
}<br>
+<br>
+void FrameEncoder::<wbr>collectDynDataRow(const CUData& ctu, FrameStats* rowStats)<br>
+{<br>
+ for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)<br>
+ {<br>
+ for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)<br>
+ {<br>
+ int offset = (depth * X265_REFINE_INTER_LEVELS) + i;<br>
+ if (ctu.m_collectCUCount[offset])<br>
+ {<br>
+ rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[<wbr>offset];<br>
+ rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];<br>
+ rowStats->rowCntDyn[offset] += ctu.m_collectCUCount[offset];<br>
+ }<br>
+ }<br>
+ }<br>
+}<br>
+<br>
+void FrameEncoder::<wbr>collectDynDataFrame()<br>
+{<br>
+ for (uint32_t row = 0; row < m_numRows; row++)<br>
+ {<br>
+ for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)<br>
+ {<br>
+ for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)<br>
+ {<br>
+ int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel;<br>
+ int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;<br>
+ int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;<br>
+ if (m_rows[row].rowStats.<wbr>rowCntDyn[offset])<br>
+ {<br>
+ m_top->m_variance[index] += m_rows[row].rowStats.<wbr>rowVarDyn[offset];<br>
+ m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[<wbr>offset];<br>
+ m_top->m_trainingCount[index] += m_rows[row].rowStats.<wbr>rowCntDyn[offset];<br>
+ }<br>
+ }<br>
+ }<br>
+ }<br>
+}<br>
+<br>
void FrameEncoder::<wbr>computeAvgTrainingData()<br>
{<br>
if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)<br>
diff -r 04a337abd70d -r bff8e6d4407b source/encoder/frameencoder.h<br>
--- a/source/encoder/frameencoder.<wbr>h Thu Apr 12 15:10:59 2018 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>h Thu Apr 12 15:50:51 2018 +0530<br>
@@ -240,6 +240,9 @@<br>
void enqueueRowFilter(int row) { WaveFront::enqueueRow(row * 2 + 1); }<br>
void enableRowEncoder(int row) { WaveFront::enableRow(row * 2 + 0); }<br>
void enableRowFilter(int row) { WaveFront::enableRow(row * 2 + 1); }<br>
+<br>
+ void collectDynDataRow(const CUData& ctu, FrameStats* rowStats);<br>
+ void collectDynDataFrame();<br>
};<br>
}<br>
<br>
</blockquote></div><br></div>