[x265] [PATCH] Clean up dynamic refinement
bhavna at multicorewareinc.com
bhavna at multicorewareinc.com
Fri May 25 13:01:31 CEST 2018
# HG changeset patch
# User Bhavna Hariharan <bhavna at multicorewareinc.com>
# Date 1527165877 -19800
# Thu May 24 18:14:37 2018 +0530
# Node ID 77d698d854fab725682213c9a39ac91aa632095f
# Parent cc2c5e46f3c87d27e3602af30b06ba6a0fbe2704
Clean up dynamic refinement
This patch does the following:
1) Earlier, locks were used to avoid the possibility of race conditions while
copying data from CTU level to frame level. Now, the data is collected for each
row and when the entire frame completes analysis the row data is copied to the
frame. This method eliminates the possibility of a race condition without
having to employ locks.
2) Allocate memory for the CTU information from the data pool; this will avoid
fragmentation of data.
diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/common.h
--- a/source/common/common.h Mon May 21 18:42:29 2018 +0530
+++ b/source/common/common.h Thu May 24 18:14:37 2018 +0530
@@ -332,6 +332,8 @@
#define START_CODE_OVERHEAD 3
#define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)
+#define MAX_NUM_DYN_REFINE (NUM_CU_DEPTH * X265_REFINE_INTER_LEVELS)
+
namespace X265_NS {
enum { SAO_NUM_OFFSET = 4 };
diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/cudata.cpp
--- a/source/common/cudata.cpp Mon May 21 18:42:29 2018 +0530
+++ b/source/common/cudata.cpp Thu May 24 18:14:37 2018 +0530
@@ -317,16 +317,6 @@
m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
-
- if (m_encData->m_param->bDynamicRefine)
- {
- int size = m_encData->m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
- CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size);
- CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size);
- CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size);
- }
-fail:
- return;
}
// initialize Sub partition
diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/cudata.h
--- a/source/common/cudata.h Mon May 21 18:42:29 2018 +0530
+++ b/source/common/cudata.h Thu May 24 18:14:37 2018 +0530
@@ -353,8 +353,12 @@
coeff_t* trCoeffMemBlock;
MV* mvMemBlock;
sse_t* distortionMemBlock;
+ uint64_t* dynRefineRdBlock;
+ uint32_t* dynRefCntBlock;
+ uint32_t* dynRefVarBlock;
- CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; distortionMemBlock = NULL; }
+ CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; distortionMemBlock = NULL;
+ dynRefineRdBlock = NULL; dynRefCntBlock = NULL; dynRefVarBlock = NULL;}
bool create(uint32_t depth, uint32_t csp, uint32_t numInstances, const x265_param& param)
{
diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/framedata.cpp
--- a/source/common/framedata.cpp Mon May 21 18:42:29 2018 +0530
+++ b/source/common/framedata.cpp Thu May 24 18:14:37 2018 +0530
@@ -41,9 +41,25 @@
if (param.rc.bStatWrite)
m_spsrps = const_cast<RPS*>(sps.spsrps);
bool isallocated = m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame, param);
+ if (m_param->bDynamicRefine)
+ {
+ CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefineRdBlock, uint64_t, MAX_NUM_DYN_REFINE * sps.numCUsInFrame);
+ CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefCntBlock, uint32_t, MAX_NUM_DYN_REFINE * sps.numCUsInFrame);
+ CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefVarBlock, uint32_t, MAX_NUM_DYN_REFINE * sps.numCUsInFrame);
+ }
if (isallocated)
+ {
for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
+ {
+ if (m_param->bDynamicRefine)
+ {
+ m_picCTU[ctuAddr].m_collectCURd = m_cuMemPool.dynRefineRdBlock + (ctuAddr * MAX_NUM_DYN_REFINE);
+ m_picCTU[ctuAddr].m_collectCUVariance = m_cuMemPool.dynRefVarBlock + (ctuAddr * MAX_NUM_DYN_REFINE);
+ m_picCTU[ctuAddr].m_collectCUCount = m_cuMemPool.dynRefCntBlock + (ctuAddr * MAX_NUM_DYN_REFINE);
+ }
m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param, ctuAddr);
+ }
+ }
else
return false;
CHECKED_MALLOC_ZERO(m_cuStat, RCStatCU, sps.numCUsInFrame);
@@ -65,6 +81,12 @@
{
memset(m_cuStat, 0, sps.numCUsInFrame * sizeof(*m_cuStat));
memset(m_rowStat, 0, sps.numCuInHeight * sizeof(*m_rowStat));
+ if (m_param->bDynamicRefine)
+ {
+ memset(m_picCTU->m_collectCURd, 0, MAX_NUM_DYN_REFINE * sizeof(uint64_t));
+ memset(m_picCTU->m_collectCUVariance, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));
+ memset(m_picCTU->m_collectCUCount, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));
+ }
}
void FrameData::destroy()
@@ -75,6 +97,12 @@
m_cuMemPool.destroy();
+ if (m_param->bDynamicRefine)
+ {
+ X265_FREE(m_cuMemPool.dynRefineRdBlock);
+ X265_FREE(m_cuMemPool.dynRefCntBlock);
+ X265_FREE(m_cuMemPool.dynRefVarBlock);
+ }
X265_FREE(m_cuStat);
X265_FREE(m_rowStat);
for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/framedata.h
--- a/source/common/framedata.h Mon May 21 18:42:29 2018 +0530
+++ b/source/common/framedata.h Thu May 24 18:14:37 2018 +0530
@@ -88,6 +88,11 @@
uint64_t cntInterPu[NUM_CU_DEPTH][INTER_MODES - 1];
uint64_t cntMergePu[NUM_CU_DEPTH][INTER_MODES - 1];
+ /* Feature values per row for dynamic refinement */
+ uint64_t rowRdDyn[MAX_NUM_DYN_REFINE];
+ uint32_t rowVarDyn[MAX_NUM_DYN_REFINE];
+ uint32_t rowCntDyn[MAX_NUM_DYN_REFINE];
+
FrameStats()
{
memset(this, 0, sizeof(FrameStats));
diff -r cc2c5e46f3c8 -r 77d698d854fa source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon May 21 18:42:29 2018 +0530
+++ b/source/encoder/frameencoder.cpp Thu May 24 18:14:37 2018 +0530
@@ -956,6 +956,9 @@
}
} // end of (m_param->maxSlices > 1)
+ if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder) //Avoid collecting data that will not be used by future frames.
+ collectDynDataFrame();
+
if (m_param->rc.bStatWrite)
{
int totalI = 0, totalP = 0, totalSkip = 0;
@@ -1494,31 +1497,12 @@
// Does all the CU analysis, returns best top level mode decision
Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
- if (m_param->bDynamicRefine)
- {
- if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid collecting data that will not be used by future frames.
- {
- ScopedLock dynLock(m_top->m_dynamicRefineLock);
- for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
- {
- for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
- {
- int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
- int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint;
- int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
- if (ctu->m_collectCUCount[offset])
- {
- m_top->m_variance[index] += ctu->m_collectCUVariance[offset];
- m_top->m_rdCost[index] += ctu->m_collectCURd[offset];
- m_top->m_trainingCount[index] += ctu->m_collectCUCount[offset];
- }
- }
- }
- }
- X265_FREE_ZERO(ctu->m_collectCUVariance);
- X265_FREE_ZERO(ctu->m_collectCURd);
- X265_FREE_ZERO(ctu->m_collectCUCount);
- }
+
+ /* startPoint > encodeOrder is true when the start point changes for
+ a new GOP but a few frames from the previous GOP are still incomplete.
+ The data of frames in this interval will not be used by any future frames. */
+ if (m_param->bDynamicRefine && m_top->m_startPoint <= m_frame->m_encodeOrder)
+ collectDynDataRow(*ctu, &curRow.rowStats);
// take a sample of the current active worker count
ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
@@ -1901,6 +1885,46 @@
if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
m_completionEvent.trigger();
}
+
+void FrameEncoder::collectDynDataRow(CUData& ctu, FrameStats* rowStats) // Adds one CTU's dynamic-refinement counters into the given row-level accumulators.
+{
+ for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) // i: refine-inter level
+ {
+ for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
+ {
+ int offset = (depth * X265_REFINE_INTER_LEVELS) + i; // flat slot: depth-major, refine level minor
+ if (ctu.m_collectCUCount[offset]) // slots with a zero count are skipped entirely
+ {
+ rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[offset];
+ rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];
+ rowStats->rowCntDyn[offset] += ctu.m_collectCUCount[offset];
+ }
+ }
+ }
+}
+
+void FrameEncoder::collectDynDataFrame() // Adds every row's dynamic-refinement accumulators into the m_top arrays for the current frame.
+{
+ for (uint32_t row = 0; row < m_numRows; row++)
+ {
+ for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS; refLevel++)
+ {
+ for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
+ {
+ int offset = (depth * X265_REFINE_INTER_LEVELS) + refLevel; // same slot layout used to index the per-row arrays
+ int curFrameIndex = m_frame->m_encodeOrder - m_top->m_startPoint; // non-negative only when m_startPoint <= m_encodeOrder, which the call site checks
+ int index = (curFrameIndex * X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset; // each frame owns X265_REFINE_INTER_LEVELS * maxCUDepth consecutive slots
+ if (m_rows[row].rowStats.rowCntDyn[offset]) // rows with no samples for this slot contribute nothing
+ {
+ m_top->m_variance[index] += m_rows[row].rowStats.rowVarDyn[offset];
+ m_top->m_rdCost[index] += m_rows[row].rowStats.rowRdDyn[offset];
+ m_top->m_trainingCount[index] += m_rows[row].rowStats.rowCntDyn[offset];
+ }
+ }
+ }
+ }
+}
+
void FrameEncoder::computeAvgTrainingData()
{
if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)
diff -r cc2c5e46f3c8 -r 77d698d854fa source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h Mon May 21 18:42:29 2018 +0530
+++ b/source/encoder/frameencoder.h Thu May 24 18:14:37 2018 +0530
@@ -243,6 +243,8 @@
#if ENABLE_LIBVMAF
void vmafFrameLevelScore();
#endif
+ void collectDynDataRow(CUData& ctu, FrameStats* rowStats);
+ void collectDynDataFrame();
};
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265-clone.patch
Type: text/x-patch
Size: 10681 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180525/dc6e6bb2/attachment-0001.bin>
More information about the x265-devel
mailing list