[x265] [PATCH] Clean up dynamic refinement
Bhavna Hariharan
bhavna at multicorewareinc.com
Fri May 25 13:00:20 CEST 2018
Please ignore this patch, I will resend it with some changes.
Thanks,
Bhavna Hariharan
On Tue, May 22, 2018 at 2:14 PM, <bhavna at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Bhavna Hariharan <bhavna at multicorewareinc.com>
> # Date 1526964471 -19800
> # Tue May 22 10:17:51 2018 +0530
> # Node ID 5587d9a25248075edadf94e1a78f6e11d091f651
> # Parent cc2c5e46f3c87d27e3602af30b06ba6a0fbe2704
> Clean up dynamic refinement
>
> This patch does the following:
> 1) Earlier, locks were used to avoid the possibility of race conditions
> while
> copying data from CTU level to frame level. Now, the data is collected for
> each
> row and when the entire frame completes analysis the row data is copied to
> the
> frame. This method eliminates the possibility of a race condition without
> having to employ locks.
> 2) Allocate memory for the CTU information from the data pool; this will
> avoid
> fragmentation of data.
>
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/common.h
> --- a/source/common/common.h Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/common.h Tue May 22 10:17:51 2018 +0530
> @@ -332,6 +332,8 @@
> #define START_CODE_OVERHEAD 3
> #define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)
>
> +#define MAX_NUM_DYN_REFINE ((NUM_CU_DEPTH - 1) *
> X265_REFINE_INTER_LEVELS)
> +
> namespace X265_NS {
>
> enum { SAO_NUM_OFFSET = 4 };
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/cudata.cpp
> --- a/source/common/cudata.cpp Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/cudata.cpp Tue May 22 10:17:51 2018 +0530
> @@ -274,6 +274,9 @@
> for (int i = 0; i < 3; i++)
> m_fAc_den[i] = m_fDc_den[i] = 0;
> }
> + m_collectCURd = dataPool.dynRefineRdBlock + (instance *
> MAX_NUM_DYN_REFINE);
> + m_collectCUVariance = dataPool.dynRefVarBlock + (instance *
> MAX_NUM_DYN_REFINE);
> + m_collectCUCount = dataPool.dynRefCntBlock + (instance *
> MAX_NUM_DYN_REFINE);
> }
>
> void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp,
> uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice)
> @@ -318,15 +321,9 @@
> m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU -
> 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
> memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
>
> - if (m_encData->m_param->bDynamicRefine)
> - {
> - int size = m_encData->m_param->maxCUDepth *
> X265_REFINE_INTER_LEVELS;
> - CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size);
> - CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size);
> - CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size);
> - }
> -fail:
> - return;
> + memset(m_collectCURd, 0, MAX_NUM_DYN_REFINE * sizeof(uint64_t));
> + memset(m_collectCUVariance, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));
> + memset(m_collectCUCount, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));
> }
>
> // initialize Sub partition
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/cudata.h
> --- a/source/common/cudata.h Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/cudata.h Tue May 22 10:17:51 2018 +0530
> @@ -353,8 +353,12 @@
> coeff_t* trCoeffMemBlock;
> MV* mvMemBlock;
> sse_t* distortionMemBlock;
> + uint64_t* dynRefineRdBlock;
> + uint32_t* dynRefCntBlock;
> + uint32_t* dynRefVarBlock;
>
> - CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL;
> mvMemBlock = NULL; distortionMemBlock = NULL; }
> + CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL;
> mvMemBlock = NULL; distortionMemBlock = NULL;
> + dynRefineRdBlock = NULL; dynRefCntBlock = NULL;
> dynRefVarBlock = NULL;}
>
> bool create(uint32_t depth, uint32_t csp, uint32_t numInstances,
> const x265_param& param)
> {
> @@ -373,6 +377,9 @@
> CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances
> * CUData::BytesPerPartition);
> CHECKED_MALLOC_ZERO(mvMemBlock, MV, numPartition * 4 *
> numInstances);
> CHECKED_MALLOC(distortionMemBlock, sse_t, numPartition *
> numInstances);
> + CHECKED_MALLOC_ZERO(dynRefineRdBlock, uint64_t,
> MAX_NUM_DYN_REFINE * numInstances);
> + CHECKED_MALLOC_ZERO(dynRefCntBlock, uint32_t, MAX_NUM_DYN_REFINE
> * numInstances);
> + CHECKED_MALLOC_ZERO(dynRefVarBlock, uint32_t, MAX_NUM_DYN_REFINE
> * numInstances);
> return true;
> fail:
> return false;
> @@ -384,6 +391,9 @@
> X265_FREE(mvMemBlock);
> X265_FREE(charMemBlock);
> X265_FREE(distortionMemBlock);
> + X265_FREE(dynRefineRdBlock);
> + X265_FREE(dynRefCntBlock);
> + X265_FREE(dynRefVarBlock);
> }
> };
> }
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/framedata.h
> --- a/source/common/framedata.h Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/framedata.h Tue May 22 10:17:51 2018 +0530
> @@ -88,6 +88,11 @@
> uint64_t cntInterPu[NUM_CU_DEPTH][INTER_MODES - 1];
> uint64_t cntMergePu[NUM_CU_DEPTH][INTER_MODES - 1];
>
> + /* Feature values per row for dynamic refinement */
> + uint64_t rowRdDyn[MAX_NUM_DYN_REFINE];
> + uint32_t rowVarDyn[MAX_NUM_DYN_REFINE];
> + uint32_t rowCntDyn[MAX_NUM_DYN_REFINE];
> +
> FrameStats()
> {
> memset(this, 0, sizeof(FrameStats));
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Mon May 21 18:42:29 2018 +0530
> +++ b/source/encoder/frameencoder.cpp Tue May 22 10:17:51 2018 +0530
> @@ -956,6 +956,9 @@
> }
> } // end of (m_param->maxSlices > 1)
>
> + if (m_param->bDynamicRefine && m_top->m_startPoint <=
> m_frame->m_encodeOrder) //Avoid collecting data that will not be used by
> future frames.
> + collectDynDataFrame();
> +
> if (m_param->rc.bStatWrite)
> {
> int totalI = 0, totalP = 0, totalSkip = 0;
> @@ -1494,31 +1497,12 @@
>
> // Does all the CU analysis, returns best top level mode decision
> Mode& best = tld.analysis.compressCTU(*ctu, *m_frame,
> m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
> - if (m_param->bDynamicRefine)
> - {
> - if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid
> collecting data that will not be used by future frames.
> - {
> - ScopedLock dynLock(m_top->m_dynamicRefineLock);
> - for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
> - {
> - for (uint32_t depth = 0; depth < m_param->maxCUDepth;
> depth++)
> - {
> - int offset = (depth * X265_REFINE_INTER_LEVELS) +
> i;
> - int curFrameIndex = m_frame->m_encodeOrder -
> m_top->m_startPoint;
> - int index = (curFrameIndex *
> X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
> - if (ctu->m_collectCUCount[offset])
> - {
> - m_top->m_variance[index] +=
> ctu->m_collectCUVariance[offset];
> - m_top->m_rdCost[index] +=
> ctu->m_collectCURd[offset];
> - m_top->m_trainingCount[index] +=
> ctu->m_collectCUCount[offset];
> - }
> - }
> - }
> - }
> - X265_FREE_ZERO(ctu->m_collectCUVariance);
> - X265_FREE_ZERO(ctu->m_collectCURd);
> - X265_FREE_ZERO(ctu->m_collectCUCount);
> - }
> +
> + /* startPoint > encodeOrder is true when the start point changes
> for
> + a new GOP but a few frames from the previous GOP are still
> incomplete.
> + The data of frames in this interval will not be used by any
> future frames. */
> + if (m_param->bDynamicRefine && m_top->m_startPoint <=
> m_frame->m_encodeOrder)
> + collectDynDataRow(*ctu, &curRow.rowStats);
>
> // take a sample of the current active worker count
> ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
> @@ -1901,6 +1885,46 @@
> if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
> m_completionEvent.trigger();
> }
> +
> +void FrameEncoder::collectDynDataRow(CUData& ctu, FrameStats* rowStats)
> +{
> + for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
> + {
> + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
> + {
> + int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
> + if (ctu.m_collectCUCount[offset])
> + {
> + rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[
> offset];
> + rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];
> + rowStats->rowCntDyn[offset] +=
> ctu.m_collectCUCount[offset];
> + }
> + }
> + }
> +}
> +
> +void FrameEncoder::collectDynDataFrame()
> +{
> + for (uint32_t row = 0; row < m_numRows; row++)
> + {
> + for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS;
> refLevel++)
> + {
> + for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
> + {
> + int offset = (depth * X265_REFINE_INTER_LEVELS) +
> refLevel;
> + int curFrameIndex = m_frame->m_encodeOrder -
> m_top->m_startPoint;
> + int index = (curFrameIndex * X265_REFINE_INTER_LEVELS *
> m_param->maxCUDepth) + offset;
> + if (m_rows[row].rowStats.rowCntDyn[offset])
> + {
> + m_top->m_variance[index] += m_rows[row].rowStats.
> rowVarDyn[offset];
> + m_top->m_rdCost[index] +=
> m_rows[row].rowStats.rowRdDyn[offset];
> + m_top->m_trainingCount[index] += m_rows[row].rowStats.
> rowCntDyn[offset];
> + }
> + }
> + }
> + }
> +}
> +
> void FrameEncoder::computeAvgTrainingData()
> {
> if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Mon May 21 18:42:29 2018 +0530
> +++ b/source/encoder/frameencoder.h Tue May 22 10:17:51 2018 +0530
> @@ -243,6 +243,8 @@
> #if ENABLE_LIBVMAF
> void vmafFrameLevelScore();
> #endif
> + void collectDynDataRow(CUData& ctu, FrameStats* rowStats);
> + void collectDynDataFrame();
> };
> }
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180525/67e371da/attachment.html>
More information about the x265-devel
mailing list