[x265] [PATCH] Clean up dynamic refinement

Ashok Kumar Mishra ashok at multicorewareinc.com
Mon May 28 09:47:05 CEST 2018


On Fri, May 25, 2018 at 4:31 PM, <bhavna at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bhavna at multicorewareinc.com>
> # Date 1527165877 -19800
> #      Thu May 24 18:14:37 2018 +0530
> # Node ID 77d698d854fab725682213c9a39ac91aa632095f
> # Parent  cc2c5e46f3c87d27e3602af30b06ba6a0fbe2704
> Clean up dynamic refinement
>
> This patch does the following:
> 1) Earlier, locks were used to avoid the possibility of race conditions
> while
> copying data from CTU level to frame level. Now, the data is collected for
> each
> row and when the entire frame completes analysis the row data is copied to
> the
> frame. This method eliminates the possibility of a race condition without
> having to employ locks.
> 2) Allocate memory for the CTU information from the data pool, this will
> avoid
> fragmentation of data.
>
> diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/common.h
> --- a/source/common/common.h    Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/common.h    Thu May 24 18:14:37 2018 +0530
> @@ -332,6 +332,8 @@
>  #define START_CODE_OVERHEAD 3
>  #define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)
>
> +#define MAX_NUM_DYN_REFINE          (NUM_CU_DEPTH *
> X265_REFINE_INTER_LEVELS)
> +
>  namespace X265_NS {
>
>  enum { SAO_NUM_OFFSET = 4 };
> diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/cudata.cpp
> --- a/source/common/cudata.cpp  Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/cudata.cpp  Thu May 24 18:14:37 2018 +0530
> @@ -317,16 +317,6 @@
>      m_cuAboveLeft = (m_cuLeft && m_cuAbove) ?
> m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
>      m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU -
> 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
>      memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
> -
> -    if (m_encData->m_param->bDynamicRefine)
> -    {
> -        int size = m_encData->m_param->maxCUDepth *
> X265_REFINE_INTER_LEVELS;
> -        CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size);
> -        CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size);
> -        CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size);
> -    }
> -fail:
> -    return;
>  }
>
>  // initialize Sub partition
> diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/cudata.h
> --- a/source/common/cudata.h    Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/cudata.h    Thu May 24 18:14:37 2018 +0530
> @@ -353,8 +353,12 @@
>      coeff_t* trCoeffMemBlock;
>      MV*      mvMemBlock;
>      sse_t*   distortionMemBlock;
> +    uint64_t* dynRefineRdBlock;
> +    uint32_t* dynRefCntBlock;
> +    uint32_t* dynRefVarBlock;
>
> -    CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL;
> mvMemBlock = NULL; distortionMemBlock = NULL; }
> +    CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL;
> mvMemBlock = NULL; distortionMemBlock = NULL;
> +                      dynRefineRdBlock = NULL; dynRefCntBlock = NULL;
> dynRefVarBlock = NULL;}
>
>      bool create(uint32_t depth, uint32_t csp, uint32_t numInstances,
> const x265_param& param)
>      {
> diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/framedata.cpp
> --- a/source/common/framedata.cpp       Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/framedata.cpp       Thu May 24 18:14:37 2018 +0530
> @@ -41,9 +41,25 @@
>      if (param.rc.bStatWrite)
>          m_spsrps = const_cast<RPS*>(sps.spsrps);
>      bool isallocated = m_cuMemPool.create(0, param.internalCsp,
> sps.numCUsInFrame, param);
> +    if (m_param->bDynamicRefine)
> +    {
> +        CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefineRdBlock, uint64_t,
> MAX_NUM_DYN_REFINE * sps.numCUsInFrame);
> +        CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefCntBlock, uint32_t,
> MAX_NUM_DYN_REFINE * sps.numCUsInFrame);
> +        CHECKED_MALLOC_ZERO(m_cuMemPool.dynRefVarBlock, uint32_t,
> MAX_NUM_DYN_REFINE * sps.numCUsInFrame);
> +    }
>      if (isallocated)
> +    {
>          for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
> +        {
> +            if (m_param->bDynamicRefine)
> +            {
> +                m_picCTU[ctuAddr].m_collectCURd =
> m_cuMemPool.dynRefineRdBlock + (ctuAddr * MAX_NUM_DYN_REFINE);
> +                m_picCTU[ctuAddr].m_collectCUVariance =
> m_cuMemPool.dynRefVarBlock + (ctuAddr * MAX_NUM_DYN_REFINE);
> +                m_picCTU[ctuAddr].m_collectCUCount =
> m_cuMemPool.dynRefCntBlock + (ctuAddr * MAX_NUM_DYN_REFINE);
> +            }
>              m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param, ctuAddr);
> +        }
> +    }
>      else
>          return false;
>      CHECKED_MALLOC_ZERO(m_cuStat, RCStatCU, sps.numCUsInFrame);
> @@ -65,6 +81,12 @@
>  {
>      memset(m_cuStat, 0, sps.numCUsInFrame * sizeof(*m_cuStat));
>      memset(m_rowStat, 0, sps.numCuInHeight * sizeof(*m_rowStat));
> +    if (m_param->bDynamicRefine)
> +    {
> +        memset(m_picCTU->m_collectCURd, 0, MAX_NUM_DYN_REFINE *
> sizeof(uint64_t));
> +        memset(m_picCTU->m_collectCUVariance, 0, MAX_NUM_DYN_REFINE *
> sizeof(uint32_t));
> +        memset(m_picCTU->m_collectCUCount, 0, MAX_NUM_DYN_REFINE *
> sizeof(uint32_t));
> +    }
>  }
>
>  void FrameData::destroy()
> @@ -75,6 +97,12 @@
>
>      m_cuMemPool.destroy();
>
> +    if (m_param->bDynamicRefine)
> +    {
> +        X265_FREE(m_cuMemPool.dynRefineRdBlock);
> +        X265_FREE(m_cuMemPool.dynRefCntBlock);
> +        X265_FREE(m_cuMemPool.dynRefVarBlock);
> +    }
>      X265_FREE(m_cuStat);
>      X265_FREE(m_rowStat);
>      for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
> diff -r cc2c5e46f3c8 -r 77d698d854fa source/common/framedata.h
> --- a/source/common/framedata.h Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/framedata.h Thu May 24 18:14:37 2018 +0530
> @@ -88,6 +88,11 @@
>      uint64_t    cntInterPu[NUM_CU_DEPTH][INTER_MODES - 1];
>      uint64_t    cntMergePu[NUM_CU_DEPTH][INTER_MODES - 1];
>
> +    /* Feature values per row for dynamic refinement */
> +    uint64_t       rowRdDyn[MAX_NUM_DYN_REFINE];
> +    uint32_t       rowVarDyn[MAX_NUM_DYN_REFINE];
> +    uint32_t       rowCntDyn[MAX_NUM_DYN_REFINE];
> +
>      FrameStats()
>      {
>          memset(this, 0, sizeof(FrameStats));
> diff -r cc2c5e46f3c8 -r 77d698d854fa source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Mon May 21 18:42:29 2018 +0530
> +++ b/source/encoder/frameencoder.cpp   Thu May 24 18:14:37 2018 +0530
> @@ -956,6 +956,9 @@
>          }
>      } // end of (m_param->maxSlices > 1)
>
> +    if (m_param->bDynamicRefine && m_top->m_startPoint <=
> m_frame->m_encodeOrder) //Avoid collecting data that will not be used by
> future frames.
> +        collectDynDataFrame();
> +
>      if (m_param->rc.bStatWrite)
>      {
>          int totalI = 0, totalP = 0, totalSkip = 0;
> @@ -1494,31 +1497,12 @@
>
>          // Does all the CU analysis, returns best top level mode decision
>          Mode& best = tld.analysis.compressCTU(*ctu, *m_frame,
> m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
> -        if (m_param->bDynamicRefine)
> -        {
> -            if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid
> collecting data that will not be used by future frames.
> -            {
> -                ScopedLock dynLock(m_top->m_dynamicRefineLock);
> -                for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
> -                {
> -                    for (uint32_t depth = 0; depth < m_param->maxCUDepth;
> depth++)
> -                    {
> -                        int offset = (depth * X265_REFINE_INTER_LEVELS) +
> i;
> -                        int curFrameIndex = m_frame->m_encodeOrder -
> m_top->m_startPoint;
> -                        int index = (curFrameIndex *
> X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
> -                        if (ctu->m_collectCUCount[offset])
> -                        {
> -                            m_top->m_variance[index] +=
> ctu->m_collectCUVariance[offset];
> -                            m_top->m_rdCost[index] +=
> ctu->m_collectCURd[offset];
> -                            m_top->m_trainingCount[index] +=
> ctu->m_collectCUCount[offset];
> -                        }
> -                    }
> -                }
> -            }
> -            X265_FREE_ZERO(ctu->m_collectCUVariance);
> -            X265_FREE_ZERO(ctu->m_collectCURd);
> -            X265_FREE_ZERO(ctu->m_collectCUCount);
> -        }
> +
> +        /* startPoint > encodeOrder is true when the start point changes
> for
> +        a new GOP but few frames from the previous GOP is still
> incomplete.
> +        The data of frames in this interval will not be used by any
> future frames. */
> +        if (m_param->bDynamicRefine && m_top->m_startPoint <=
> m_frame->m_encodeOrder)
> +            collectDynDataRow(*ctu, &curRow.rowStats);
>
>          // take a sample of the current active worker count
>          ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
> @@ -1901,6 +1885,46 @@
>      if (ATOMIC_INC(&m_completionCount) == 2 * (int)m_numRows)
>          m_completionEvent.trigger();
>  }
> +
> +void FrameEncoder::collectDynDataRow(CUData& ctu, FrameStats* rowStats)
> +{
> +    for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
> +    {
> +        for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
> +        {
> +            int offset = (depth * X265_REFINE_INTER_LEVELS) + i;
> +            if (ctu.m_collectCUCount[offset])
> +            {
> +                rowStats->rowVarDyn[offset] += ctu.m_collectCUVariance[
> offset];
> +                rowStats->rowRdDyn[offset] += ctu.m_collectCURd[offset];
> +                rowStats->rowCntDyn[offset] +=
> ctu.m_collectCUCount[offset];
> +            }
> +        }
> +    }
> +}
> +
> +void FrameEncoder::collectDynDataFrame()
> +{
> +    for (uint32_t row = 0; row < m_numRows; row++)
> +    {
> +        for (uint32_t refLevel = 0; refLevel < X265_REFINE_INTER_LEVELS;
> refLevel++)
> +        {
> +            for (uint32_t depth = 0; depth < m_param->maxCUDepth; depth++)
> +            {
> +                int offset = (depth * X265_REFINE_INTER_LEVELS) +
> refLevel;
> +                int curFrameIndex = m_frame->m_encodeOrder -
> m_top->m_startPoint;
> +                int index = (curFrameIndex * X265_REFINE_INTER_LEVELS *
> m_param->maxCUDepth) + offset;
> +                if (m_rows[row].rowStats.rowCntDyn[offset])
> +                {
> +                    m_top->m_variance[index] += m_rows[row].rowStats.
> rowVarDyn[offset];
> +                    m_top->m_rdCost[index] +=
> m_rows[row].rowStats.rowRdDyn[offset];
> +                    m_top->m_trainingCount[index] += m_rows[row].rowStats.
> rowCntDyn[offset];
> +                }
> +            }
> +        }
> +    }
> +}
> +
>  void FrameEncoder::computeAvgTrainingData()
>  {
>      if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)
> diff -r cc2c5e46f3c8 -r 77d698d854fa source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h     Mon May 21 18:42:29 2018 +0530
> +++ b/source/encoder/frameencoder.h     Thu May 24 18:14:37 2018 +0530
> @@ -243,6 +243,8 @@
>  #if ENABLE_LIBVMAF
>      void vmafFrameLevelScore();
>  #endif
> +    void collectDynDataRow(CUData& ctu, FrameStats* rowStats);
> +    void collectDynDataFrame();
>  };
>  }
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>

Pushed.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180528/9ca86244/attachment-0001.html>


More information about the x265-devel mailing list