[x265] [PATCH] analysis: avoid motion references not used by split blocks in pme mode

Steve Borho steve at borho.org
Thu Aug 20 05:13:20 CEST 2015


On 08/19, ashok at multicorewareinc.com wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1439808217 -19800
> #      Mon Aug 17 16:13:37 2015 +0530
> # Node ID 868aa6ce2f9033812cdc1c9c4fae31a8a40a2921
> # Parent  2980141a744a569ad6f60dbebdece76a4eababfd
> analysis: avoid motion references not used by split blocks in pme mode

This looks correct, but I worry that it adds some avoidable overhead to the
typical non-PME workflow.

> diff -r 2980141a744a -r 868aa6ce2f90 source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/encoder/search.cpp	Mon Aug 17 16:13:37 2015 +0530
> @@ -1962,10 +1962,16 @@
>      /* Perform ME, repeat until no more work is available */
>      do
>      {
> -        if (meId < m_slice->m_numRefIdx[0])
> -            slave.singleMotionEstimation(*this, pme.mode, pme.pu, pme.puIdx, 0, meId);
> +        if (meId < m_refIdxCnt[0])
> +        {
> +            int refIdx = m_bestRefIdx[0][meId]; //L0
> +            slave.singleMotionEstimation(*this, pme.mode, pme.pu, pme.puIdx, 0, refIdx);
> +        }
>          else
> -            slave.singleMotionEstimation(*this, pme.mode, pme.pu, pme.puIdx, 1, meId - m_slice->m_numRefIdx[0]);
> +        {
> +            int refIdx = m_bestRefIdx[1][meId - m_refIdxCnt[0]]; //L1
> +            slave.singleMotionEstimation(*this, pme.mode, pme.pu, pme.puIdx, 1, refIdx);
> +        }
>  
>          meId = -1;
>          pme.m_lock.acquire();
> @@ -2037,8 +2043,6 @@
>      const int* numRefIdx = slice->m_numRefIdx;
>      uint32_t lastMode = 0;
>      int      totalmebits = 0;
> -    int      numME = numRefIdx[0] + numRefIdx[1];
> -    bool     bTryDistributed = m_param->bDistributeMotionEstimation && numME > 2;
>      MV       mvzero(0, 0);
>      Yuv&     tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
>  
> @@ -2047,6 +2051,32 @@
>  
>      for (int puIdx = 0; puIdx < numPart; puIdx++)
>      {
> +        uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;
> +
> +        m_refIdxCnt[0] = 0; m_refIdxCnt[1] = 0;
> +        for (int list = 0; list < numPredDir; list++)
> +        {
> +            int idx = 0;
> +            for (int ref = 0; ref < numRefIdx[list]; ref++)
> +            {
> +                ProfileCounter(interMode.cu, totalMotionReferences[cuGeom.depth]);
> +
> +                if (!(refMask & (1 << ref)))
> +                {
> +                    ProfileCounter(interMode.cu, skippedMotionReferences[cuGeom.depth]);
> +                    continue;
> +                }
> +                m_bestRefIdx[list][idx++] = ref;
> +            }
> +            m_refIdxCnt[list] = idx;
> +
> +            /* the second list ref bits start at bit 16 */
> +            refMask >>= 16;
> +        }
> +        int numME = m_refIdxCnt[0] + m_refIdxCnt[1];
> +
> +        bool bTryDistributed = m_param->bDistributeMotionEstimation && numME > 2;

My thought on how to fix this was to add an explicit ref list to the PME
class:

  struct {
     int list, ref;
  } m_jobs[2 * MAX_NUM_REF];
  
Then, within the "if (m_param->bDistributeMotionEstimation)" block, it would
walk the references and the refmasks, adding jobs to pme.m_jobs and
incrementing pme.m_jobTotal. At the end, if m_jobTotal is not large enough,
skip bonding any threads and let it fall through to the serial unidir path
(just as if bonding had failed).

Let me know if you need any further clarifications.

>          MotionData* bestME = interMode.bestME[puIdx];
>          PredictionUnit pu(cu, cuGeom, puIdx);
>  
> @@ -2113,7 +2143,8 @@
>              {
>                  processPME(pme, *this);
>  
> -                singleMotionEstimation(*this, interMode, pu, puIdx, 0, 0); /* L0-0 */
> +                int ref = (m_refIdxCnt[0] == 0) ? m_bestRefIdx[1][0] : m_bestRefIdx[0][0];
> +                singleMotionEstimation(*this, interMode, pu, puIdx, 0, ref); /* L0-0 or L1-0 */
>  
>                  bDoUnidir = false;
>  
> @@ -2126,19 +2157,11 @@
>          }
>          if (bDoUnidir)
>          {
> -            uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;
> -
>              for (int list = 0; list < numPredDir; list++)
>              {
> -                for (int ref = 0; ref < numRefIdx[list]; ref++)
> +                for (int idx = 0; idx < m_refIdxCnt[list]; idx++)
>                  {
> -                    ProfileCounter(interMode.cu, totalMotionReferences[cuGeom.depth]);
> -
> -                    if (!(refMask & (1 << ref)))
> -                    {
> -                        ProfileCounter(interMode.cu, skippedMotionReferences[cuGeom.depth]);
> -                        continue;
> -                    }
> +                    int ref = m_bestRefIdx[list][idx];
>  
>                      uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
>                      bits += getTUBits(ref, numRefIdx[list]);
> @@ -2173,8 +2196,6 @@
>                          bestME[list].bits = bits;
>                      }
>                  }
> -                /* the second list ref bits start at bit 16 */
> -                refMask >>= 16;
>              }
>          }
>  
> diff -r 2980141a744a -r 868aa6ce2f90 source/encoder/search.h
> --- a/source/encoder/search.h	Tue Aug 18 12:45:52 2015 +0530
> +++ b/source/encoder/search.h	Mon Aug 17 16:13:37 2015 +0530
> @@ -316,6 +316,9 @@
>      uint32_t        m_numLayers;
>      uint32_t        m_refLagPixels;
>  
> +    int             m_refIdxCnt[2];
> +    int             m_bestRefIdx[2][16];
> +
>  #if DETAILED_CU_STATS
>      /* Accumulate CU statistics separately for each frame encoder */
>      CUStats         m_stats[X265_MAX_FRAME_THREADS];
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list