[x265] [PATCH] search: dump best motion statistics for P&B slices into analysis file

Steve Borho steve at borho.org
Thu Oct 23 17:52:24 CEST 2014


On 10/23, gopu at multicorewareinc.com wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1414056254 -19800
> #      Thu Oct 23 14:54:14 2014 +0530
> # Node ID 08eb43dca008688aeb8814fd21a8e8cb4b3f9801
> # Parent  ce304756a6e469b94cceef930e62972bd2168e4f
> search: dump best motion statistics for P&B slices into analysis file
> 
> diff -r ce304756a6e4 -r 08eb43dca008 source/common/constants.h
> --- a/source/common/constants.h	Wed Oct 22 23:16:13 2014 -0500
> +++ b/source/common/constants.h	Thu Oct 23 14:54:14 2014 +0530
> @@ -73,6 +73,7 @@
>  #define IF_FILTER_PREC    6                            // Log2 of sum of filter taps
>  #define IF_INTERNAL_OFFS (1 << (IF_INTERNAL_PREC - 1)) // Offset used internally
>  #define SLFASE_CONSTANT  0x5f4e4a53
> +#define MAX_RECURSIVE 85 //maximum recursive call for each cu

Nit: please add a space between the `//` and the comment text.

>  extern const int16_t g_lumaFilter[4][NTAPS_LUMA];      // Luma filter taps
>  extern const int16_t g_chromaFilter[8][NTAPS_CHROMA];  // Chroma filter taps
> diff -r ce304756a6e4 -r 08eb43dca008 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Wed Oct 22 23:16:13 2014 -0500
> +++ b/source/encoder/analysis.cpp	Thu Oct 23 14:54:14 2014 +0530
> @@ -154,6 +154,9 @@
>      }
>      else
>      {
> +        if (m_param->analysisMode)
> +            m_interAnalysisData = m_frame->m_interData + (ctu.m_cuAddr * MAX_RECURSIVE);
> +
>          if (!m_param->rdLevel)
>          {
>              /* In RD Level 0/1, copy source pixels into the reconstructed block so
> @@ -880,6 +883,11 @@
>      if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)
>          md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPicYuv, cuAddr, cuGeom.encodeIdx);
>  
> +    /* increment the analysisData buffer for each recursive call */
> +    if (m_interAnalysisData)
> +        m_interAnalysisData++;

I would much prefer passing this pointer through the stack rather than
incrementing a member pointer on each recursive call.
Perhaps the array could be indexed using cuGeom.childOffset.

>      x265_emms(); // TODO: Remove
>  }
>  
> @@ -902,6 +910,11 @@
>  
>          if (!earlySkip)
>          {
> +
> +            /* increment the analysisData buffer for each recursive call */
> +            if (m_interAnalysisData)
> +                m_interAnalysisData++;

There doesn't seem to be any attempt to figure out which inter mode is
best. Many partition shapes may be tried, but only one will be selected as
the best inter option.

>              checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
>  
>              if (m_param->bEnableRectInter)
> @@ -1209,7 +1222,7 @@
>          interMode.distortion = primitives.sa8d[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
>          interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
>      }
> -    else if (predInterSearch(interMode, cuGeom, false, false))
> +    else if (predInterSearch(interMode, cuGeom, false, false, m_interAnalysisData))
>      {
>          interMode.distortion = primitives.sa8d[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
>          interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
> @@ -1234,7 +1247,7 @@
>          encodeResAndCalcRdInterCU(interMode, cuGeom);
>          checkBestMode(interMode, cuGeom.depth);
>      }
> -    else if (predInterSearch(interMode, cuGeom, bMergeOnly, true))
> +    else if (predInterSearch(interMode, cuGeom, bMergeOnly, true, m_interAnalysisData))
>      {
>          encodeResAndCalcRdInterCU(interMode, cuGeom);
>          checkBestMode(interMode, cuGeom.depth);
> diff -r ce304756a6e4 -r 08eb43dca008 source/encoder/analysis.h
> --- a/source/encoder/analysis.h	Wed Oct 22 23:16:13 2014 -0500
> +++ b/source/encoder/analysis.h	Thu Oct 23 14:54:14 2014 +0530
> @@ -71,6 +71,7 @@
>  
>      ModeDepth m_modeDepth[NUM_CU_DEPTH];
>      bool      m_bTryLossless;
> +    x265_inter_data* m_interAnalysisData;    // To store the inter analysis data per CU
>  
>      Analysis();
>      bool create(ThreadLocalData* tld);
> diff -r ce304756a6e4 -r 08eb43dca008 source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Wed Oct 22 23:16:13 2014 -0500
> +++ b/source/encoder/search.cpp	Thu Oct 23 14:54:14 2014 +0530
> @@ -1903,7 +1903,7 @@
>  
>  /* search of the best candidate for inter prediction
>   * returns true if predYuv was filled with a motion compensated prediction */
> -bool Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma)
> +bool Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma, x265_inter_data* interAnalysisData)
>  {
>      CUData* cu = &interMode.cu;
>      Yuv* predYuv = &interMode.predYuv;
> @@ -2038,10 +2038,24 @@
>                      list[l].ref = ref;
>                      list[l].cost = cost;
>                      list[l].bits = bits;
> +                    list[l].costZero = !!(satdCost - m_me.mvcost(outmv));
>                  }
>              }
>          }
>  
> +        if (m_param->analysisMode == X265_ANALYSIS_SAVE && interAnalysisData)
> +        {
> +            for (int32_t i = 0; i < 2; i++)
> +            {
> +                interAnalysisData->costZero[i] = list[i].costZero;
> +                interAnalysisData->mvx[i] = list[i].mv.x;
> +                interAnalysisData->mvy[i] = list[i].mv.y;
> +                interAnalysisData->ref[i] = list[i].ref;
> +            }
> +            interAnalysisData->zOrder = cu->m_absIdxInCTU;
> +            interAnalysisData->depth  = cu->m_depth[0];
> +        }
> +
>          // Bi-directional prediction
>          if (slice->isInterB() && !cu->isBipredRestriction() && list[0].cost != MAX_UINT && list[1].cost != MAX_UINT)
>          {
> diff -r ce304756a6e4 -r 08eb43dca008 source/encoder/search.h
> --- a/source/encoder/search.h	Wed Oct 22 23:16:13 2014 -0500
> +++ b/source/encoder/search.h	Thu Oct 23 14:54:14 2014 +0530
> @@ -146,6 +146,11 @@
>          int ref;
>          uint32_t cost;
>          int bits;
> +
> +        /* require for sharing mode, if cost is zero no need to call subpelCompare
> +         * extract the best cost from mvcost and this is only for actual me, not required for
> +         * lowres me a */
> +        bool costZero;
>      };

The comment seems truncated (it ends mid-sentence at "lowres me a").

>      Search();
> @@ -162,7 +167,7 @@
>      void     checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes);
>  
>      // estimation inter prediction (non-skip)
> -    bool     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma);
> +    bool     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma, x265_inter_data* interAnalysisData);
>      void     parallelInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChroma);

This approach doesn't play well with --pme, but I think there's an
opportunity here to fix several problems at once.

1. parallelInterSearch is already collecting the best list costs into an
   m_bestME[] structure pair. I'm re-combining the --pme and --no-pme
   code paths so all ME will write to those structs.

2. If we move m_bestME[2] to the Mode structure, then at the end of the
   compress function you will be able to find the best inter ME results
   via md.bestMode->bestME[] (if the best mode is inter).

3. You only need to check for inter-save in a single place, at the end of
   the CU compress function, and ME is unaware it's being snooped.

To solve the indexing problem:

top-level:
 compressInterCU_rd5_6(ctu, cuGeom, interAnalysisData);

recursion:
 compressInterCU_rd5_6(parentCTU, childCuData,
                       interAnalysisData + cuGeom.childOffset + subPartIdx);

end-logic:
if (m_param->analysisMode == X265_ANALYSIS_SAVE && interAnalysisData)
{
    if (md.bestMode->cu.m_predMode[0] == MODE_INTER && !md.bestMode->cu.m_mergeFlag[0])
    { 
        for (int32_t i = 0; i < 2; i++)
        {
            interAnalysisData->costZero[i] = md.bestMode->bestME[i].costZero;
            interAnalysisData->mvx[i]      = md.bestMode->bestME[i].mv.x;
            interAnalysisData->mvy[i]      = md.bestMode->bestME[i].mv.y;
            interAnalysisData->ref[i]      = md.bestMode->bestME[i].ref;
        }
        interAnalysisData->zOrder = cuGeom.encodeIdx;
        interAnalysisData->depth  = cuGeom.depth;
    }
    else ?
}

-- 
Steve Borho


More information about the x265-devel mailing list