[x265] [PATCH] analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6

Steve Borho steve at borho.org
Thu May 21 16:37:43 CEST 2015


On 05/21, santhoshini at multicorewareinc.com wrote:
> # HG changeset patch
> # User Santhoshini Sekar<santhoshini at multicorewareinc.com>
> # Date 1432182660 -19800
> #      Thu May 21 10:01:00 2015 +0530
> # Node ID 630b378b744f4bf442839680f5120d7d299d2acd
> # Parent  dc4fcfc574ade14ecc841797ad08be9753fad58e
> analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6
> 
> diff -r dc4fcfc574ad -r 630b378b744f source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Wed May 20 12:17:44 2015 -0500
> +++ b/source/encoder/analysis.cpp	Thu May 21 10:01:00 2015 +0530
> @@ -742,6 +742,24 @@
>          cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
>      }
>  
> +    /* If zero-residual, do not bother doing subpelRefine */
> +    bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER) && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);

These long expressions should be broken into two lines.
'subpelRefine' is not a very descriptive term; this is rd-refine, since
it is refining based on RD cost.

> +    if (subpelRefine && m_param->rdLevel > 4)
> +    {
> +        int hpelDirs = MotionEstimate::hpelDirCount(m_param->subpelRefine);
> +        if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth)
> +            setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom));
> +        uint64_t bcost = md.bestMode->rdCost;
> +        int bdir = 0;
> +        for (int i = 1; i <= hpelDirs; i++)
> +        {
> +            qPelRefine(*md.bestMode, cuGeom, true, i);
> +            encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
> +            COPY2_IF_LT(bcost, md.bestMode->rdCost, bdir, i);
> +        }
> +        qPelRefine(*md.bestMode, cuGeom, true, bdir);
> +        encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
> +    }
>      /* Copy best data to encData CTU and recon */
>      md.bestMode->cu.copyToPic(depth);
>      if (md.bestMode != &md.pred[PRED_SPLIT])
> @@ -1312,6 +1330,24 @@
>          checkBestMode(*splitPred, depth);
>      }
>  
> +    /* If zero-residual, do not bother doing subpelRefine */
> +    bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER) && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);
> +    if (subpelRefine)
> +    {
> +        int hpelDirs = MotionEstimate::hpelDirCount(m_param->subpelRefine);
> +        uint64_t bcost = md.bestMode->rdCost;
> +        int bdir = 0;
> +        if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth)
> +            setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom));
> +        for (int i = 1; i <= hpelDirs; i++)
> +        {
> +            qPelRefine(*md.bestMode, cuGeom, true, i);
> +            encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
> +            COPY2_IF_LT(bcost, md.bestMode->rdCost, bdir, i);
> +        }
> +        qPelRefine(*md.bestMode, cuGeom, true, bdir);
> +        encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
> +    }
>      /* Copy best data to encData CTU and recon */
>      md.bestMode->cu.copyToPic(depth);
>      if (md.bestMode != &md.pred[PRED_SPLIT])
> diff -r dc4fcfc574ad -r 630b378b744f source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp	Wed May 20 12:17:44 2015 -0500
> +++ b/source/encoder/motion.cpp	Thu May 21 10:01:00 2015 +0530
> @@ -155,6 +155,11 @@
>             workload[subme].qpel_iters / 2;
>  }
>  
> +int MotionEstimate::hpelDirCount(int subme)
> +{
> +    return workload[subme].hpel_dirs;
> +}
> +
>  MotionEstimate::~MotionEstimate()
>  {
>      fencPUYuv.destroy();
> @@ -1205,6 +1210,49 @@
>      return bcost;
>  }
>  
> +int MotionEstimate::qPelCompare(ReferencePlanes *ref,
> +                                   const MV &       mvmin,
> +                                   const MV &       mvmax,
> +                                   const MV&        mvp,
> +                                   const MV&        mv,
> +                                   MV &             outQMv,
> +                                   int halfPelIdx)
> +{
> +    setMVP(mvp);

no idea why this function is necessary

> +    MV qmvmin = mvmin.toQPel();
> +    MV qmvmax = mvmax.toQPel();
> +
> +    MV fmv = mv.roundToFPel();
> +    fmv = fmv.clipped(qmvmin, qmvmax);
> +    int bcost = INT_MAX;
> +    const SubpelWorkload& wl = workload[this->subpelRefine];
> +
> +    MV hmv = fmv + square1[halfPelIdx] * 2;
> +    bcost = subpelCompare(ref, hmv, satd) + mvcost(hmv);
> +    MV bmv = hmv;
> +
> +    for (int iter = 0; iter < wl.qpel_iters; iter++)
> +    {
> +        int bdir = 0;
> +        for (int i = 1; i <= wl.qpel_dirs; i++)
> +        {
> +            MV qmv = hmv + square1[i];
> +            int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
> +            COPY2_IF_LT(bcost, cost, bdir, i);
> +        }
> +
> +        if (bdir)
> +            bmv += square1[bdir];
> +        else
> +            break;
> +    }
> +
> +    x265_emms();
> +    outQMv = bmv;
> +    return bcost;
> +}
> +
>  int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
>  {
>      intptr_t refStride = ref->lumaStride;
> diff -r dc4fcfc574ad -r 630b378b744f source/encoder/motion.h
> --- a/source/encoder/motion.h	Wed May 20 12:17:44 2015 -0500
> +++ b/source/encoder/motion.h	Thu May 21 10:01:00 2015 +0530
> @@ -69,6 +69,7 @@
>  
>      static void initScales();
>      static int hpelIterationCount(int subme);
> +    static int hpelDirCount(int subme);
>      void init(int method, int refine, int csp);
>  
>      /* Methods called at slice setup */
> @@ -90,6 +91,7 @@
>      }
>  
>      int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv);
> +    int qPelCompare(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & mvp, const MV & mv, MV & outQMv, int halfPelIdx);
>  
>      int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
>  
> diff -r dc4fcfc574ad -r 630b378b744f source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Wed May 20 12:17:44 2015 -0500
> +++ b/source/encoder/search.cpp	Thu May 21 10:01:00 2015 +0530
> @@ -2348,6 +2348,54 @@
>      interMode.sa8dBits += totalmebits;
>  }
>  
> +void Search::qPelRefine(Mode& interMode, const CUGeom& cuGeom, bool bChromaSA8D, int halfpelIdx)
> +{
> +    CUData& cu = interMode.cu;
> +    Yuv* predYuv = &interMode.predYuv;
> +
> +    const Slice *slice = m_slice;
> +    uint32_t interDir = cu.m_interDir[0];
> +
> +    const int* numRefIdx = slice->m_numRefIdx;
> +
> +    MotionData* bestME = interMode.bestME[0];
> +    PredictionUnit pu(cu, cuGeom, 0);
> +
> +    for (uint32_t list = 0; list < 2; list++)
> +    {
> +        if (interDir & (1 << list))
> +        {
> +            int ref = bestME[list].ref;
> +            uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
> +            bits += getTUBits(ref, numRefIdx[list]);
> +
> +            int merange = m_param->searchRange;
> +
> +            MV mvmin, mvmax, outmv, mvp = interMode.bestME[0][0].mvp;
> +            MV mv = interMode.bestME[0][0].mv;
> +
> +            int satdCost;
> +            setSearchRange(cu, mv, merange, mvmin, mvmax);
> +            satdCost = m_me.qPelCompare(&slice->m_mref[list][ref], mvmin, mvmax, mvp, mv, outmv, halfpelIdx);
> +
> +            /* Get total cost of partition, but only include MV bit cost once */
> +            bits += m_me.bitcost(outmv);
> +            uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
> +
> +            if (cost < bestME[list].cost)
> +            {
> +                bestME[list].mv = outmv;
> +                bestME[list].cost = cost;
> +                bestME[list].bits = bits;
> +            }
> +        }
> +    }
> +    motionCompensation(cu, pu, *predYuv, true, bChromaSA8D);

1. bidir refinement should be a different feature; it needs to get built
   into predInterSearch() itself. rd-refine should only be used to
   refine a unidir prediction.

2. Doing all this work to update bestME[] satd and bits costs is a waste
   of time.

3. you're not writing the new MV into the CU, so motionCompensation
   below is just re-generating the original prediction for this CU,
   ignoring bestME

4. the whole point of rd-refine is to measure the full RD cost at each
   offset. using SATD to pick a direction in qPelCompare is defeating
   the point. All this function should be doing for hpel refine is:

   cu.m_mv[0][0] = interMode.bestME[0][0].mv + square1[halfPelIdx] * 2;
   motionCompensation(cu, pu, *predYuv, true, true);

   and qpel refine:

   cu.m_mv[0][0] = interMode.bestME[0][0].mv + square1[halfPelIdx];
   motionCompensation(cu, pu, *predYuv, true, true);

   it probably doesn't need to be a function at all


At the end, the final refined MV must be fully configured in the CU with:

   cu.setPUMv(0, cu.m_mv[0][0], 0, 0);
   cu.m_mvd[0][0] = cu.m_mvd[0][0].mvp - cu.m_mv[0][0];

And, thinking out loud, m_mvd probably needs to be removed from CUData
since it can always be calculated by subtracting the MV from its MVP.
Caching it has no useful value, except to trip us up when we forget to
update it (we are not even setting it on bidir CUs now).

> +
> +    X265_CHECK(interMode.ok(), "inter mode is not ok");
> +}
> +
>  void Search::getBlkBits(PartSize cuMode, bool bPSlice, int partIdx, uint32_t lastMode, uint32_t blockBit[3])
>  {
>      if (cuMode == SIZE_2Nx2N)
> diff -r dc4fcfc574ad -r 630b378b744f source/encoder/search.h
> --- a/source/encoder/search.h	Wed May 20 12:17:44 2015 -0500
> +++ b/source/encoder/search.h	Thu May 21 10:01:00 2015 +0530
> @@ -311,6 +311,7 @@
>  
>      // estimation inter prediction (non-skip)
>      void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t masks[2]);
> +    void     qPelRefine(Mode& interMode, const CUGeom& cuGeom, bool bChroma, int halfpelIdx);
>  
>      // encode residual and compute rd-cost for inter mode
>      void     encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list