[x265] [PATCH] search: move MVP Index selection to motion estimate to avoid code duplication

Wed Apr 29 20:59:12 CEST 2015

On 04/29, gopu at multicorewareinc.com wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1430293410 -19800
> #      Wed Apr 29 13:13:30 2015 +0530
> # Node ID cc6da5218a188ded2d239bb8dbf6c3399978c93f
> # Parent  c4d9ee2cef03ef74f5623784d514ffcdf725bec4
> search: move MVP Index selection to motion estimate to avoid code duplication
> 
> diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp	Tue Apr 28 14:34:45 2015 -0500
> +++ b/source/encoder/motion.cpp	Wed Apr 29 13:13:30 2015 +0530
> @@ -1280,3 +1280,45 @@
>  
>      return cost;
>  }
> +
> +int MotionEstimate::getBestmvpIdx(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV* amvp, const int merange, const bool bFrameParallel)
> +{
> +    pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx);
> +    intptr_t dstStride = dstYuv.m_size;
> +    int mvpidx = 0;
> +    int mvpcost = MotionEstimate::COST_MAX;
> +    intptr_t srcStride = refPic.m_stride;
> +    int cost;

eh, I fail to see how this is avoiding code duplication when you've
copied the motion compensation code below, when this class already
has subpelCompare()

> +    if (amvp[0] == amvp[1])
> +        mvpidx = 0;

this should be return 0; (mvpidx is already 0, so as written the
assignment is a no-op and the loop below still runs)

> +    for (int i = 0; i < AMVP_NUM_CANDS; i++)
> +    {
> +        int xFrac = amvp[i].x & 0x3;
> +        int yFrac = amvp[i].y & 0x3;
> +        intptr_t srcOffset = (amvp[i].x >> 2) + (amvp[i].y >> 2) * srcStride;
> +        const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;

this would crash since the MVP is no longer being clipped to the
available pixels

> +        // NOTE: skip mvCand if Y is > merange and -FN>1
> +        if (bFrameParallel && (amvp[i].y >= (merange + 1) * 4))
> +            continue;
> +
> +        if (!(yFrac | xFrac))
> +            primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
> +        else if (!yFrac)
> +            primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac);
> +        else if (!xFrac)
> +            primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac);
> +        else
> +            primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac);
> +
> +        cost = bufSAD(dst, dstStride);
> +        if (cost < mvpcost)
> +        {
> +            mvpcost = cost;
> +            mvpidx = i;
> +        }
> +    }
> +    return mvpidx;
> +}
> diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/motion.h
> --- a/source/encoder/motion.h	Tue Apr 28 14:34:45 2015 -0500
> +++ b/source/encoder/motion.h	Wed Apr 29 13:13:30 2015 +0530
> @@ -29,6 +29,7 @@
>  #include "mv.h"
>  #include "bitcost.h"
>  #include "yuv.h"
> +#include "predict.h"
>  
>  namespace x265 {
>  // private x265 namespace
> @@ -93,6 +94,8 @@
>  
>      int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
>  
> +    int getBestmvpIdx(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV* amvp, const int merange, const bool bFrameParallel);
> +
>  protected:
>  
>      inline void StarPatternSearch(ReferencePlanes *ref,
> diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Tue Apr 28 14:34:45 2015 -0500
> +++ b/source/encoder/search.cpp	Wed Apr 29 13:13:30 2015 +0530
> @@ -1923,38 +1923,17 @@
>      MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];
>      int numMvc = interMode.cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
>  
> -    int mvpIdx = 0;
>      int merange = m_param->searchRange;
>      MotionData* bestME = interMode.bestME[part];
> -
> -    if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
> -    {
> -        uint32_t bestCost = MAX_INT;
> -        for (int i = 0; i < AMVP_NUM_CANDS; i++)
> -        {
> -            MV mvCand = interMode.amvpCand[list][ref][i];
> -
> -            // NOTE: skip mvCand if Y is > merange and -FN>1
> -            if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
> -                continue;
> -
> -            interMode.cu.clipMv(mvCand);

this clip here prevents crashes

> -            Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
> -            predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand);
> -            uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
> -
> -            if (bestCost > cost)
> -            {
> -                bestCost = cost;
> -                mvpIdx = i;
> -            }
> -        }
> -    }
> -
> -    MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
> +    MV mvmin, mvmax, outmv, mvp;
> +    MV *amvpCand = interMode.amvpCand[list][ref];
> +
> +    // Pick the best possible MVP IDX from AMVP candidates based on least residual
> +    Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
> +    int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
> +    mvp = amvpCand[mvpIdx];
> +
>      setSearchRange(interMode.cu, mvp, merange, mvmin, mvmax);
> -
>      int satdCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
>  
>      /* Get total cost of partition, but only include MV bit cost once */
> @@ -1962,7 +1941,7 @@
>      uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
>  
>      /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
> -    checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
> +    checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
>  
>      /* tie goes to the smallest ref ID, just like --no-pme */
>      ScopedLock _lock(master.m_meLock);
> @@ -2051,46 +2030,23 @@
>                  bits += getTUBits(ref, numRefIdx[list]);
>  
>                  int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
> -
> -                // Pick the best possible MVP from AMVP candidates based on least residual
> -                int mvpIdx = 0;
>                  int merange = m_param->searchRange;
> -
> -                if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
> -                {
> -                    uint32_t bestCost = MAX_INT;
> -                    for (int i = 0; i < AMVP_NUM_CANDS; i++)
> -                    {
> -                        MV mvCand = interMode.amvpCand[list][ref][i];
> -
> -                        // NOTE: skip mvCand if Y is > merange and -FN>1
> -                        if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
> -                            continue;
> -
> -                        cu.clipMv(mvCand);
> -                        predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
> -                        uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
> -
> -                        if (bestCost > cost)
> -                        {
> -                            bestCost = cost;
> -                            mvpIdx = i;
> -                        }
> -                    }
> -                }
> -
> -                MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
> -
> -                int satdCost;
> +                MV mvmin, mvmax, outmv, mvp;
> +                MV *amvpCand = interMode.amvpCand[list][ref];
> +
> +                // Pick the best possible MVP IDX from AMVP candidates based on least residual
> +                int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);

I made an attempt at doing this using the existing methods in motion.cpp
but there is this chicken and egg issue with MV clipping and the search
range mvmin/mvmax generation that makes it impossible to do without
making motion.cpp aware of a whole pile of other classes, and I'm not
sure it is worth the trouble.

The idea was to merge MVP selection with the logic that picks between
the MVP and 0 as the search origin. It doesn't look possible to do this
in motion.cpp but perhaps it can be done as a helper function in
search.cpp.  I'll try this approach and see if it goes anywhere.

> +                mvp = amvpCand[mvpIdx];
> +
>                  setSearchRange(cu, mvp, merange, mvmin, mvmax);
> -                satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
> +                int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
>  
>                  /* Get total cost of partition, but only include MV bit cost once */
>                  bits += m_me.bitcost(outmv);
>                  uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
>  
>                  /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
> -                checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
> +                checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
>  
>                  if (cost < bestME[list].cost)
>                  {
> @@ -2134,35 +2090,13 @@
>                      bits += getTUBits(ref, numRefIdx[list]);
>  
>                      int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
> -
> -                    // Pick the best possible MVP from AMVP candidates based on least residual
> -                    int mvpIdx = 0;
>                      int merange = m_param->searchRange;
> -
> -                    if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
> -                    {
> -                        uint32_t bestCost = MAX_INT;
> -                        for (int i = 0; i < AMVP_NUM_CANDS; i++)
> -                        {
> -                            MV mvCand = interMode.amvpCand[list][ref][i];
> -
> -                            // NOTE: skip mvCand if Y is > merange and -FN>1
> -                            if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
> -                                continue;
> -
> -                            cu.clipMv(mvCand);
> -                            predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
> -                            uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
> -
> -                            if (bestCost > cost)
> -                            {
> -                                bestCost = cost;
> -                                mvpIdx = i;
> -                            }
> -                        }
> -                    }
> -
> -                    MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
> +                    MV mvmin, mvmax, outmv, mvp;
> +                    MV *amvpCand = interMode.amvpCand[list][ref];
> +
> +                    // Pick the best possible MVP IDX from AMVP candidates based on least residual
> +                    int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
> +                    mvp = amvpCand[mvpIdx];
>  
>                      setSearchRange(cu, mvp, merange, mvmin, mvmax);
>                      int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
> @@ -2172,7 +2106,7 @@
>                      uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
>  
>                      /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
> -                    checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
> +                    checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
>  
>                      if (cost < bestME[list].cost)
>                      {
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho