[x265] [PATCH] MV refinement for multipass encoding

Tue Jun 6 11:48:58 CEST 2017

On Mon, Jun 5, 2017 at 3:45 PM, <ashok at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Ashok Kumar Mishra <ashok at multicorewareinc.com>
> # Date 1496656244 -19800
> #      Mon Jun 05 15:20:44 2017 +0530
> # Node ID c04d02d71f206431b6b6e60460b81dcc85fc5db5
> # Parent  de49a722b256d94c9ba30b5d88459026bea528b8
> MV refinement for multipass encoding
>

Pushed to default branch

>
> diff -r de49a722b256 -r c04d02d71f20 doc/reST/cli.rst
> --- a/doc/reST/cli.rst  Wed May 24 20:01:59 2017 +0530
> +++ b/doc/reST/cli.rst  Mon Jun 05 15:20:44 2017 +0530
> @@ -911,6 +911,12 @@
>         inter modes for blocks of size one smaller than the min-cu-size of
> the
>         incoming analysis data from the previous encode. Default disabled.
>
> +.. option:: --refine-mv
> +
> +       Enables refinement of motion vectors for scaled video. Evaluates
> the best
> +       motion vector by searching the surrounding eight integer and
> subpel pixel
> +       positions.
> +
>  Options which affect the transform unit quad-tree, sometimes referred to
>  as the residual quad-tree (RQT).
>
> diff -r de49a722b256 -r c04d02d71f20 source/CMakeLists.txt
> --- a/source/CMakeLists.txt     Wed May 24 20:01:59 2017 +0530
> +++ b/source/CMakeLists.txt     Mon Jun 05 15:20:44 2017 +0530
> @@ -29,7 +29,7 @@
>  option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
>  mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>  # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 120)
> +set(X265_BUILD 121)
>  configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
>                 "${PROJECT_BINARY_DIR}/x265.def")
>  configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff -r de49a722b256 -r c04d02d71f20 source/common/param.cpp
> --- a/source/common/param.cpp   Wed May 24 20:01:59 2017 +0530
> +++ b/source/common/param.cpp   Mon Jun 05 15:20:44 2017 +0530
> @@ -280,6 +280,7 @@
>      param->scaleFactor = 0;
>      param->intraRefine = 0;
>      param->interRefine = 0;
> +    param->mvRefine = 0;
>  }
>
>  int x265_param_default_preset(x265_param* param, const char* preset,
> const char* tune)
> @@ -963,6 +964,7 @@
>          OPT("scale-factor") p->scaleFactor = atoi(value);
>          OPT("refine-intra")p->intraRefine = atobool(value);
>          OPT("refine-inter")p->interRefine = atobool(value);
> +        OPT("refine-mv")p->mvRefine = atobool(value);
>          else
>              return X265_PARAM_BAD_NAME;
>      }
> @@ -1685,6 +1687,7 @@
>      s += sprintf(s, " scale-factor=%d", p->scaleFactor);
>      s += sprintf(s, " refine-intra=%d", p->intraRefine);
>      s += sprintf(s, " refine-inter=%d", p->interRefine);
> +    s += sprintf(s, " refine-mv=%d", p->mvRefine);
>      BOOL(p->bLimitSAO, "limit-sao");
>      s += sprintf(s, " ctu-info=%d", p->bCTUInfo);
>  #undef BOOL
> diff -r de49a722b256 -r c04d02d71f20 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Wed May 24 20:01:59 2017 +0530
> +++ b/source/encoder/analysis.cpp       Mon Jun 05 15:20:44 2017 +0530
> @@ -2267,14 +2267,16 @@
>                      int cuIdx = (mode.cu.m_cuAddr *
> parentCTU.m_numPartitions) + cuGeom.absPartIdx;
>                      mode.cu.m_mergeFlag[pu.puAbsPartIdx] =
> interDataCTU->mergeFlag[cuIdx + part];
>                      mode.cu.setPUInterDir(interDataCTU->interDir[cuIdx +
> part], pu.puAbsPartIdx, part);
> -                    for (int dir = 0; dir < m_slice->isInterB() + 1;
> dir++)
> +                    for (int list = 0; list < m_slice->isInterB() + 1;
> list++)
>                      {
> -                        mode.cu.setPUMv(dir, interDataCTU->mv[dir][cuIdx
> + part], pu.puAbsPartIdx, part);
> -                        mode.cu.setPURefIdx(dir,
> interDataCTU->refIdx[dir][cuIdx + part], pu.puAbsPartIdx, part);
> -                        mode.cu.m_mvpIdx[dir][pu.puAbsPartIdx] =
> interDataCTU->mvpIdx[dir][cuIdx + part];
> +                        mode.cu.setPUMv(list,
> interDataCTU->mv[list][cuIdx + part], pu.puAbsPartIdx, part);
> +                        mode.cu.setPURefIdx(list,
> interDataCTU->refIdx[list][cuIdx + part], pu.puAbsPartIdx, part);
> +                        mode.cu.m_mvpIdx[list][pu.puAbsPartIdx] =
> interDataCTU->mvpIdx[list][cuIdx + part];
>                      }
>                      if (!mode.cu.m_mergeFlag[pu.puAbsPartIdx])
>                      {
> +                        if (m_param->mvRefine)
> +                            m_me.setSourcePU(*mode.fencYuv, pu.ctuAddr,
> pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height,
> m_param->searchMethod, m_param->subpelRefine, false);
>                          //AMVP
>                          MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 2];
>                          mode.cu.getNeighbourMV(part, pu.puAbsPartIdx,
> mode.interNeighbours);
> @@ -2285,6 +2287,12 @@
>                                  continue;
>                              mode.cu.getPMV(mode.interNeighbours, list,
> ref, mode.amvpCand[list][ref], mvc);
>                              MV mvp = mode.amvpCand[list][ref][mode.
> cu.m_mvpIdx[list][pu.puAbsPartIdx]];
> +                            if (m_param->mvRefine)
> +                            {
> +                                MV outmv;
> +                                searchMV(mode, pu, list, ref, outmv);
> +                                mode.cu.setPUMv(list, outmv,
> pu.puAbsPartIdx, part);
> +                            }
>                              mode.cu.m_mvd[list][pu.puAbsPartIdx] =
> mode.cu.m_mv[list][pu.puAbsPartIdx] - mvp;
>                          }
>                      }
> @@ -2293,7 +2301,6 @@
>                          MVField candMvField[MRG_MAX_NUM_CANDS][2]; //
> double length for mv of both lists
>                          uint8_t candDir[MRG_MAX_NUM_CANDS];
>                          mode.cu.getInterMergeCandidates(pu.puAbsPartIdx,
> part, candMvField, candDir);
> -                        mode.cu.m_mvpIdx[0][pu.puAbsPartIdx] =
> interDataCTU->mvpIdx[0][cuIdx + part];
>                          uint8_t mvpIdx = mode.cu.m_mvpIdx[0][pu.
> puAbsPartIdx];
>                          mode.cu.setPUInterDir(candDir[mvpIdx],
> pu.puAbsPartIdx, part);
>                          mode.cu.setPUMv(0, candMvField[mvpIdx][0].mv,
> pu.puAbsPartIdx, part);
> diff -r de49a722b256 -r c04d02d71f20 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Wed May 24 20:01:59 2017 +0530
> +++ b/source/encoder/encoder.cpp        Mon Jun 05 15:20:44 2017 +0530
> @@ -2310,6 +2310,15 @@
>          x265_log(p, X265_LOG_WARNING, "Inter refinement does not support
> limitTU. Disabling limitTU.\n");
>          p->limitTU = 0;
>      }
> +
> +       if (p->mvRefine)
> +    {
> +        if (p->analysisMode != X265_ANALYSIS_LOAD ||
> p->analysisRefineLevel < 10 || !p->scaleFactor)
> +        {
> +            x265_log(p, X265_LOG_WARNING, "MV refinement requires
> analysis load, refine-level 10, scale factor. Disabling inter refine.\n");
> +            p->mvRefine = 0;
> +        }
> +    }
>
>      if ((p->analysisMultiPassRefine || p->analysisMultiPassDistortion)
> && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation))
>      {
> diff -r de49a722b256 -r c04d02d71f20 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Wed May 24 20:01:59 2017 +0530
> +++ b/source/encoder/motion.cpp Mon Jun 05 15:20:44 2017 +0530
> @@ -598,6 +598,139 @@
>      }
>  }
>
> +void MotionEstimate::refineMV(ReferencePlanes* ref,
> +                              const MV&        mvmin,
> +                              const MV&        mvmax,
> +                              const MV&        qmvp,
> +                              MV&              outQMv)
> +{
> +    ALIGN_VAR_16(int, costs[16]);
> +    if (ctuAddr >= 0)
> +        blockOffset = ref->reconPic->getLumaAddr(ctuAddr, absPartIdx) -
> ref->reconPic->getLumaAddr(0);
> +    intptr_t stride = ref->lumaStride;
> +    pixel* fenc = fencPUYuv.m_buf[0];
> +    pixel* fref = ref->fpelPlane[0] + blockOffset;
> +
> +    setMVP(qmvp);
> +
> +    MV qmvmin = mvmin.toQPel();
> +    MV qmvmax = mvmax.toQPel();
> +
> +    /* The term cost used here means satd/sad values for that particular
> search.
> +     * The costs used in ME integer search only includes the SAD cost of
> motion
> +     * residual and sqrtLambda times MVD bits.  The subpel refine steps
> use SATD
> +     * cost of residual and sqrtLambda * MVD bits.
> +    */
> +
> +    // measure SATD cost at clipped QPEL MVP
> +    MV pmv = qmvp.clipped(qmvmin, qmvmax);
> +    MV bestpre = pmv;
> +    int bprecost;
> +
> +    bprecost = subpelCompare(ref, pmv, sad);
> +
> +    /* re-measure full pel rounded MVP with SAD as search start point */
> +    MV bmv = pmv.roundToFPel();
> +    int bcost = bprecost;
> +    if (pmv.isSubpel())
> +        bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride,
> stride) + mvcost(bmv << 2);
> +
> +    /* square refine */
> +    int dir = 0;
> +    COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
> +    if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y))
> +        COPY2_IF_LT(bcost, costs[0], dir, 1);
> +    if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
> +        COPY2_IF_LT(bcost, costs[1], dir, 2);
> +    COPY2_IF_LT(bcost, costs[2], dir, 3);
> +    COPY2_IF_LT(bcost, costs[3], dir, 4);
> +    COST_MV_X4_DIR(-1, -1, -1, 1, 1, -1, 1, 1, costs);
> +    if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y))
> +        COPY2_IF_LT(bcost, costs[0], dir, 5);
> +    if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
> +        COPY2_IF_LT(bcost, costs[1], dir, 6);
> +    if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y))
> +        COPY2_IF_LT(bcost, costs[2], dir, 7);
> +    if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
> +        COPY2_IF_LT(bcost, costs[3], dir, 8);
> +    bmv += square1[dir];
> +
> +    if (bprecost < bcost)
> +    {
> +        bmv = bestpre;
> +        bcost = bprecost;
> +    }
> +    else
> +        bmv = bmv.toQPel(); // promote search bmv to qpel
> +
> +    // TO DO: Change SubpelWorkload to fine tune MV
> +    // Now it is set to 5 for experiment.
> +    // const SubpelWorkload& wl = workload[this->subpelRefine];
> +    const SubpelWorkload& wl = workload[5];
> +
> +    pixelcmp_t hpelcomp;
> +
> +    if (wl.hpel_satd)
> +    {
> +        bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
> +        hpelcomp = satd;
> +    }
> +    else
> +        hpelcomp = sad;
> +
> +    for (int iter = 0; iter < wl.hpel_iters; iter++)
> +    {
> +        int bdir = 0;
> +        for (int i = 1; i <= wl.hpel_dirs; i++)
> +        {
> +            MV qmv = bmv + square1[i] * 2;
> +
> +            // check mv range for slice bound
> +            if ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y))
> +                continue;
> +
> +            int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
> +            COPY2_IF_LT(bcost, cost, bdir, i);
> +        }
> +
> +        if (bdir)
> +            bmv += square1[bdir] * 2;
> +        else
> +            break;
> +    }
> +
> +    /* if HPEL search used SAD, remeasure with SATD before QPEL */
> +    if (!wl.hpel_satd)
> +        bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
> +
> +    for (int iter = 0; iter < wl.qpel_iters; iter++)
> +    {
> +        int bdir = 0;
> +        for (int i = 1; i <= wl.qpel_dirs; i++)
> +        {
> +            MV qmv = bmv + square1[i];
> +
> +            // check mv range for slice bound
> +            if ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y))
> +                continue;
> +
> +            int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
> +            COPY2_IF_LT(bcost, cost, bdir, i);
> +        }
> +
> +        if (bdir)
> +            bmv += square1[bdir];
> +        else
> +            break;
> +    }
> +
> +    // check mv range for slice bound
> +    X265_CHECK(((pmv.y >= qmvmin.y) & (pmv.y <= qmvmax.y)), "mv beyond
> range!");
> +
> +    x265_emms();
> +    outQMv = bmv;
> +}
> +
>  int MotionEstimate::motionEstimate(ReferencePlanes *ref,
>                                     const MV &       mvmin,
>                                     const MV &       mvmax,
> diff -r de49a722b256 -r c04d02d71f20 source/encoder/motion.h
> --- a/source/encoder/motion.h   Wed May 24 20:01:59 2017 +0530
> +++ b/source/encoder/motion.h   Mon Jun 05 15:20:44 2017 +0530
> @@ -92,6 +92,7 @@
>                 chromaSatd(refYuv.getCrAddr(puPartIdx), refYuv.m_csize,
> fencPUYuv.m_buf[2], fencPUYuv.m_csize);
>      }
>
> +    void refineMV(ReferencePlanes* ref, const MV& mvmin, const MV& mvmax,
> const MV& qmvp, MV& outQMv);
>      int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV
> & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange,
> MV & outQMv, pixel *srcReferencePlane = 0);
>
>      int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
> diff -r de49a722b256 -r c04d02d71f20 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Wed May 24 20:01:59 2017 +0530
> +++ b/source/encoder/search.cpp Mon Jun 05 15:20:44 2017 +0530
> @@ -2108,6 +2108,17 @@
>      }
>  }
>
> +void Search::searchMV(Mode& interMode, const PredictionUnit& pu, int
> list, int ref, MV& outmv)
> +{
> +    CUData& cu = interMode.cu;
> +    const Slice *slice = m_slice;
> +    MV mv = cu.m_mv[list][pu.puAbsPartIdx];
> +    cu.clipMv(mv);
> +    MV mvmin, mvmax;
> +    setSearchRange(cu, mv, m_param->searchRange, mvmin, mvmax);
> +    m_me.refineMV(&slice->m_mref[list][ref], mvmin, mvmax, mv, outmv);
> +}
> +
>  /* find the best inter prediction for each PU of specified mode */
>  void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool
> bChromaMC, uint32_t refMasks[2])
>  {
> diff -r de49a722b256 -r c04d02d71f20 source/encoder/search.h
> --- a/source/encoder/search.h   Wed May 24 20:01:59 2017 +0530
> +++ b/source/encoder/search.h   Mon Jun 05 15:20:44 2017 +0530
> @@ -311,6 +311,7 @@
>      // estimation inter prediction (non-skip)
>      void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool
> bChromaMC, uint32_t masks[2]);
>
> +    void     searchMV(Mode& interMode, const PredictionUnit& pu, int
> list, int ref, MV& outmv);
>      // encode residual and compute rd-cost for inter mode
>      void     encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom&
> cuGeom);
>      void     encodeResAndCalcRdSkipCU(Mode& interMode);
> diff -r de49a722b256 -r c04d02d71f20 source/x265.h
> --- a/source/x265.h     Wed May 24 20:01:59 2017 +0530
> +++ b/source/x265.h     Mon Jun 05 15:20:44 2017 +0530
> @@ -1449,6 +1449,9 @@
>      /* Enable inter refinement in load mode*/
>      int       interRefine;
>
> +    /* Enable motion vector refinement in load mode*/
> +    int       mvRefine;
> +
>  } x265_param;
>
>  /* x265_param_alloc:
> diff -r de49a722b256 -r c04d02d71f20 source/x265cli.h
> --- a/source/x265cli.h  Wed May 24 20:01:59 2017 +0530
> +++ b/source/x265cli.h  Mon Jun 05 15:20:44 2017 +0530
> @@ -277,6 +277,8 @@
>      { "dhdr10-info",    required_argument, NULL, 0 },
>      { "dhdr10-opt",           no_argument, NULL, 0},
>      { "no-dhdr10-opt",        no_argument, NULL, 0},
> +    { "refine-mv",            no_argument, NULL, 0 },
> +    { "no-refine-mv",         no_argument, NULL, 0 },
>      { 0, 0, 0, 0 },
>      { 0, 0, 0, 0 },
>      { 0, 0, 0, 0 },
> @@ -448,6 +450,7 @@
>      H0("   --scale-factor <int>          Specify factor by which input
> video is scaled down for analysis save mode. Default %d\n",
> param->scaleFactor);
>      H0("   --[no-]refine-intra           Enable intra refinement for load
> mode. Default %s\n", OPT(param->intraRefine));
>      H0("   --[no-]refine-inter           Enable inter refinement for load
> mode. Default %s\n", OPT(param->interRefine));
> +    H0("   --[no-]refine-mv              Enable mv refinement for load
> mode. Default %s\n", OPT(param->mvRefine));
>      H0("   --aq-mode <integer>           Mode for Adaptive Quantization -
> 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark
> scenes. Default %d\n", param->rc.aqMode);
>      H0("   --aq-strength <float>         Reduces blocking and blurring in
> flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength);
>      H0("   --[no-]aq-motion              Adaptive Quantization based on
> the relative motion of each CU w.r.t., frame. Default %s\n",
> OPT(param->bOptCUDeltaQP));
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170606/9d5f3f16/attachment-0001.html>