[x265] [PATCH] search: move MVP Index selection to motion estimate to avoid code duplication
Steve Borho
steve at borho.org
Wed Apr 29 20:59:12 CEST 2015
On 04/29, gopu at multicorewareinc.com wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1430293410 -19800
> # Wed Apr 29 13:13:30 2015 +0530
> # Node ID cc6da5218a188ded2d239bb8dbf6c3399978c93f
> # Parent c4d9ee2cef03ef74f5623784d514ffcdf725bec4
> search: move MVP Index selection to motion estimate to avoid code duplication
>
> diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Tue Apr 28 14:34:45 2015 -0500
> +++ b/source/encoder/motion.cpp Wed Apr 29 13:13:30 2015 +0530
> @@ -1280,3 +1280,45 @@
>
> return cost;
> }
> +
> +int MotionEstimate::getBestmvpIdx(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV* amvp, const int merange, const bool bFrameParallel)
> +{
> + pixel* dst = dstYuv.getLumaAddr(pu.puAbsPartIdx);
> + intptr_t dstStride = dstYuv.m_size;
> + int mvpidx = 0;
> + int mvpcost = MotionEstimate::COST_MAX;
> + intptr_t srcStride = refPic.m_stride;
> + int cost;
eh, I fail to see how this is avoiding code duplication when you've
copied the motion compensation code below, when this class already
has subpelCompare()
> + if (amvp[0] == amvp[1])
> + mvpidx = 0;
this should be "return 0;" - as written, the assignment is a no-op (mvpidx is already 0) and the loop below still runs
> + for (int i = 0; i < AMVP_NUM_CANDS; i++)
> + {
> + int xFrac = amvp[i].x & 0x3;
> + int yFrac = amvp[i].y & 0x3;
> + intptr_t srcOffset = (amvp[i].x >> 2) + (amvp[i].y >> 2) * srcStride;
> + const pixel* src = refPic.getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx) + srcOffset;
this would crash since the MVP is no longer being clipped to the
available pixels
> + // NOTE: skip mvCand if Y is > merange and -FN>1
> + if (bFrameParallel && (amvp[i].y >= (merange + 1) * 4))
> + continue;
> +
> + if (!(yFrac | xFrac))
> + primitives.pu[partEnum].copy_pp(dst, dstStride, src, srcStride);
> + else if (!yFrac)
> + primitives.pu[partEnum].luma_hpp(src, srcStride, dst, dstStride, xFrac);
> + else if (!xFrac)
> + primitives.pu[partEnum].luma_vpp(src, srcStride, dst, dstStride, yFrac);
> + else
> + primitives.pu[partEnum].luma_hvpp(src, srcStride, dst, dstStride, xFrac, yFrac);
> +
> + cost = bufSAD(dst, dstStride);
> + if (cost < mvpcost)
> + {
> + mvpcost = cost;
> + mvpidx = i;
> + }
> + }
> + return mvpidx;
> +}
> diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/motion.h
> --- a/source/encoder/motion.h Tue Apr 28 14:34:45 2015 -0500
> +++ b/source/encoder/motion.h Wed Apr 29 13:13:30 2015 +0530
> @@ -29,6 +29,7 @@
> #include "mv.h"
> #include "bitcost.h"
> #include "yuv.h"
> +#include "predict.h"
>
> namespace x265 {
> // private x265 namespace
> @@ -93,6 +94,8 @@
>
> int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
>
> + int getBestmvpIdx(const PredictionUnit& pu, Yuv& dstYuv, const PicYuv& refPic, const MV* amvp, const int merange, const bool bFrameParallel);
> +
> protected:
>
> inline void StarPatternSearch(ReferencePlanes *ref,
> diff -r c4d9ee2cef03 -r cc6da5218a18 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Tue Apr 28 14:34:45 2015 -0500
> +++ b/source/encoder/search.cpp Wed Apr 29 13:13:30 2015 +0530
> @@ -1923,38 +1923,17 @@
> MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];
> int numMvc = interMode.cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
>
> - int mvpIdx = 0;
> int merange = m_param->searchRange;
> MotionData* bestME = interMode.bestME[part];
> -
> - if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
> - {
> - uint32_t bestCost = MAX_INT;
> - for (int i = 0; i < AMVP_NUM_CANDS; i++)
> - {
> - MV mvCand = interMode.amvpCand[list][ref][i];
> -
> - // NOTE: skip mvCand if Y is > merange and -FN>1
> - if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
> - continue;
> -
> - interMode.cu.clipMv(mvCand);
this clip here prevents crashes
> - Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
> - predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand);
> - uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
> -
> - if (bestCost > cost)
> - {
> - bestCost = cost;
> - mvpIdx = i;
> - }
> - }
> - }
> -
> - MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
> + MV mvmin, mvmax, outmv, mvp;
> + MV *amvpCand = interMode.amvpCand[list][ref];
> +
> + // Pick the best possible MVP IDX from AMVP candidates based on least residual
> + Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
> + int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
> + mvp = amvpCand[mvpIdx];
> +
> setSearchRange(interMode.cu, mvp, merange, mvmin, mvmax);
> -
> int satdCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
>
> /* Get total cost of partition, but only include MV bit cost once */
> @@ -1962,7 +1941,7 @@
> uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
>
> /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
> - checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
> + checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
>
> /* tie goes to the smallest ref ID, just like --no-pme */
> ScopedLock _lock(master.m_meLock);
> @@ -2051,46 +2030,23 @@
> bits += getTUBits(ref, numRefIdx[list]);
>
> int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
> -
> - // Pick the best possible MVP from AMVP candidates based on least residual
> - int mvpIdx = 0;
> int merange = m_param->searchRange;
> -
> - if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
> - {
> - uint32_t bestCost = MAX_INT;
> - for (int i = 0; i < AMVP_NUM_CANDS; i++)
> - {
> - MV mvCand = interMode.amvpCand[list][ref][i];
> -
> - // NOTE: skip mvCand if Y is > merange and -FN>1
> - if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
> - continue;
> -
> - cu.clipMv(mvCand);
> - predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
> - uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
> -
> - if (bestCost > cost)
> - {
> - bestCost = cost;
> - mvpIdx = i;
> - }
> - }
> - }
> -
> - MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
> -
> - int satdCost;
> + MV mvmin, mvmax, outmv, mvp;
> + MV *amvpCand = interMode.amvpCand[list][ref];
> +
> + // Pick the best possible MVP IDX from AMVP candidates based on least residual
> + int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
I made an attempt at doing this using the existing methods in motion.cpp
but there is this chicken and egg issue with MV clipping and the search
range mvmin/mvmax generation that makes it impossible to do without
making motion.cpp aware of a whole pile of other classes, and I'm not
sure it is worth the trouble.
The idea was to merge MVP selection with the logic that picks between
the MVP and 0 as the search origin. It doesn't look possible to do this
in motion.cpp, but perhaps it can be done as a helper function in
search.cpp. I'll try this approach and see if it goes anywhere.
> + mvp = amvpCand[mvpIdx];
> +
> setSearchRange(cu, mvp, merange, mvmin, mvmax);
> - satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
> + int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
>
> /* Get total cost of partition, but only include MV bit cost once */
> bits += m_me.bitcost(outmv);
> uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
>
> /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
> - checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
> + checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
>
> if (cost < bestME[list].cost)
> {
> @@ -2134,35 +2090,13 @@
> bits += getTUBits(ref, numRefIdx[list]);
>
> int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
> -
> - // Pick the best possible MVP from AMVP candidates based on least residual
> - int mvpIdx = 0;
> int merange = m_param->searchRange;
> -
> - if (interMode.amvpCand[list][ref][0] != interMode.amvpCand[list][ref][1])
> - {
> - uint32_t bestCost = MAX_INT;
> - for (int i = 0; i < AMVP_NUM_CANDS; i++)
> - {
> - MV mvCand = interMode.amvpCand[list][ref][i];
> -
> - // NOTE: skip mvCand if Y is > merange and -FN>1
> - if (m_bFrameParallel && (mvCand.y >= (merange + 1) * 4))
> - continue;
> -
> - cu.clipMv(mvCand);
> - predInterLumaPixel(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, mvCand);
> - uint32_t cost = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
> -
> - if (bestCost > cost)
> - {
> - bestCost = cost;
> - mvpIdx = i;
> - }
> - }
> - }
> -
> - MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[list][ref][mvpIdx];
> + MV mvmin, mvmax, outmv, mvp;
> + MV *amvpCand = interMode.amvpCand[list][ref];
> +
> + // Pick the best possible MVP IDX from AMVP candidates based on least residual
> + int mvpIdx = m_me.getBestmvpIdx(pu, tmpPredYuv, *slice->m_refPicList[list][ref]->m_reconPic, amvpCand, merange, m_bFrameParallel);
> + mvp = amvpCand[mvpIdx];
>
> setSearchRange(cu, mvp, merange, mvmin, mvmax);
> int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
> @@ -2172,7 +2106,7 @@
> uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
>
> /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
> - checkBestMVP(interMode.amvpCand[list][ref], outmv, mvp, mvpIdx, bits, cost);
> + checkBestMVP(amvpCand, outmv, mvp, mvpIdx, bits, cost);
>
> if (cost < bestME[list].cost)
> {
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list