[x265] [PATCH] analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6
Steve Borho
steve at borho.org
Thu May 21 17:25:32 CEST 2015
On 05/21, Steve Borho wrote:
> On 05/21, santhoshini at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Santhoshini Sekar<santhoshini at multicorewareinc.com>
> > # Date 1432182660 -19800
> > # Thu May 21 10:01:00 2015 +0530
> > # Node ID 630b378b744f4bf442839680f5120d7d299d2acd
> > # Parent dc4fcfc574ade14ecc841797ad08be9753fad58e
> > analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6
> >
> > diff -r dc4fcfc574ad -r 630b378b744f source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp Wed May 20 12:17:44 2015 -0500
> > +++ b/source/encoder/analysis.cpp Thu May 21 10:01:00 2015 +0530
> > @@ -742,6 +742,24 @@
> > cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
> > }
> >
> > + /* If zero-residual, do not bother doing subpelRefine */
> > + bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER) && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);
>
> these long expressions should be broken into two lines
> 'subpelRefine' is not a very descriptive term, this is rd-refine since
> it is refining based on RD cost
>
> > + if (subpelRefine && m_param->rdLevel > 4)
> > + {
> > + int hpelDirs = MotionEstimate::hpelDirCount(m_param->subpelRefine);
> > + if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth)
> > + setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom));
> > + uint64_t bcost = md.bestMode->rdCost;
> > + int bdir = 0;
> > + for (int i = 1; i <= hpelDirs; i++)
> > + {
> > + qPelRefine(*md.bestMode, cuGeom, true, i);
> > + encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
> > + COPY2_IF_LT(bcost, md.bestMode->rdCost, bdir, i);
> > + }
> > + qPelRefine(*md.bestMode, cuGeom, true, bdir);
> > + encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
> > + }
> > /* Copy best data to encData CTU and recon */
> > md.bestMode->cu.copyToPic(depth);
> > if (md.bestMode != &md.pred[PRED_SPLIT])
> > @@ -1312,6 +1330,24 @@
> > checkBestMode(*splitPred, depth);
> > }
> >
> > + /* If zero-residual, do not bother doing subpelRefine */
> > + bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER) && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);
> > + if (subpelRefine)
> > + {
> > + int hpelDirs = MotionEstimate::hpelDirCount(m_param->subpelRefine);
> > + uint64_t bcost = md.bestMode->rdCost;
> > + int bdir = 0;
> > + if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth)
> > + setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom));
> > + for (int i = 1; i <= hpelDirs; i++)
> > + {
> > + qPelRefine(*md.bestMode, cuGeom, true, i);
> > + encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
> > + COPY2_IF_LT(bcost, md.bestMode->rdCost, bdir, i);
> > + }
> > + qPelRefine(*md.bestMode, cuGeom, true, bdir);
> > + encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
> > + }
> > /* Copy best data to encData CTU and recon */
> > md.bestMode->cu.copyToPic(depth);
> > if (md.bestMode != &md.pred[PRED_SPLIT])
> > diff -r dc4fcfc574ad -r 630b378b744f source/encoder/motion.cpp
> > --- a/source/encoder/motion.cpp Wed May 20 12:17:44 2015 -0500
> > +++ b/source/encoder/motion.cpp Thu May 21 10:01:00 2015 +0530
> > @@ -155,6 +155,11 @@
> > workload[subme].qpel_iters / 2;
> > }
> >
> > +int MotionEstimate::hpelDirCount(int subme)
> > +{
> > + return workload[subme].hpel_dirs;
> > +}
> > +
> > MotionEstimate::~MotionEstimate()
> > {
> > fencPUYuv.destroy();
> > @@ -1205,6 +1210,49 @@
> > return bcost;
> > }
> >
> > +int MotionEstimate::qPelCompare(ReferencePlanes *ref,
> > + const MV & mvmin,
> > + const MV & mvmax,
> > + const MV& mvp,
> > + const MV& mv,
> > + MV & outQMv,
> > + int halfPelIdx)
> > +{
> > + setMVP(mvp);
>
> no idea why this function is necessary
>
> > + MV qmvmin = mvmin.toQPel();
> > + MV qmvmax = mvmax.toQPel();
> > +
> > + MV fmv = mv.roundToFPel();
> > + fmv = fmv.clipped(qmvmin, qmvmax);
> > + int bcost = INT_MAX;
> > + const SubpelWorkload& wl = workload[this->subpelRefine];
> > +
> > + MV hmv = fmv + square1[halfPelIdx] * 2;
> > + bcost = subpelCompare(ref, hmv, satd) + mvcost(hmv);
> > + MV bmv = hmv;
> > +
> > + for (int iter = 0; iter < wl.qpel_iters; iter++)
> > + {
> > + int bdir = 0;
> > + for (int i = 1; i <= wl.qpel_dirs; i++)
> > + {
> > + MV qmv = hmv + square1[i];
> > + int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
> > + COPY2_IF_LT(bcost, cost, bdir, i);
> > + }
> > +
> > + if (bdir)
> > + bmv += square1[bdir];
> > + else
> > + break;
> > + }
> > +
> > + x265_emms();
> > + outQMv = bmv;
> > + return bcost;
> > +}
> > +
> > int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
> > {
> > intptr_t refStride = ref->lumaStride;
> > diff -r dc4fcfc574ad -r 630b378b744f source/encoder/motion.h
> > --- a/source/encoder/motion.h Wed May 20 12:17:44 2015 -0500
> > +++ b/source/encoder/motion.h Thu May 21 10:01:00 2015 +0530
> > @@ -69,6 +69,7 @@
> >
> > static void initScales();
> > static int hpelIterationCount(int subme);
> > + static int hpelDirCount(int subme);
> > void init(int method, int refine, int csp);
> >
> > /* Methods called at slice setup */
> > @@ -90,6 +91,7 @@
> > }
> >
> > int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv);
> > + int qPelCompare(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & mvp, const MV & mv, MV & outQMv, int halfPelIdx);
> >
> > int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
> >
> > diff -r dc4fcfc574ad -r 630b378b744f source/encoder/search.cpp
> > --- a/source/encoder/search.cpp Wed May 20 12:17:44 2015 -0500
> > +++ b/source/encoder/search.cpp Thu May 21 10:01:00 2015 +0530
> > @@ -2348,6 +2348,54 @@
> > interMode.sa8dBits += totalmebits;
> > }
> >
> > +void Search::qPelRefine(Mode& interMode, const CUGeom& cuGeom, bool bChromaSA8D, int halfpelIdx)
> > +{
> > + CUData& cu = interMode.cu;
> > + Yuv* predYuv = &interMode.predYuv;
> > +
> > + const Slice *slice = m_slice;
> > + uint32_t interDir = cu.m_interDir[0];
> > +
> > + const int* numRefIdx = slice->m_numRefIdx;
> > +
> > + MotionData* bestME = interMode.bestME[0];
> > + PredictionUnit pu(cu, cuGeom, 0);
> > +
> > + for (uint32_t list = 0; list < 2; list++)
> > + {
> > + if (interDir & (1 << list))
> > + {
> > + int ref = bestME[list].ref;
> > + uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
> > + bits += getTUBits(ref, numRefIdx[list]);
> > +
> > + int merange = m_param->searchRange;
> > +
> > + MV mvmin, mvmax, outmv, mvp = interMode.bestME[0][0].mvp;
> > + MV mv = interMode.bestME[0][0].mv;
> > +
> > + int satdCost;
> > + setSearchRange(cu, mv, merange, mvmin, mvmax);
> > + satdCost = m_me.qPelCompare(&slice->m_mref[list][ref], mvmin, mvmax, mvp, mv, outmv, halfpelIdx);
> > +
> > + /* Get total cost of partition, but only include MV bit cost once */
> > + bits += m_me.bitcost(outmv);
> > + uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
> > +
> > + if (cost < bestME[list].cost)
> > + {
> > + bestME[list].mv = outmv;
> > + bestME[list].cost = cost;
> > + bestME[list].bits = bits;
> > + }
> > + }
> > + }
> > + motionCompensation(cu, pu, *predYuv, true, bChromaSA8D);
>
> 1. bidir refinement should be a different feature; it needs to get built
> into predInterSearch() itself. rd-refine should only be used to
> refine a unidir prediction.
>
> 2. Doing all this work to update bestME[] satd and bits costs is a waste
> of time.
>
> 3. you're not writing the new MV into the CU, so motionCompensation
> below is just re-generating the original prediction for this CU,
> ignoring bestME
>
> 4. the whole point of rd-refine is to measure the full RD cost at each
> offset. using SATD to pick a direction in qPelCompare is defeating
> the point. All this function should be doing for hpel refine is:
>
> cu.m_mv[0][0] = interMode.bestME[0][0].mv + square1[halfPelIdx] * 2;
> motionCompensation(cu, pu, *predYuv, true, true);
>
> and qpel refine:
>
> cu.m_mv[0][0] = interMode.bestME[0][0].mv + square1[halfPelIdx];
> motionCompensation(cu, pu, *predYuv, true, true);
>
> it probably doesn't need to be a function at all
>
>
> At the end, the final refined MV must be fully configured in the CU with:
>
> cu.setPUMv(0, cu.m_mv[0][0], 0, 0);
> cu.m_mvd[0][0] = cu.m_mvd[0][0].mvp - cu.m_mv[0][0];
>
> And, thinking out loud, m_mvd probably needs to be removed from CUData
> since it can always be calculated by subtracting the MV from its MVP.
> Caching it has no useful value, except to trip us up when we forget to
> update it (we are not even setting it on bidir CUs now)
Ignore this bit about m_mvd; I forgot that we're storing mvpIdx but not
the mvp itself, so we have to keep mvd.
The rest still stands, except the example code needs to be:
int list = cu.m_interDir[0];
if (list == 3) /* do not refine */
/* try new hpel offset */
cu.m_mv[list][0] = interMode.bestME[list][0].mv + square1[halfPelIdx] * 2;
motionCompensation(cu, pu, *predYuv, true, true);
/* save final MV */
cu.setPUMv(list, cu.m_mv[list][0], 0, 0);
cu.m_mvd[list][0] = bestME[0][list].mvp - cu.m_mv[0][0];
--
Steve Borho
More information about the x265-devel
mailing list