[x265] [PATCH] analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6

Thu May 21 05:29:27 CEST 2015

On Wed, May 20, 2015 at 9:21 PM, Steve Borho <steve at borho.org> wrote:

> On 05/20, Santhoshini Sekar wrote:
> > On Tue, May 19, 2015 at 9:35 PM, Steve Borho <steve at borho.org> wrote:
> >
> > > On 05/19, santhoshini at multicorewareinc.com wrote:
> > > > # HG changeset patch
> > > > # User Santhoshini Sekar<santhoshini at multicorewareinc.com>
> > > > # Date 1432028003 -19800
> > > > #      Tue May 19 15:03:23 2015 +0530
> > > > # Node ID 904ac8808858baaeaaa333b5a105af50c1107db0
> > > > # Parent  d7b100e51e828833eee006f1da93e499ac161d28
> > > > analysis: add an additional round of sub-pel refinement for inter
> 2Nx2N
> > > in rd 5 and 6
> > > >
> > > > diff -r d7b100e51e82 -r 904ac8808858 source/common/cudata.cpp
> > > > --- a/source/common/cudata.cpp        Mon May 18 18:24:08 2015 -0500
> > > > +++ b/source/common/cudata.cpp        Tue May 19 15:03:23 2015 +0530
> > > > @@ -456,6 +456,41 @@
> > > >      memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) *
> > > tmpC);
> > > >  }
> > > >
> > > > +void CUData::copyToCU(CUData& ctu) const
> > > > +{
> > > > +    m_partCopy((uint8_t*)ctu.m_qp, (uint8_t*)m_qp);
> > > > +    m_partCopy(ctu.m_log2CUSize, m_log2CUSize);
> > > > +    m_partCopy(ctu.m_lumaIntraDir, m_lumaIntraDir);
> > > > +    m_partCopy(ctu.m_tqBypass, m_tqBypass);
> > > > +    m_partCopy((uint8_t*)ctu.m_refIdx[0], (uint8_t*)m_refIdx[0]);
> > > > +    m_partCopy((uint8_t*)ctu.m_refIdx[1], (uint8_t*)m_refIdx[1]);
> > > > +    m_partCopy(ctu.m_cuDepth, m_cuDepth);
> > > > +    m_partCopy(ctu.m_predMode, m_predMode);
> > > > +    m_partCopy(ctu.m_partSize, m_partSize);
> > > > +    m_partCopy(ctu.m_mergeFlag, m_mergeFlag);
> > > > +    m_partCopy(ctu.m_interDir, m_interDir);
> > > > +    m_partCopy(ctu.m_mvpIdx[0], m_mvpIdx[0]);
> > > > +    m_partCopy(ctu.m_mvpIdx[1], m_mvpIdx[1]);
> > > > +    m_partCopy(ctu.m_tuDepth, m_tuDepth);
> > > > +    m_partCopy(ctu.m_transformSkip[0], m_transformSkip[0]);
> > > > +    m_partCopy(ctu.m_transformSkip[1], m_transformSkip[1]);
> > > > +    m_partCopy(ctu.m_transformSkip[2], m_transformSkip[2]);
> > > > +    m_partCopy(ctu.m_cbf[0], m_cbf[0]);
> > > > +    m_partCopy(ctu.m_cbf[1], m_cbf[1]);
> > > > +    m_partCopy(ctu.m_cbf[2], m_cbf[2]);
> > > > +    m_partCopy(ctu.m_chromaIntraDir, m_chromaIntraDir);
> > > > +
> > > > +    memcpy(ctu.m_mv[0],  m_mv[0],  m_numPartitions * sizeof(MV));
> > > > +    memcpy(ctu.m_mv[1],  m_mv[1],  m_numPartitions * sizeof(MV));
> > > > +    memcpy(ctu.m_mvd[0], m_mvd[0], m_numPartitions * sizeof(MV));
> > > > +    memcpy(ctu.m_mvd[1], m_mvd[1], m_numPartitions * sizeof(MV));
> > > > +
> > > > +    memcpy(ctu.m_trCoeff[0], m_trCoeff[0], sizeof(coeff_t));
> > > > +
> > > > +    memcpy(ctu.m_trCoeff[1], m_trCoeff[1], sizeof(coeff_t));
> > > > +    memcpy(ctu.m_trCoeff[2], m_trCoeff[2], sizeof(coeff_t));
> > > w/s nit, remove the blank line above
> > > > +}
> > > > +
> > > >  /* The reverse of copyToPic, called only by encodeResidue */
> > > >  void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
> > > >  {
> > > > diff -r d7b100e51e82 -r 904ac8808858 source/common/cudata.h
> > > > --- a/source/common/cudata.h  Mon May 18 18:24:08 2015 -0500
> > > > +++ b/source/common/cudata.h  Tue May 19 15:03:23 2015 +0530
> > > > @@ -188,6 +188,7 @@
> > > >      void     copyPartFrom(const CUData& cu, const CUGeom& childGeom,
> > > uint32_t subPartIdx);
> > > >      void     setEmptyPart(const CUGeom& childGeom, uint32_t
> subPartIdx);
> > > >      void     copyToPic(uint32_t depth) const;
> > > > +    void     copyToCU(CUData& ctu) const;
> > > >
> > > >      /* RD-0 methods called only from encodeResidue */
> > > >      void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom);
> > > > diff -r d7b100e51e82 -r 904ac8808858 source/encoder/analysis.cpp
> > > > --- a/source/encoder/analysis.cpp     Mon May 18 18:24:08 2015 -0500
> > > > +++ b/source/encoder/analysis.cpp     Tue May 19 15:03:23 2015 +0530
> > > > @@ -739,7 +739,31 @@
> > > >          cuStat.count[depth] += 1;
> > > >          cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
> > > cuStat.count[depth];
> > > >      }
> > > > +    /* If zero-residual, do not bother doing subpelRefine */
> > > > +    bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] &
> MODE_INTER)
> > > && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0]
> ==
> > > SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);
> > > > +    if (subpelRefine && m_param->rdLevel > 4)
> > > > +    {
> > > > +        int hpelDirs =
> > > MotionEstimate::hpelDirCount(m_param->subpelRefine);
> > > >
> > > > +        Mode* rdRefine = &md.pred[PRED_RD_REFINE];
> > > > +        rdRefine->initCosts();
> > > > +        rdRefine->cu.initSubCU(parentCTU, cuGeom, qp);
> > > > +        memcpy(rdRefine->bestME[0], md.bestMode->bestME[0],
> > > sizeof(MotionData));
> > > > +        if (m_slice->m_sliceType == B_SLICE)
> > > > +            memcpy(&rdRefine->bestME[0][1],
> &md.bestMode->bestME[0][1],
> > > sizeof(MotionData));
> > >
> > > using copyToCU() here means cu.initSubCU() was probably a waste of time
> > >
> > > > +        md.bestMode->cu.copyToCU(rdRefine->cu);
> > > > +        rdRefine->reconYuv.copyFromYuv(md.bestMode->reconYuv);
> > > > +        rdRefine->predYuv.copyFromYuv(md.bestMode->predYuv);
> > > > +
> > > > +        for (int i = 1; i <= hpelDirs; i++)
> > > > +        {
> > > > +            qPelRefine(*rdRefine, cuGeom, true, i);
> > > > +            if (m_slice->m_pps->bUseDQP && depth <=
> > > m_slice->m_pps->maxCuDQPDepth)
> > > > +                setLambdaFromQP(parentCTU,
> > > calculateQpforCuSize(parentCTU, cuGeom));
> > >
> > > why is this necessary here? calculateQpforCuSize(parentCTU, cuGeom))
> > > better return the same 'qp' value in bestMode->cu else you are in a lot
> > > of trouble. Why do this inside the loop?
> > >
> >
> > We need this step here because md.bestMode->cu.m_qp[0] will be changed if
> > there was no residual and it will be set with RefQP inside checkDQP().
> > But we want to pass the original QP. Yes this needn't be inside loop, can
> > be done just once.
> >
> > > > +            encodeResAndCalcRdInterCU(*rdRefine, cuGeom);
> > > > +            checkBestMode(*rdRefine, depth);
> > > > +        }
> > >
> > > I fail to see how this works. If hpelDirs is 8, and at i=1 the RD cost
> > > is better than bestMode, then checkBestMode will point to rdRefine. So
> > > far so good.. but then at i=8 it could have worse cost and you're stuck
> > > because checkBestMode() is not going to go back to the original mode,
> > > there is only one bestMode pointer.
> > >
> > > Why do we want to go back to the original best mode? Can't we use the
> best
> > mode chosen after each subpel refinement?
>
> No, it won't.
>
> > > You can't avoid doing an extra encodeResAndCalcRdInterCU() at the end,
> > > so you might as well forget about the extra PRED_RD_REFINE.
> > >
> > By using PRED_RD_REFINE we'll have all temporary data in it and I need
> not
> > do the final extra encode as the pointer md.bestMode will always have the
> > best data.
>
> No, it doesn't, you are still confused about what checkBestMode() does.
>
> With your algorithm bestMode starts out as a pointer to
> md.pred[PRED_2Nx2N] and by the end bestMost points to
> md.pred[PRED_RD_REFINE] if *any* of the subpel refinement steps had less
> RD cost then PRED_2Nx2N.  But the contents of md.pred[PRED_RD_REFINE] at
> the end of that for() loop over the refine offsets will *always* be the
> output of the last HPEL refinement attempt.
>
> encodeResAndCalcRdInterCU(*rdRefine, cuGeom) is overwriting the
> coefficients and RD data in md.pred[PRED_RD_REFINE], and the bestMode
> pointer is not tracking which invocation of encodeResAndCalcRdInterCU().
>
> If bestMode already points to md.pred[PRED_RD_REFINE], then calling
> checkBestMode(*rdRefine, depth) has no effect.
>

Yes, I understand what is happening here. I'll correct this and resend the
patch.

>
> You have to use something like this:
>
> > >     int bcost = bestMode->rdCost;
> > >     itn bdir = 0;
> > >
> > >     for (int i = 1; i <= hpelDirs; i++)
> > >     {
> > >        qPelRefine(*bestMode, cuGeom, true, i);
> > >        encodeResAndCalcRdInterCU(*bestMode, cuGeom);
> > >        COPY2_IF_LT(bcost, bestMode->rdCost, bdir, i);
> > >     }
> > >
> > >     qPelRefine(*bestMode, cuGeom, true, bdir);
> > >     encodeResAndCalcRdInterCU(*bestMode, cuGeom);
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150521/ceba9c91/attachment.html>