<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Wed, May 20, 2015 at 9:21 PM, Steve Borho <span dir="ltr"><<a href="mailto:steve@borho.org" target="_blank">steve@borho.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On 05/20, Santhoshini Sekar wrote:<br>

> On Tue, May 19, 2015 at 9:35 PM, Steve Borho <<a href="mailto:steve@borho.org">steve@borho.org</a>> wrote:<br>

><br>

> > On 05/19, <a href="mailto:santhoshini@multicorewareinc.com">santhoshini@multicorewareinc.com</a> wrote:<br>

> > > # HG changeset patch<br>

> > > # User Santhoshini Sekar<<a href="mailto:santhoshini@multicorewareinc.com">santhoshini@multicorewareinc.com</a>><br>

> > > # Date 1432028003 -19800<br>

> > > #      Tue May 19 15:03:23 2015 +0530<br>

> > > # Node ID 904ac8808858baaeaaa333b5a105af50c1107db0<br>

> > > # Parent  d7b100e51e828833eee006f1da93e499ac161d28<br>

> > > analysis: add an additional round of sub-pel refinement for inter 2Nx2N<br>

> > in rd 5 and 6<br>

> > ><br>

> > > diff -r d7b100e51e82 -r 904ac8808858 source/common/cudata.cpp<br>

> > > --- a/source/common/cudata.cpp        Mon May 18 18:24:08 2015 -0500<br>

> > > +++ b/source/common/cudata.cpp        Tue May 19 15:03:23 2015 +0530<br>

> > > @@ -456,6 +456,41 @@<br>

> > >      memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) *<br>

> > tmpC);<br>

> > >  }<br>

> > ><br>

> > > +void CUData::copyToCU(CUData& ctu) const<br>

> > > +{<br>

> > > +    m_partCopy((uint8_t*)ctu.m_qp, (uint8_t*)m_qp);<br>

> > > +    m_partCopy(ctu.m_log2CUSize, m_log2CUSize);<br>

> > > +    m_partCopy(ctu.m_lumaIntraDir, m_lumaIntraDir);<br>

> > > +    m_partCopy(ctu.m_tqBypass, m_tqBypass);<br>

> > > +    m_partCopy((uint8_t*)ctu.m_refIdx[0], (uint8_t*)m_refIdx[0]);<br>

> > > +    m_partCopy((uint8_t*)ctu.m_refIdx[1], (uint8_t*)m_refIdx[1]);<br>

> > > +    m_partCopy(ctu.m_cuDepth, m_cuDepth);<br>

> > > +    m_partCopy(ctu.m_predMode, m_predMode);<br>

> > > +    m_partCopy(ctu.m_partSize, m_partSize);<br>

> > > +    m_partCopy(ctu.m_mergeFlag, m_mergeFlag);<br>

> > > +    m_partCopy(ctu.m_interDir, m_interDir);<br>

> > > +    m_partCopy(ctu.m_mvpIdx[0], m_mvpIdx[0]);<br>

> > > +    m_partCopy(ctu.m_mvpIdx[1], m_mvpIdx[1]);<br>

> > > +    m_partCopy(ctu.m_tuDepth, m_tuDepth);<br>

> > > +    m_partCopy(ctu.m_transformSkip[0], m_transformSkip[0]);<br>

> > > +    m_partCopy(ctu.m_transformSkip[1], m_transformSkip[1]);<br>

> > > +    m_partCopy(ctu.m_transformSkip[2], m_transformSkip[2]);<br>

> > > +    m_partCopy(ctu.m_cbf[0], m_cbf[0]);<br>

> > > +    m_partCopy(ctu.m_cbf[1], m_cbf[1]);<br>

> > > +    m_partCopy(ctu.m_cbf[2], m_cbf[2]);<br>

> > > +    m_partCopy(ctu.m_chromaIntraDir, m_chromaIntraDir);<br>

> > > +<br>

> > > +    memcpy(ctu.m_mv[0],  m_mv[0],  m_numPartitions * sizeof(MV));<br>

> > > +    memcpy(ctu.m_mv[1],  m_mv[1],  m_numPartitions * sizeof(MV));<br>

> > > +    memcpy(ctu.m_mvd[0], m_mvd[0], m_numPartitions * sizeof(MV));<br>

> > > +    memcpy(ctu.m_mvd[1], m_mvd[1], m_numPartitions * sizeof(MV));<br>

> > > +<br>

> > > +    memcpy(ctu.m_trCoeff[0], m_trCoeff[0], sizeof(coeff_t));<br>

> > > +<br>

> > > +    memcpy(ctu.m_trCoeff[1], m_trCoeff[1], sizeof(coeff_t));<br>

> > > +    memcpy(ctu.m_trCoeff[2], m_trCoeff[2], sizeof(coeff_t));<br>

> > w/s nit, remove the blank line above<br>

> > > +}<br>

> > > +<br>

> > >  /* The reverse of copyToPic, called only by encodeResidue */<br>

> > >  void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)<br>

> > >  {<br>

> > > diff -r d7b100e51e82 -r 904ac8808858 source/common/cudata.h<br>

> > > --- a/source/common/cudata.h  Mon May 18 18:24:08 2015 -0500<br>

> > > +++ b/source/common/cudata.h  Tue May 19 15:03:23 2015 +0530<br>

> > > @@ -188,6 +188,7 @@<br>

> > >      void     copyPartFrom(const CUData& cu, const CUGeom& childGeom,<br>

> > uint32_t subPartIdx);<br>

> > >      void     setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx);<br>

> > >      void     copyToPic(uint32_t depth) const;<br>

> > > +    void     copyToCU(CUData& ctu) const;<br>

> > ><br>

> > >      /* RD-0 methods called only from encodeResidue */<br>

> > >      void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom);<br>

> > > diff -r d7b100e51e82 -r 904ac8808858 source/encoder/analysis.cpp<br>

> > > --- a/source/encoder/analysis.cpp     Mon May 18 18:24:08 2015 -0500<br>

> > > +++ b/source/encoder/analysis.cpp     Tue May 19 15:03:23 2015 +0530<br>

> > > @@ -739,7 +739,31 @@<br>

> > >          cuStat.count[depth] += 1;<br>

> > >          cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /<br>

> > cuStat.count[depth];<br>

> > >      }<br>

> > > +    /* If zero-residual, do not bother doing subpelRefine */<br>

> > > +    bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER)<br>

> > && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] ==<br>

> > SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);<br>

> > > +    if (subpelRefine && m_param->rdLevel > 4)<br>

> > > +    {<br>

> > > +        int hpelDirs =<br>

> > MotionEstimate::hpelDirCount(m_param->subpelRefine);<br>

> > ><br>

> > > +        Mode* rdRefine = &md.pred[PRED_RD_REFINE];<br>

> > > +        rdRefine->initCosts();<br>

> > > +        rdRefine->cu.initSubCU(parentCTU, cuGeom, qp);<br>

> > > +        memcpy(rdRefine->bestME[0], md.bestMode->bestME[0],<br>

> > sizeof(MotionData));<br>

> > > +        if (m_slice->m_sliceType == B_SLICE)<br>

> > > +            memcpy(&rdRefine->bestME[0][1], &md.bestMode->bestME[0][1],<br>

> > sizeof(MotionData));<br>

> ><br>

> > using copyToCU() here means cu.initSubCU() was probably a waste of time<br>

> ><br>

> > > +        md.bestMode->cu.copyToCU(rdRefine->cu);<br>

> > > +        rdRefine->reconYuv.copyFromYuv(md.bestMode->reconYuv);<br>

> > > +        rdRefine->predYuv.copyFromYuv(md.bestMode->predYuv);<br>

> > > +<br>

> > > +        for (int i = 1; i <= hpelDirs; i++)<br>

> > > +        {<br>

> > > +            qPelRefine(*rdRefine, cuGeom, true, i);<br>

> > > +            if (m_slice->m_pps->bUseDQP && depth <=<br>

> > m_slice->m_pps->maxCuDQPDepth)<br>

> > > +                setLambdaFromQP(parentCTU,<br>

> > calculateQpforCuSize(parentCTU, cuGeom));<br>

> ><br>

> > why is this necessary here? calculateQpforCuSize(parentCTU, cuGeom))<br>

> > better return the same 'qp' value in bestMode->cu else you are in a lot<br>

> > of trouble. Why do this inside the loop?<br>

> ><br>

><br>

> We need this step here because md.bestMode->cu.m_qp[0] will be changed if<br>

> there was no residual and it will be set with RefQP inside checkDQP().<br>

> But we want to pass the original QP. Yes this needn't be inside loop, can<br>

> be done just once.<br>

><br>

> > > +            encodeResAndCalcRdInterCU(*rdRefine, cuGeom);<br>

> > > +            checkBestMode(*rdRefine, depth);<br>

> > > +        }<br>

> ><br>

> > I fail to see how this works. If hpelDirs is 8, and at i=1 the RD cost<br>

> > is better than bestMode, then checkBestMode will point to rdRefine. So<br>

> > far so good.. but then at i=8 it could have worse cost and you're stuck<br>

> > because checkBestMode() is not going to go back to the original mode,<br>

> > there is only one bestMode pointer.<br>

> ><br>

> > Why do we want to go back to the original best mode? Can't we use the best<br>

> mode chosen after each subpel refinement?<br>

<br>

</div></div>No, it won't.<br>

<span class=""><br>

> > You can't avoid doing an extra encodeResAndCalcRdInterCU() at the end,<br>

> > so you might as well forget about the extra PRED_RD_REFINE.<br>

> ><br>

> By using PRED_RD_REFINE we'll have all temporary data in it and I need not<br>

> do the final extra encode as the pointer md.bestMode will always have the<br>

> best data.<br>

<br>

</span>No, it doesn't, you are still confused about what checkBestMode() does.<br>

<br>

With your algorithm bestMode starts out as a pointer to<br>

md.pred[PRED_2Nx2N] and by the end bestMost points to<br>

md.pred[PRED_RD_REFINE] if *any* of the subpel refinement steps had less<br>

RD cost then PRED_2Nx2N.  But the contents of md.pred[PRED_RD_REFINE] at<br>

the end of that for() loop over the refine offsets will *always* be the<br>

output of the last HPEL refinement attempt.<br>

<br>

encodeResAndCalcRdInterCU(*rdRefine, cuGeom) is overwriting the<br>

coefficients and RD data in md.pred[PRED_RD_REFINE], and the bestMode<br>

pointer is not tracking which invocation of encodeResAndCalcRdInterCU().<br>

<br>

If bestMode already points to md.pred[PRED_RD_REFINE], then calling<br>

checkBestMode(*rdRefine, depth) has no effect.<br></blockquote><div><br></div><div>Yes, I understand what is happening here. I'll correct this and resend the patch. </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

You have to use something like this:<br>

<span class="im HOEnZb"><br>

> >     int bcost = bestMode->rdCost;<br>

> >     itn bdir = 0;<br>

> ><br>

> >     for (int i = 1; i <= hpelDirs; i++)<br>

> >     {<br>

> >        qPelRefine(*bestMode, cuGeom, true, i);<br>

> >        encodeResAndCalcRdInterCU(*bestMode, cuGeom);<br>

> >        COPY2_IF_LT(bcost, bestMode->rdCost, bdir, i);<br>

> >     }<br>

> ><br>

> >     qPelRefine(*bestMode, cuGeom, true, bdir);<br>

> >     encodeResAndCalcRdInterCU(*bestMode, cuGeom);<br>

<br>

</span><div class="HOEnZb"><div class="h5">--<br>

Steve Borho<br>

_______________________________________________<br>

x265-devel mailing list<br>

<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>

<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>

</div></div></blockquote></div><br></div></div>