[x265] [PATCH] analysis: re-order RD 0/4 analysis to do splits before ME or intra

Wed May 20 02:08:39 CEST 2015

On 05/19, Steve Borho wrote:
> On 05/19, ashok at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> > # Date 1431933378 -19800
> > #      Mon May 18 12:46:18 2015 +0530
> > # Node ID 1e2e70f90e4484b32217c7579bca98180929cf72
> > # Parent  d7b100e51e828833eee006f1da93e499ac161d28
> > analysis: re-order RD 0/4 analysis to do splits before ME or intra
> > 
> > diff -r d7b100e51e82 -r 1e2e70f90e44 source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp	Mon May 18 18:24:08 2015 -0500
> > +++ b/source/encoder/analysis.cpp	Mon May 18 12:46:18 2015 +0530
> > @@ -756,19 +756,79 @@
> >      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> >      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
> >      uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
> > -
> > +    bool earlyskip = false;
> >      if (mightNotSplit && depth >= minDepth)
> >      {
> > -        bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
> > -
> >          /* Compute Merge Cost */
> >          md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> >          md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> >          checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
> > -
> > -        bool earlyskip = false;
> >          if (m_param->rdLevel)
> >              earlyskip = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
> > +    }
> > +
> > +    bool bNoSplit = false;
> > +    if (md.bestMode)
> > +    {
> > +        bNoSplit = md.bestMode->cu.isSkipped(0);
> > +        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
> > +            bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
> > +    }
> > +
> > +    if (mightSplit && !bNoSplit)
> > +    {
> > +        Mode* splitPred = &md.pred[PRED_SPLIT];
> > +        splitPred->initCosts();
> > +        CUData* splitCU = &splitPred->cu;
> > +        splitCU->initSubCU(parentCTU, cuGeom, qp);
> > +
> > +        uint32_t nextDepth = depth + 1;
> > +        ModeDepth& nd = m_modeDepth[nextDepth];
> > +        invalidateContexts(nextDepth);
> > +        Entropy* nextContext = &m_rqt[depth].cur;
> > +        int nextQP = qp;
> > +
> > +        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> > +        {
> > +            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
> > +            if (childGeom.flags & CUGeom::PRESENT)
> > +            {
> > +                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
> > +                m_rqt[nextDepth].cur.load(*nextContext);
> > +
> > +                if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> > +                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
> > +
> > +                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> > +
> > +                // Save best CU and pred data for this sub CU
> > +                splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
> > +                splitPred->addSubCosts(*nd.bestMode);
> > +
> > +                if (m_param->rdLevel)
> > +                    nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
> > +                else
> > +                    nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childGeom.numPartitions * subPartIdx);
> > +                if (m_param->rdLevel > 1)
> > +                    nextContext = &nd.bestMode->contexts;
> > +            }
> > +            else
> > +                splitCU->setEmptyPart(childGeom, subPartIdx);
> > +        }
> > +        nextContext->store(splitPred->contexts);
> > +
> > +        if (mightNotSplit)
> > +            addSplitFlagCost(*splitPred, cuGeom.depth);
> > +        else if (m_param->rdLevel > 1)
> > +            updateModeCost(*splitPred);
> > +        else
> > +            splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
> > +    }
> > +
> > +    if (mightNotSplit && depth >= minDepth)
> > +    {
> > +        if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
> > +            setLambdaFromQP(parentCTU, qp);
> 
> This could likely be optimized as:
> 
>   if (m_rdCost.m_qp != qp)
>     setLambdaFromQP(parentCTU, qp);

Actually, it would be better to call setLambdaFromQP() unconditionally
here, then have it early-out if the passed-in QP matches m_qp (and
initialize m_qp to an invalid QP in the RDCost constructor). This way we
also save work when AQ did not specify a change in QP.

> >          if (!earlyskip)
> >          {
> > @@ -834,7 +894,7 @@
> >                          bestInter = &md.pred[PRED_nRx2N];
> >                  }
> >              }
> > -
> > +            bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
> >              if (m_param->rdLevel >= 3)
> >              {
> >                  /* Calculate RD cost of best inter option */
> > @@ -950,63 +1010,19 @@
> >              addSplitFlagCost(*md.bestMode, cuGeom.depth);
> >      }
> >  
> > -    bool bNoSplit = false;
> > -    if (md.bestMode)
> > +    if (mightNotSplit && md.bestMode)
> >      {
> > -        bNoSplit = md.bestMode->cu.isSkipped(0);
> > -        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
> > -            bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
> > +        /* early-out statistics */
> > +        FrameData& curEncData = *m_frame->m_encData;
> > +        FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
> > +        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
> > +        cuStat.count[depth] += 1;
> > +        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
> >      }
> >  
> >      if (mightSplit && !bNoSplit)
> >      {
> >          Mode* splitPred = &md.pred[PRED_SPLIT];
> > -        splitPred->initCosts();
> > -        CUData* splitCU = &splitPred->cu;
> > -        splitCU->initSubCU(parentCTU, cuGeom, qp);
> > -
> > -        uint32_t nextDepth = depth + 1;
> > -        ModeDepth& nd = m_modeDepth[nextDepth];
> > -        invalidateContexts(nextDepth);
> > -        Entropy* nextContext = &m_rqt[depth].cur;
> > -        int nextQP = qp;
> > -
> > -        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> > -        {
> > -            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
> > -            if (childGeom.flags & CUGeom::PRESENT)
> > -            {
> > -                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
> > -                m_rqt[nextDepth].cur.load(*nextContext);
> > -
> > -                if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> > -                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
> > -
> > -                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> > -
> > -                // Save best CU and pred data for this sub CU
> > -                splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
> > -                splitPred->addSubCosts(*nd.bestMode);
> > -
> > -                if (m_param->rdLevel)
> > -                    nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
> > -                else
> > -                    nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childGeom.numPartitions * subPartIdx);
> > -                if (m_param->rdLevel > 1)
> > -                    nextContext = &nd.bestMode->contexts;
> > -            }
> > -            else
> > -                splitCU->setEmptyPart(childGeom, subPartIdx);
> > -        }
> > -        nextContext->store(splitPred->contexts);
> > -
> > -        if (mightNotSplit)
> > -            addSplitFlagCost(*splitPred, cuGeom.depth);
> > -        else if (m_param->rdLevel > 1)
> > -            updateModeCost(*splitPred);
> > -        else
> > -            splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
> > -
> >          if (!md.bestMode)
> >              md.bestMode = splitPred;
> >          else if (m_param->rdLevel > 1)
> > @@ -1016,21 +1032,11 @@
> >  
> >          checkDQPForSplitPred(*md.bestMode, cuGeom);
> >      }
> > -    if (mightNotSplit)
> > -    {
> > -        /* early-out statistics */
> > -        FrameData& curEncData = *m_frame->m_encData;
> > -        FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
> > -        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
> > -        cuStat.count[depth] += 1;
> > -        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
> > -    }
> >  
> >      /* Copy best data to encData CTU and recon */
> >      X265_CHECK(md.bestMode->ok(), "best mode is not ok");
> >      md.bestMode->cu.copyToPic(depth);
> > -    if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)
> > -        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
> > +    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
> >  }
> >  
> >  void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
> 
> -- 
> Steve Borho

-- 
Steve Borho