[x265] [PATCH] analysis: re-order analysis to do splits before ME or intra for pmode

Fri Aug 14 12:57:15 CEST 2015

Yes, performance has improved. I will send the numbers for all presets after some time.

On Fri, Aug 14, 2015 at 4:20 PM, Steve Borho <steve at borho.org> wrote:

> On 08/14, ashok at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> > # Date 1439540228 -19800
> > #      Fri Aug 14 13:47:08 2015 +0530
> > # Node ID 9e26bef14543025908ed979b3d217417baf1ac8f
> > # Parent  d56b2466c04459205287e1581d8a36eebf372ba6
> > analysis: re-order analysis to do splits before ME or intra for pmode
>
> I'm happy to see these patches. They look good; do you have any example
> before/after performance and compression numbers?
>
> > diff -r d56b2466c044 -r 9e26bef14543 source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp     Wed Aug 12 18:12:20 2015 +0530
> > +++ b/source/encoder/analysis.cpp     Fri Aug 14 13:47:08 2015 +0530
> > @@ -505,16 +505,82 @@
> >
> >      X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
> support RD 0 or 1\n");
> >
> > +    PMODE pmode(*this, cuGeom);
> > +
> > +    if (mightNotSplit && depth >= minDepth)
> > +    {
> > +        /* Initialize all prediction CUs based on parentCTU */
> > +        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> > +        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> > +
> > +        if (m_param->rdLevel <= 4)
> > +            checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
> > +        else
> > +            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom, false);
> > +    }
> > +
> > +    bool bNoSplit = false;
> > +    if (md.bestMode)
> > +    {
> > +        bNoSplit = md.bestMode->cu.isSkipped(0);
> > +        if (mightSplit && depth && depth >= minDepth && !bNoSplit &&
> m_param->rdLevel <= 4)
> > +            bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
> *md.bestMode);
> > +    }
> > +
> > +    if (mightSplit && !bNoSplit)
> > +    {
> > +        Mode* splitPred = &md.pred[PRED_SPLIT];
> > +        splitPred->initCosts();
> > +        CUData* splitCU = &splitPred->cu;
> > +        splitCU->initSubCU(parentCTU, cuGeom, qp);
> > +
> > +        uint32_t nextDepth = depth + 1;
> > +        ModeDepth& nd = m_modeDepth[nextDepth];
> > +        invalidateContexts(nextDepth);
> > +        Entropy* nextContext = &m_rqt[depth].cur;
> > +        int nextQP = qp;
> > +
> > +        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> > +        {
> > +            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
> subPartIdx);
> > +            if (childGeom.flags & CUGeom::PRESENT)
> > +            {
> > +                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
> > +                m_rqt[nextDepth].cur.load(*nextContext);
> > +
> > +                if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > +                    nextQP = setLambdaFromQP(parentCTU,
> calculateQpforCuSize(parentCTU, childGeom));
> > +
> > +                compressInterCU_dist(parentCTU, childGeom, nextQP);
> > +
> > +                // Save best CU and pred data for this sub CU
> > +                splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> > +                splitPred->addSubCosts(*nd.bestMode);
> > +
> > +
> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
> childGeom.numPartitions * subPartIdx);
> > +                nextContext = &nd.bestMode->contexts;
> > +            }
> > +            else
> > +                splitCU->setEmptyPart(childGeom, subPartIdx);
> > +        }
> > +        nextContext->store(splitPred->contexts);
> > +
> > +        if (mightNotSplit)
> > +            addSplitFlagCost(*splitPred, cuGeom.depth);
> > +        else
> > +            updateModeCost(*splitPred);
> > +
> > +        checkDQPForSplitPred(*splitPred, cuGeom);
> > +    }
> > +
> >      if (mightNotSplit && depth >= minDepth)
> >      {
> >          int bTryAmp = m_slice->m_sps->maxAMPDepth > depth;
> >          int bTryIntra = m_slice->m_sliceType != B_SLICE ||
> m_param->bIntraInBFrames;
> >
> > -        PMODE pmode(*this, cuGeom);
> > +        if (m_slice->m_pps->bUseDQP && depth <=
> m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
> > +            setLambdaFromQP(parentCTU, qp);
> >
> > -        /* Initialize all prediction CUs based on parentCTU */
> > -        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> > -        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> >          if (bTryIntra)
> >          {
> >              md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
> > @@ -548,8 +614,6 @@
> >
> >          if (m_param->rdLevel <= 4)
> >          {
> > -            checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
> > -
> >              {
> >                  ProfileCUScope(parentCTU, pmodeBlockTime,
> countPModeMasters);
> >                  pmode.waitForExit();
> > @@ -632,14 +696,13 @@
> >          }
> >          else
> >          {
> > -            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom, false);
> >              {
> >                  ProfileCUScope(parentCTU, pmodeBlockTime,
> countPModeMasters);
> >                  pmode.waitForExit();
> >              }
> >
> >              checkBestMode(md.pred[PRED_2Nx2N], depth);
> > -            if (m_slice->m_sliceType == B_SLICE)
> > +            if (m_slice->m_sliceType == B_SLICE &&
> md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
> >                  checkBestMode(md.pred[PRED_BIDIR], depth);
> >
> >              if (m_param->bEnableRectInter)
> > @@ -664,14 +727,6 @@
> >              }
> >          }
> >
> > -        if (md.bestMode->rdCost == MAX_INT64 && !bTryIntra)
> > -        {
> > -            md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
> > -            checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
> > -            encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
> > -            checkBestMode(md.pred[PRED_INTRA], depth);
> > -        }
> > -
> >          if (m_bTryLossless)
> >              tryLossless(cuGeom);
> >
> > @@ -679,60 +734,9 @@
> >              addSplitFlagCost(*md.bestMode, cuGeom.depth);
> >      }
> >
> > -    bool bNoSplit = false;
> > -    if (md.bestMode)
> > -    {
> > -        bNoSplit = md.bestMode->cu.isSkipped(0);
> > -        if (mightSplit && depth && depth >= minDepth && !bNoSplit &&
> m_param->rdLevel <= 4)
> > -            bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
> *md.bestMode);
> > -    }
> > -
> > -    if (mightSplit && !bNoSplit)
> > -    {
> > -        Mode* splitPred = &md.pred[PRED_SPLIT];
> > -        splitPred->initCosts();
> > -        CUData* splitCU = &splitPred->cu;
> > -        splitCU->initSubCU(parentCTU, cuGeom, qp);
> > -
> > -        uint32_t nextDepth = depth + 1;
> > -        ModeDepth& nd = m_modeDepth[nextDepth];
> > -        invalidateContexts(nextDepth);
> > -        Entropy* nextContext = &m_rqt[depth].cur;
> > -        int nextQP = qp;
> > -
> > -        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> > -        {
> > -            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
> subPartIdx);
> > -            if (childGeom.flags & CUGeom::PRESENT)
> > -            {
> > -                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
> > -                m_rqt[nextDepth].cur.load(*nextContext);
> > -
> > -                if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                    nextQP = setLambdaFromQP(parentCTU,
> calculateQpforCuSize(parentCTU, childGeom));
> > -
> > -                compressInterCU_dist(parentCTU, childGeom, nextQP);
> > -
> > -                // Save best CU and pred data for this sub CU
> > -                splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> > -                splitPred->addSubCosts(*nd.bestMode);
> > -
> > -
> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
> childGeom.numPartitions * subPartIdx);
> > -                nextContext = &nd.bestMode->contexts;
> > -            }
> > -            else
> > -                splitCU->setEmptyPart(childGeom, subPartIdx);
> > -        }
> > -        nextContext->store(splitPred->contexts);
> > -
> > -        if (mightNotSplit)
> > -            addSplitFlagCost(*splitPred, cuGeom.depth);
> > -        else
> > -            updateModeCost(*splitPred);
> > -
> > -        checkDQPForSplitPred(*splitPred, cuGeom);
> > -        checkBestMode(*splitPred, depth);
> > -    }
> > +    /* compare split RD cost against best cost */
> > +    if (mightSplit && !bNoSplit)
> > +        checkBestMode(md.pred[PRED_SPLIT], depth);
> >
> >      if (mightNotSplit)
> >      {
> > @@ -746,8 +750,7 @@
> >
> >      /* Copy best data to encData CTU and recon */
> >      md.bestMode->cu.copyToPic(depth);
> > -    if (md.bestMode != &md.pred[PRED_SPLIT])
> > -        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic,
> cuAddr, cuGeom.absPartIdx);
> > +    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.absPartIdx);
> >  }
> >
> >  uint32_t Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const
> CUGeom& cuGeom, int32_t qp)
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150814/8f2614f1/attachment-0001.html>