[x265] [PATCH] analysis: re-order RD 0/4 analysis to do splits before ME or intra

Wed May 20 12:39:16 CEST 2015

Thanks.

With the smoke tests, about two-thirds of the tests show positive or neutral
encode efficiency gains, while a third show marginally lower encode efficiency,
with a couple of command lines showing a surprising drop.

On Tue, May 19, 2015 at 6:45 PM, <ashok at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1431933378 -19800
> #      Mon May 18 12:46:18 2015 +0530
> # Node ID 1e2e70f90e4484b32217c7579bca98180929cf72
> # Parent  d7b100e51e828833eee006f1da93e499ac161d28
> analysis: re-order RD 0/4 analysis to do splits before ME or intra
>
> diff -r d7b100e51e82 -r 1e2e70f90e44 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Mon May 18 18:24:08 2015 -0500
> +++ b/source/encoder/analysis.cpp       Mon May 18 12:46:18 2015 +0530
> @@ -756,19 +756,79 @@
>      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>      uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
> -
> +    bool earlyskip = false;
>      if (mightNotSplit && depth >= minDepth)
>      {
> -        bool bTryIntra = m_slice->m_sliceType != B_SLICE ||
> m_param->bIntraInBFrames;
> -
>          /* Compute Merge Cost */
>          md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>          md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
>          checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE],
> cuGeom);
> -
> -        bool earlyskip = false;
>          if (m_param->rdLevel)
>              earlyskip = m_param->bEnableEarlySkip && md.bestMode &&
> md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
> +    }
> +
> +    bool bNoSplit = false;
> +    if (md.bestMode)
> +    {
> +        bNoSplit = md.bestMode->cu.isSkipped(0);
> +        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
> +            bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
> *md.bestMode);
> +    }
> +
> +    if (mightSplit && !bNoSplit)
> +    {
> +        Mode* splitPred = &md.pred[PRED_SPLIT];
> +        splitPred->initCosts();
> +        CUData* splitCU = &splitPred->cu;
> +        splitCU->initSubCU(parentCTU, cuGeom, qp);
> +
> +        uint32_t nextDepth = depth + 1;
> +        ModeDepth& nd = m_modeDepth[nextDepth];
> +        invalidateContexts(nextDepth);
> +        Entropy* nextContext = &m_rqt[depth].cur;
> +        int nextQP = qp;
> +
> +        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> +        {
> +            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
> subPartIdx);
> +            if (childGeom.flags & CUGeom::PRESENT)
> +            {
> +                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
> +                m_rqt[nextDepth].cur.load(*nextContext);
> +
> +                if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> +                    nextQP = setLambdaFromQP(parentCTU,
> calculateQpforCuSize(parentCTU, childGeom));
> +
> +                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> +
> +                // Save best CU and pred data for this sub CU
> +                splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> +                splitPred->addSubCosts(*nd.bestMode);
> +
> +                if (m_param->rdLevel)
> +
> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
> childGeom.numPartitions * subPartIdx);
> +                else
> +
> nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv,
> childGeom.numPartitions * subPartIdx);
> +                if (m_param->rdLevel > 1)
> +                    nextContext = &nd.bestMode->contexts;
> +            }
> +            else
> +                splitCU->setEmptyPart(childGeom, subPartIdx);
> +        }
> +        nextContext->store(splitPred->contexts);
> +
> +        if (mightNotSplit)
> +            addSplitFlagCost(*splitPred, cuGeom.depth);
> +        else if (m_param->rdLevel > 1)
> +            updateModeCost(*splitPred);
> +        else
> +            splitPred->sa8dCost =
> m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
> +    }
> +
> +    if (mightNotSplit && depth >= minDepth)
> +    {
> +        if (m_slice->m_pps->bUseDQP && depth <=
> m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
> +            setLambdaFromQP(parentCTU, qp);
>
>          if (!earlyskip)
>          {
> @@ -834,7 +894,7 @@
>                          bestInter = &md.pred[PRED_nRx2N];
>                  }
>              }
> -
> +            bool bTryIntra = m_slice->m_sliceType != B_SLICE ||
> m_param->bIntraInBFrames;
>              if (m_param->rdLevel >= 3)
>              {
>                  /* Calculate RD cost of best inter option */
> @@ -950,63 +1010,19 @@
>              addSplitFlagCost(*md.bestMode, cuGeom.depth);
>      }
>
> -    bool bNoSplit = false;
> -    if (md.bestMode)
> +    if (mightNotSplit && md.bestMode)
>      {
> -        bNoSplit = md.bestMode->cu.isSkipped(0);
> -        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
> -            bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
> *md.bestMode);
> +        /* early-out statistics */
> +        FrameData& curEncData = *m_frame->m_encData;
> +        FrameData::RCStatCU& cuStat =
> curEncData.m_cuStat[parentCTU.m_cuAddr];
> +        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
> +        cuStat.count[depth] += 1;
> +        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
> cuStat.count[depth];
>      }
>

The stats accumulation above should be moved further down, so that in the
case where only split costs are available, the early-out stats do not
change significantly. I suspect this is what caused the drop in encode
efficiency.

>
>      if (mightSplit && !bNoSplit)
>      {
>          Mode* splitPred = &md.pred[PRED_SPLIT];
> -        splitPred->initCosts();
> -        CUData* splitCU = &splitPred->cu;
> -        splitCU->initSubCU(parentCTU, cuGeom, qp);
> -
> -        uint32_t nextDepth = depth + 1;
> -        ModeDepth& nd = m_modeDepth[nextDepth];
> -        invalidateContexts(nextDepth);
> -        Entropy* nextContext = &m_rqt[depth].cur;
> -        int nextQP = qp;
> -
> -        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> -        {
> -            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
> subPartIdx);
> -            if (childGeom.flags & CUGeom::PRESENT)
> -            {
> -                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
> -                m_rqt[nextDepth].cur.load(*nextContext);
> -
> -                if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> -                    nextQP = setLambdaFromQP(parentCTU,
> calculateQpforCuSize(parentCTU, childGeom));
> -
> -                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> -
> -                // Save best CU and pred data for this sub CU
> -                splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> -                splitPred->addSubCosts(*nd.bestMode);
> -
> -                if (m_param->rdLevel)
> -
> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
> childGeom.numPartitions * subPartIdx);
> -                else
> -
> nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv,
> childGeom.numPartitions * subPartIdx);
> -                if (m_param->rdLevel > 1)
> -                    nextContext = &nd.bestMode->contexts;
> -            }
> -            else
> -                splitCU->setEmptyPart(childGeom, subPartIdx);
> -        }
> -        nextContext->store(splitPred->contexts);
> -
> -        if (mightNotSplit)
> -            addSplitFlagCost(*splitPred, cuGeom.depth);
> -        else if (m_param->rdLevel > 1)
> -            updateModeCost(*splitPred);
> -        else
> -            splitPred->sa8dCost =
> m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
> -
>          if (!md.bestMode)
>              md.bestMode = splitPred;
>          else if (m_param->rdLevel > 1)
> @@ -1016,21 +1032,11 @@
>
>          checkDQPForSplitPred(*md.bestMode, cuGeom);
>      }
> -    if (mightNotSplit)
> -    {
> -        /* early-out statistics */
> -        FrameData& curEncData = *m_frame->m_encData;
> -        FrameData::RCStatCU& cuStat =
> curEncData.m_cuStat[parentCTU.m_cuAddr];
> -        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
> -        cuStat.count[depth] += 1;
> -        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
> cuStat.count[depth];
> -    }
>
>      /* Copy best data to encData CTU and recon */
>      X265_CHECK(md.bestMode->ok(), "best mode is not ok");
>      md.bestMode->cu.copyToPic(depth);
> -    if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)
> -        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.absPartIdx);
> +    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.absPartIdx);
>  }
>
>  void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
> CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150520/a6219399/attachment.html>