[x265] [PATCH] analysis: re-order RD 0/4 analysis to do splits before ME or intra

Wed May 20 13:33:00 CEST 2015

Ok, pushing this series in. After the additional patch, it's pretty much a
win; the efficiency improvements in 10-bit are especially solid.

On Wed, May 20, 2015 at 4:09 PM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:

> Thanks.
>
> With the smoke tests, about two-thirds of the tests show positive/neutral
> encode efficiency gains, while a third show marginally lower encode
> efficiency, with a couple of command lines showing a surprising drop.
>
> On Tue, May 19, 2015 at 6:45 PM, <ashok at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
>> # Date 1431933378 -19800
>> #      Mon May 18 12:46:18 2015 +0530
>> # Node ID 1e2e70f90e4484b32217c7579bca98180929cf72
>> # Parent  d7b100e51e828833eee006f1da93e499ac161d28
>> analysis: re-order RD 0/4 analysis to do splits before ME or intra
>>
>> diff -r d7b100e51e82 -r 1e2e70f90e44 source/encoder/analysis.cpp
>> --- a/source/encoder/analysis.cpp       Mon May 18 18:24:08 2015 -0500
>> +++ b/source/encoder/analysis.cpp       Mon May 18 12:46:18 2015 +0530
>> @@ -756,19 +756,79 @@
>>      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>>      uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
>> -
>> +    bool earlyskip = false;
>>      if (mightNotSplit && depth >= minDepth)
>>      {
>> -        bool bTryIntra = m_slice->m_sliceType != B_SLICE ||
>> m_param->bIntraInBFrames;
>> -
>>          /* Compute Merge Cost */
>>          md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>>          md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
>>          checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE],
>> cuGeom);
>> -
>> -        bool earlyskip = false;
>>          if (m_param->rdLevel)
>>              earlyskip = m_param->bEnableEarlySkip && md.bestMode &&
>> md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
>> +    }
>> +
>> +    bool bNoSplit = false;
>> +    if (md.bestMode)
>> +    {
>> +        bNoSplit = md.bestMode->cu.isSkipped(0);
>> +        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
>> +            bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
>> *md.bestMode);
>> +    }
>> +
>> +    if (mightSplit && !bNoSplit)
>> +    {
>> +        Mode* splitPred = &md.pred[PRED_SPLIT];
>> +        splitPred->initCosts();
>> +        CUData* splitCU = &splitPred->cu;
>> +        splitCU->initSubCU(parentCTU, cuGeom, qp);
>> +
>> +        uint32_t nextDepth = depth + 1;
>> +        ModeDepth& nd = m_modeDepth[nextDepth];
>> +        invalidateContexts(nextDepth);
>> +        Entropy* nextContext = &m_rqt[depth].cur;
>> +        int nextQP = qp;
>> +
>> +        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
>> +        {
>> +            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
>> subPartIdx);
>> +            if (childGeom.flags & CUGeom::PRESENT)
>> +            {
>> +                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
>> childGeom.absPartIdx);
>> +                m_rqt[nextDepth].cur.load(*nextContext);
>> +
>> +                if (m_slice->m_pps->bUseDQP && nextDepth <=
>> m_slice->m_pps->maxCuDQPDepth)
>> +                    nextQP = setLambdaFromQP(parentCTU,
>> calculateQpforCuSize(parentCTU, childGeom));
>> +
>> +                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
>> +
>> +                // Save best CU and pred data for this sub CU
>> +                splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
>> subPartIdx);
>> +                splitPred->addSubCosts(*nd.bestMode);
>> +
>> +                if (m_param->rdLevel)
>> +
>> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
>> childGeom.numPartitions * subPartIdx);
>> +                else
>> +
>> nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv,
>> childGeom.numPartitions * subPartIdx);
>> +                if (m_param->rdLevel > 1)
>> +                    nextContext = &nd.bestMode->contexts;
>> +            }
>> +            else
>> +                splitCU->setEmptyPart(childGeom, subPartIdx);
>> +        }
>> +        nextContext->store(splitPred->contexts);
>> +
>> +        if (mightNotSplit)
>> +            addSplitFlagCost(*splitPred, cuGeom.depth);
>> +        else if (m_param->rdLevel > 1)
>> +            updateModeCost(*splitPred);
>> +        else
>> +            splitPred->sa8dCost =
>> m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
>> +    }
>> +
>> +    if (mightNotSplit && depth >= minDepth)
>> +    {
>> +        if (m_slice->m_pps->bUseDQP && depth <=
>> m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
>> +            setLambdaFromQP(parentCTU, qp);
>>
>>          if (!earlyskip)
>>          {
>> @@ -834,7 +894,7 @@
>>                          bestInter = &md.pred[PRED_nRx2N];
>>                  }
>>              }
>> -
>> +            bool bTryIntra = m_slice->m_sliceType != B_SLICE ||
>> m_param->bIntraInBFrames;
>>              if (m_param->rdLevel >= 3)
>>              {
>>                  /* Calculate RD cost of best inter option */
>> @@ -950,63 +1010,19 @@
>>              addSplitFlagCost(*md.bestMode, cuGeom.depth);
>>      }
>>
>> -    bool bNoSplit = false;
>> -    if (md.bestMode)
>> +    if (mightNotSplit && md.bestMode)
>>      {
>> -        bNoSplit = md.bestMode->cu.isSkipped(0);
>> -        if (mightSplit && depth && depth >= minDepth && !bNoSplit)
>> -            bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
>> *md.bestMode);
>> +        /* early-out statistics */
>> +        FrameData& curEncData = *m_frame->m_encData;
>> +        FrameData::RCStatCU& cuStat =
>> curEncData.m_cuStat[parentCTU.m_cuAddr];
>> +        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
>> +        cuStat.count[depth] += 1;
>> +        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
>> cuStat.count[depth];
>>      }
>>
>
> The stats accumulation above should be moved further below, so that in the
> case where only split costs are available, the early-out stats do not
> change significantly. I suspect this is what caused the drop in encode
> efficiency.
>
>>
>>      if (mightSplit && !bNoSplit)
>>      {
>>          Mode* splitPred = &md.pred[PRED_SPLIT];
>> -        splitPred->initCosts();
>> -        CUData* splitCU = &splitPred->cu;
>> -        splitCU->initSubCU(parentCTU, cuGeom, qp);
>> -
>> -        uint32_t nextDepth = depth + 1;
>> -        ModeDepth& nd = m_modeDepth[nextDepth];
>> -        invalidateContexts(nextDepth);
>> -        Entropy* nextContext = &m_rqt[depth].cur;
>> -        int nextQP = qp;
>> -
>> -        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
>> -        {
>> -            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
>> subPartIdx);
>> -            if (childGeom.flags & CUGeom::PRESENT)
>> -            {
>> -                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
>> childGeom.absPartIdx);
>> -                m_rqt[nextDepth].cur.load(*nextContext);
>> -
>> -                if (m_slice->m_pps->bUseDQP && nextDepth <=
>> m_slice->m_pps->maxCuDQPDepth)
>> -                    nextQP = setLambdaFromQP(parentCTU,
>> calculateQpforCuSize(parentCTU, childGeom));
>> -
>> -                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
>> -
>> -                // Save best CU and pred data for this sub CU
>> -                splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
>> subPartIdx);
>> -                splitPred->addSubCosts(*nd.bestMode);
>> -
>> -                if (m_param->rdLevel)
>> -
>> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
>> childGeom.numPartitions * subPartIdx);
>> -                else
>> -
>> nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv,
>> childGeom.numPartitions * subPartIdx);
>> -                if (m_param->rdLevel > 1)
>> -                    nextContext = &nd.bestMode->contexts;
>> -            }
>> -            else
>> -                splitCU->setEmptyPart(childGeom, subPartIdx);
>> -        }
>> -        nextContext->store(splitPred->contexts);
>> -
>> -        if (mightNotSplit)
>> -            addSplitFlagCost(*splitPred, cuGeom.depth);
>> -        else if (m_param->rdLevel > 1)
>> -            updateModeCost(*splitPred);
>> -        else
>> -            splitPred->sa8dCost =
>> m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
>> -
>>          if (!md.bestMode)
>>              md.bestMode = splitPred;
>>          else if (m_param->rdLevel > 1)
>> @@ -1016,21 +1032,11 @@
>>
>>          checkDQPForSplitPred(*md.bestMode, cuGeom);
>>      }
>> -    if (mightNotSplit)
>> -    {
>> -        /* early-out statistics */
>> -        FrameData& curEncData = *m_frame->m_encData;
>> -        FrameData::RCStatCU& cuStat =
>> curEncData.m_cuStat[parentCTU.m_cuAddr];
>> -        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
>> -        cuStat.count[depth] += 1;
>> -        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
>> cuStat.count[depth];
>> -    }
>>
>>      /* Copy best data to encData CTU and recon */
>>      X265_CHECK(md.bestMode->ok(), "best mode is not ok");
>>      md.bestMode->cu.copyToPic(depth);
>> -    if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)
>> -        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
>> cuGeom.absPartIdx);
>> +    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
>> cuGeom.absPartIdx);
>>  }
>>
>>  void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
>> CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150520/c64f09a3/attachment-0001.html>