[x265] [PATCH] analysis: re-order RD 0/4 analysis to do splits before ME or intra
Deepthi Nandakumar
deepthi at multicorewareinc.com
Wed May 20 12:39:16 CEST 2015
Thanks.
With the smoke tests, about two-thirds of the tests show positive or neutral
encode efficiency gains, while a third show marginally lower encode efficiency,
with a couple of command lines showing a surprising drop.
On Tue, May 19, 2015 at 6:45 PM, <ashok at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1431933378 -19800
> # Mon May 18 12:46:18 2015 +0530
> # Node ID 1e2e70f90e4484b32217c7579bca98180929cf72
> # Parent d7b100e51e828833eee006f1da93e499ac161d28
> analysis: re-order RD 0/4 analysis to do splits before ME or intra
>
> diff -r d7b100e51e82 -r 1e2e70f90e44 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Mon May 18 18:24:08 2015 -0500
> +++ b/source/encoder/analysis.cpp Mon May 18 12:46:18 2015 +0530
> @@ -756,19 +756,79 @@
> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
> uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
> -
> + bool earlyskip = false;
> if (mightNotSplit && depth >= minDepth)
> {
> - bool bTryIntra = m_slice->m_sliceType != B_SLICE ||
> m_param->bIntraInBFrames;
> -
> /* Compute Merge Cost */
> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE],
> cuGeom);
> -
> - bool earlyskip = false;
> if (m_param->rdLevel)
> earlyskip = m_param->bEnableEarlySkip && md.bestMode &&
> md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
> + }
> +
> + bool bNoSplit = false;
> + if (md.bestMode)
> + {
> + bNoSplit = md.bestMode->cu.isSkipped(0);
> + if (mightSplit && depth && depth >= minDepth && !bNoSplit)
> + bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
> *md.bestMode);
> + }
> +
> + if (mightSplit && !bNoSplit)
> + {
> + Mode* splitPred = &md.pred[PRED_SPLIT];
> + splitPred->initCosts();
> + CUData* splitCU = &splitPred->cu;
> + splitCU->initSubCU(parentCTU, cuGeom, qp);
> +
> + uint32_t nextDepth = depth + 1;
> + ModeDepth& nd = m_modeDepth[nextDepth];
> + invalidateContexts(nextDepth);
> + Entropy* nextContext = &m_rqt[depth].cur;
> + int nextQP = qp;
> +
> + for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> + {
> + const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
> subPartIdx);
> + if (childGeom.flags & CUGeom::PRESENT)
> + {
> + m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
> + m_rqt[nextDepth].cur.load(*nextContext);
> +
> + if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> + nextQP = setLambdaFromQP(parentCTU,
> calculateQpforCuSize(parentCTU, childGeom));
> +
> + compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> +
> + // Save best CU and pred data for this sub CU
> + splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> + splitPred->addSubCosts(*nd.bestMode);
> +
> + if (m_param->rdLevel)
> +
> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
> childGeom.numPartitions * subPartIdx);
> + else
> +
> nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv,
> childGeom.numPartitions * subPartIdx);
> + if (m_param->rdLevel > 1)
> + nextContext = &nd.bestMode->contexts;
> + }
> + else
> + splitCU->setEmptyPart(childGeom, subPartIdx);
> + }
> + nextContext->store(splitPred->contexts);
> +
> + if (mightNotSplit)
> + addSplitFlagCost(*splitPred, cuGeom.depth);
> + else if (m_param->rdLevel > 1)
> + updateModeCost(*splitPred);
> + else
> + splitPred->sa8dCost =
> m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
> + }
> +
> + if (mightNotSplit && depth >= minDepth)
> + {
> + if (m_slice->m_pps->bUseDQP && depth <=
> m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
> + setLambdaFromQP(parentCTU, qp);
>
> if (!earlyskip)
> {
> @@ -834,7 +894,7 @@
> bestInter = &md.pred[PRED_nRx2N];
> }
> }
> -
> + bool bTryIntra = m_slice->m_sliceType != B_SLICE ||
> m_param->bIntraInBFrames;
> if (m_param->rdLevel >= 3)
> {
> /* Calculate RD cost of best inter option */
> @@ -950,63 +1010,19 @@
> addSplitFlagCost(*md.bestMode, cuGeom.depth);
> }
>
> - bool bNoSplit = false;
> - if (md.bestMode)
> + if (mightNotSplit && md.bestMode)
> {
> - bNoSplit = md.bestMode->cu.isSkipped(0);
> - if (mightSplit && depth && depth >= minDepth && !bNoSplit)
> - bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
> *md.bestMode);
> + /* early-out statistics */
> + FrameData& curEncData = *m_frame->m_encData;
> + FrameData::RCStatCU& cuStat =
> curEncData.m_cuStat[parentCTU.m_cuAddr];
> + uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
> + cuStat.count[depth] += 1;
> + cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
> cuStat.count[depth];
> }
>
The stats accumulation above should be moved further down, so that in the
case where only split costs are available, the early-out stats would not
change significantly. I suspect this is what caused the drop in encode
efficiency.
>
> if (mightSplit && !bNoSplit)
> {
> Mode* splitPred = &md.pred[PRED_SPLIT];
> - splitPred->initCosts();
> - CUData* splitCU = &splitPred->cu;
> - splitCU->initSubCU(parentCTU, cuGeom, qp);
> -
> - uint32_t nextDepth = depth + 1;
> - ModeDepth& nd = m_modeDepth[nextDepth];
> - invalidateContexts(nextDepth);
> - Entropy* nextContext = &m_rqt[depth].cur;
> - int nextQP = qp;
> -
> - for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> - {
> - const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
> subPartIdx);
> - if (childGeom.flags & CUGeom::PRESENT)
> - {
> - m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
> - m_rqt[nextDepth].cur.load(*nextContext);
> -
> - if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> - nextQP = setLambdaFromQP(parentCTU,
> calculateQpforCuSize(parentCTU, childGeom));
> -
> - compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> -
> - // Save best CU and pred data for this sub CU
> - splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> - splitPred->addSubCosts(*nd.bestMode);
> -
> - if (m_param->rdLevel)
> -
> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
> childGeom.numPartitions * subPartIdx);
> - else
> -
> nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv,
> childGeom.numPartitions * subPartIdx);
> - if (m_param->rdLevel > 1)
> - nextContext = &nd.bestMode->contexts;
> - }
> - else
> - splitCU->setEmptyPart(childGeom, subPartIdx);
> - }
> - nextContext->store(splitPred->contexts);
> -
> - if (mightNotSplit)
> - addSplitFlagCost(*splitPred, cuGeom.depth);
> - else if (m_param->rdLevel > 1)
> - updateModeCost(*splitPred);
> - else
> - splitPred->sa8dCost =
> m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
> -
> if (!md.bestMode)
> md.bestMode = splitPred;
> else if (m_param->rdLevel > 1)
> @@ -1016,21 +1032,11 @@
>
> checkDQPForSplitPred(*md.bestMode, cuGeom);
> }
> - if (mightNotSplit)
> - {
> - /* early-out statistics */
> - FrameData& curEncData = *m_frame->m_encData;
> - FrameData::RCStatCU& cuStat =
> curEncData.m_cuStat[parentCTU.m_cuAddr];
> - uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
> - cuStat.count[depth] += 1;
> - cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
> cuStat.count[depth];
> - }
>
> /* Copy best data to encData CTU and recon */
> X265_CHECK(md.bestMode->ok(), "best mode is not ok");
> md.bestMode->cu.copyToPic(depth);
> - if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)
> - md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.absPartIdx);
> + md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.absPartIdx);
> }
>
> void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
> CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150520/a6219399/attachment.html>
More information about the x265-devel
mailing list