[x265] [PATCH] analysis: re-order RD 0/4 analysis to do splits before ME or intra
Deepthi Nandakumar
deepthi at multicorewareinc.com
Wed May 20 13:33:00 CEST 2015
OK, pushing this series in. After the additional patch, it's pretty much a
win; the efficiency improvements in 10-bit are especially solid.
On Wed, May 20, 2015 at 4:09 PM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:
> Thanks.
>
> With the smoke tests, about two-thirds of the tests show positive/neutral
> encode efficiency gains, while a third show marginally lower encode
> efficiency, with a couple of command lines showing a surprising drop.
>
> On Tue, May 19, 2015 at 6:45 PM, <ashok at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
>> # Date 1431933378 -19800
>> # Mon May 18 12:46:18 2015 +0530
>> # Node ID 1e2e70f90e4484b32217c7579bca98180929cf72
>> # Parent d7b100e51e828833eee006f1da93e499ac161d28
>> analysis: re-order RD 0/4 analysis to do splits before ME or intra
>>
>> diff -r d7b100e51e82 -r 1e2e70f90e44 source/encoder/analysis.cpp
>> --- a/source/encoder/analysis.cpp Mon May 18 18:24:08 2015 -0500
>> +++ b/source/encoder/analysis.cpp Mon May 18 12:46:18 2015 +0530
>> @@ -756,19 +756,79 @@
>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>> uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
>> -
>> + bool earlyskip = false;
>> if (mightNotSplit && depth >= minDepth)
>> {
>> - bool bTryIntra = m_slice->m_sliceType != B_SLICE ||
>> m_param->bIntraInBFrames;
>> -
>> /* Compute Merge Cost */
>> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>> md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
>> checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE],
>> cuGeom);
>> -
>> - bool earlyskip = false;
>> if (m_param->rdLevel)
>> earlyskip = m_param->bEnableEarlySkip && md.bestMode &&
>> md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
>> + }
>> +
>> + bool bNoSplit = false;
>> + if (md.bestMode)
>> + {
>> + bNoSplit = md.bestMode->cu.isSkipped(0);
>> + if (mightSplit && depth && depth >= minDepth && !bNoSplit)
>> + bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
>> *md.bestMode);
>> + }
>> +
>> + if (mightSplit && !bNoSplit)
>> + {
>> + Mode* splitPred = &md.pred[PRED_SPLIT];
>> + splitPred->initCosts();
>> + CUData* splitCU = &splitPred->cu;
>> + splitCU->initSubCU(parentCTU, cuGeom, qp);
>> +
>> + uint32_t nextDepth = depth + 1;
>> + ModeDepth& nd = m_modeDepth[nextDepth];
>> + invalidateContexts(nextDepth);
>> + Entropy* nextContext = &m_rqt[depth].cur;
>> + int nextQP = qp;
>> +
>> + for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
>> + {
>> + const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
>> subPartIdx);
>> + if (childGeom.flags & CUGeom::PRESENT)
>> + {
>> + m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
>> childGeom.absPartIdx);
>> + m_rqt[nextDepth].cur.load(*nextContext);
>> +
>> + if (m_slice->m_pps->bUseDQP && nextDepth <=
>> m_slice->m_pps->maxCuDQPDepth)
>> + nextQP = setLambdaFromQP(parentCTU,
>> calculateQpforCuSize(parentCTU, childGeom));
>> +
>> + compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
>> +
>> + // Save best CU and pred data for this sub CU
>> + splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
>> subPartIdx);
>> + splitPred->addSubCosts(*nd.bestMode);
>> +
>> + if (m_param->rdLevel)
>> +
>> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
>> childGeom.numPartitions * subPartIdx);
>> + else
>> +
>> nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv,
>> childGeom.numPartitions * subPartIdx);
>> + if (m_param->rdLevel > 1)
>> + nextContext = &nd.bestMode->contexts;
>> + }
>> + else
>> + splitCU->setEmptyPart(childGeom, subPartIdx);
>> + }
>> + nextContext->store(splitPred->contexts);
>> +
>> + if (mightNotSplit)
>> + addSplitFlagCost(*splitPred, cuGeom.depth);
>> + else if (m_param->rdLevel > 1)
>> + updateModeCost(*splitPred);
>> + else
>> + splitPred->sa8dCost =
>> m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
>> + }
>> +
>> + if (mightNotSplit && depth >= minDepth)
>> + {
>> + if (m_slice->m_pps->bUseDQP && depth <=
>> m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
>> + setLambdaFromQP(parentCTU, qp);
>>
>> if (!earlyskip)
>> {
>> @@ -834,7 +894,7 @@
>> bestInter = &md.pred[PRED_nRx2N];
>> }
>> }
>> -
>> + bool bTryIntra = m_slice->m_sliceType != B_SLICE ||
>> m_param->bIntraInBFrames;
>> if (m_param->rdLevel >= 3)
>> {
>> /* Calculate RD cost of best inter option */
>> @@ -950,63 +1010,19 @@
>> addSplitFlagCost(*md.bestMode, cuGeom.depth);
>> }
>>
>> - bool bNoSplit = false;
>> - if (md.bestMode)
>> + if (mightNotSplit && md.bestMode)
>> {
>> - bNoSplit = md.bestMode->cu.isSkipped(0);
>> - if (mightSplit && depth && depth >= minDepth && !bNoSplit)
>> - bNoSplit = recursionDepthCheck(parentCTU, cuGeom,
>> *md.bestMode);
>> + /* early-out statistics */
>> + FrameData& curEncData = *m_frame->m_encData;
>> + FrameData::RCStatCU& cuStat =
>> curEncData.m_cuStat[parentCTU.m_cuAddr];
>> + uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
>> + cuStat.count[depth] += 1;
>> + cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
>> cuStat.count[depth];
>> }
>>
>
> The stats accumulation above should be moved further down, so that in the
> case where only split costs are available, the early-out stats do not
> change significantly. I suspect this is what caused the drop in encode
> efficiency.
>
>>
>> if (mightSplit && !bNoSplit)
>> {
>> Mode* splitPred = &md.pred[PRED_SPLIT];
>> - splitPred->initCosts();
>> - CUData* splitCU = &splitPred->cu;
>> - splitCU->initSubCU(parentCTU, cuGeom, qp);
>> -
>> - uint32_t nextDepth = depth + 1;
>> - ModeDepth& nd = m_modeDepth[nextDepth];
>> - invalidateContexts(nextDepth);
>> - Entropy* nextContext = &m_rqt[depth].cur;
>> - int nextQP = qp;
>> -
>> - for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
>> - {
>> - const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
>> subPartIdx);
>> - if (childGeom.flags & CUGeom::PRESENT)
>> - {
>> - m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
>> childGeom.absPartIdx);
>> - m_rqt[nextDepth].cur.load(*nextContext);
>> -
>> - if (m_slice->m_pps->bUseDQP && nextDepth <=
>> m_slice->m_pps->maxCuDQPDepth)
>> - nextQP = setLambdaFromQP(parentCTU,
>> calculateQpforCuSize(parentCTU, childGeom));
>> -
>> - compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
>> -
>> - // Save best CU and pred data for this sub CU
>> - splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
>> subPartIdx);
>> - splitPred->addSubCosts(*nd.bestMode);
>> -
>> - if (m_param->rdLevel)
>> -
>> nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
>> childGeom.numPartitions * subPartIdx);
>> - else
>> -
>> nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv,
>> childGeom.numPartitions * subPartIdx);
>> - if (m_param->rdLevel > 1)
>> - nextContext = &nd.bestMode->contexts;
>> - }
>> - else
>> - splitCU->setEmptyPart(childGeom, subPartIdx);
>> - }
>> - nextContext->store(splitPred->contexts);
>> -
>> - if (mightNotSplit)
>> - addSplitFlagCost(*splitPred, cuGeom.depth);
>> - else if (m_param->rdLevel > 1)
>> - updateModeCost(*splitPred);
>> - else
>> - splitPred->sa8dCost =
>> m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
>> -
>> if (!md.bestMode)
>> md.bestMode = splitPred;
>> else if (m_param->rdLevel > 1)
>> @@ -1016,21 +1032,11 @@
>>
>> checkDQPForSplitPred(*md.bestMode, cuGeom);
>> }
>> - if (mightNotSplit)
>> - {
>> - /* early-out statistics */
>> - FrameData& curEncData = *m_frame->m_encData;
>> - FrameData::RCStatCU& cuStat =
>> curEncData.m_cuStat[parentCTU.m_cuAddr];
>> - uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
>> - cuStat.count[depth] += 1;
>> - cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) /
>> cuStat.count[depth];
>> - }
>>
>> /* Copy best data to encData CTU and recon */
>> X265_CHECK(md.bestMode->ok(), "best mode is not ok");
>> md.bestMode->cu.copyToPic(depth);
>> - if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)
>> - md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
>> cuGeom.absPartIdx);
>> + md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
>> cuGeom.absPartIdx);
>> }
>>
>> void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
>> CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150520/c64f09a3/attachment-0001.html>
More information about the x265-devel
mailing list