[x265] [PATCH] analysis: dump and reuse the bestmergeCand for skip and merge mode
Deepthi Nandakumar
deepthi at multicorewareinc.com
Tue Feb 3 06:17:09 CET 2015
On Mon, Feb 2, 2015 at 10:23 AM, <gopu at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1422852790 -19800
> # Mon Feb 02 10:23:10 2015 +0530
> # Node ID db56dc779466c5b54a55b5dadbcd04e882011729
> # Parent 6c5156500d6d4fa655acaf7a8b77f2ba3a0f794b
> analysis: dump and reuse the bestmergeCand for skip and merge mode
>
> diff -r 6c5156500d6d -r db56dc779466 source/common/common.h
> --- a/source/common/common.h Fri Jan 30 11:54:22 2015 -0600
> +++ b/source/common/common.h Mon Feb 02 10:23:10 2015 +0530
> @@ -376,6 +376,7 @@
> int32_t* ref;
> uint8_t* depth;
> uint8_t* modes;
> + uint32_t* bestMergeCand;
> };
>
> /* Stores intra analysis data for a single frame. This struct needs
> better packing */
> diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Fri Jan 30 11:54:22 2015 -0600
> +++ b/source/encoder/analysis.cpp Mon Feb 02 10:23:10 2015 +0530
> @@ -140,6 +140,7 @@
> int numPredDir = m_slice->isInterP() ? 1 : 2;
> m_reuseInterDataCTU = (analysis_inter_data
> *)m_frame->m_analysisData.interData;
> reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr *
> X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> + reuseBestMergeCand =
> &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
> }
> }
>
> @@ -1066,21 +1067,6 @@
> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
> checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
>
> - if ((m_slice->m_sliceType != B_SLICE ||
> m_param->bIntraInBFrames) &&
> - (!m_param->bEnableCbfFastMode ||
> md.bestMode->cu.getQtRootCbf(0)))
> - {
> - md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
> - checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
> - checkBestMode(md.pred[PRED_INTRA], depth);
> -
> - if (depth == g_maxCUDepth && cuGeom.log2CUSize >
> m_slice->m_sps->quadtreeTULog2MinSize)
> - {
> - md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU,
> cuGeom);
> - checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN,
> &reuseModes[zOrder]);
> - checkBestMode(md.pred[PRED_INTRA_NxN], depth);
> - }
> - }
> -
> if (m_bTryLossless)
> tryLossless(cuGeom);
>
> @@ -1388,29 +1374,10 @@
> bool foundCbf0Merge = false;
> bool triedPZero = false, triedBZero = false;
> bestPred->rdCost = MAX_INT64;
> - for (uint32_t i = 0; i < maxNumMergeCand; i++)
> +
> + if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> {
> - if (m_bFrameParallel &&
> - (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) *
> 4 ||
> - mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) *
> 4))
> - continue;
> -
> - /* the merge candidate list is packed with MV(0,0) ref 0 when it
> is not full */
> - if (interDirNeighbours[i] == 1 &&
> !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
> - {
> - if (triedPZero)
> - continue;
> - triedPZero = true;
> - }
> - else if (interDirNeighbours[i] == 3 &&
> - !mvFieldNeighbours[i][0].mv.word &&
> !mvFieldNeighbours[i][0].refIdx &&
> - !mvFieldNeighbours[i][1].mv.word &&
> !mvFieldNeighbours[i][1].refIdx)
> - {
> - if (triedBZero)
> - continue;
> - triedBZero = true;
> - }
> -
> + uint32_t i = *reuseBestMergeCand;
> tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate
> ID is stored in L0 MVP idx */
> tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> @@ -1424,24 +1391,20 @@
>
> uint8_t hasCbf = true;
> bool swapped = false;
> - if (!foundCbf0Merge)
> +
> + /* if the best prediction has CBF (not a skip) then try merge
> with residual */
> + encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> + hasCbf = tempPred->cu.getQtRootCbf(0);
> + foundCbf0Merge = !hasCbf;
> +
> + if (tempPred->rdCost < bestPred->rdCost)
> {
> - /* if the best prediction has CBF (not a skip) then try merge
> with residual */
> -
> - encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> - hasCbf = tempPred->cu.getQtRootCbf(0);
> - foundCbf0Merge = !hasCbf;
> -
> - if (tempPred->rdCost < bestPred->rdCost)
> - {
> - std::swap(tempPred, bestPred);
> - swapped = true;
> - }
> + std::swap(tempPred, bestPred);
> + swapped = true;
> }
> if (!m_param->bLossless && hasCbf)
> {
> /* try merge without residual (skip), if not lossless coding
> */
> -
> if (swapped)
> {
> tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
> @@ -1453,12 +1416,88 @@
> tempPred->cu.setPredModeSubParts(MODE_INTER);
> tempPred->predYuv.copyFromYuv(bestPred->predYuv);
> }
> -
> +
> encodeResAndCalcRdSkipCU(*tempPred);
>
> if (tempPred->rdCost < bestPred->rdCost)
> std::swap(tempPred, bestPred);
> }
> + reuseBestMergeCand++;
> + }
>
This is far too much code duplication. Let's fold the analysis-load case into
the existing loop by setting maxNumMergeCand based on the analysis mode.
> + else
> + {
> + for (uint32_t i = 0; i < maxNumMergeCand; i++)
> + {
> + if (m_bFrameParallel &&
> + (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange +
> 1) * 4 ||
> + mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange +
> 1) * 4))
> + continue;
> +
> + /* the merge candidate list is packed with MV(0,0) ref 0 when
> it is not full */
> + if (interDirNeighbours[i] == 1 &&
> !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
> + {
> + if (triedPZero)
> + continue;
> + triedPZero = true;
> + }
> + else if (interDirNeighbours[i] == 3 &&
> + !mvFieldNeighbours[i][0].mv.word &&
> !mvFieldNeighbours[i][0].refIdx &&
> + !mvFieldNeighbours[i][1].mv.word &&
> !mvFieldNeighbours[i][1].refIdx)
> + {
> + if (triedBZero)
> + continue;
> + triedBZero = true;
> + }
> +
> + tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge
> candidate ID is stored in L0 MVP idx */
> + tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> + tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> + tempPred->cu.m_refIdx[0][0] =
> (int8_t)mvFieldNeighbours[i][0].refIdx;
> + tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
> + tempPred->cu.m_refIdx[1][0] =
> (int8_t)mvFieldNeighbours[i][1].refIdx;
> + tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be
> cleared between encode iterations */
> +
> + prepMotionCompensation(tempPred->cu, cuGeom, 0);
> + motionCompensation(tempPred->predYuv, true, true);
> +
> + uint8_t hasCbf = true;
> + bool swapped = false;
> + if (!foundCbf0Merge)
> + {
> + /* if the best prediction has CBF (not a skip) then try
> merge with residual */
> +
> + encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> + hasCbf = tempPred->cu.getQtRootCbf(0);
> + foundCbf0Merge = !hasCbf;
> +
> + if (tempPred->rdCost < bestPred->rdCost)
> + {
> + std::swap(tempPred, bestPred);
> + swapped = true;
> + }
> + }
> + if (!m_param->bLossless && hasCbf)
> + {
> + /* try merge without residual (skip), if not lossless
> coding */
> +
> + if (swapped)
> + {
> + tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
> + tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> + tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> + tempPred->cu.m_refIdx[0][0] =
> (int8_t)mvFieldNeighbours[i][0].refIdx;
> + tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
> + tempPred->cu.m_refIdx[1][0] =
> (int8_t)mvFieldNeighbours[i][1].refIdx;
> + tempPred->cu.setPredModeSubParts(MODE_INTER);
> + tempPred->predYuv.copyFromYuv(bestPred->predYuv);
> + }
> +
> + encodeResAndCalcRdSkipCU(*tempPred);
> +
> + if (tempPred->rdCost < bestPred->rdCost)
> + std::swap(tempPred, bestPred);
> + }
> + }
> }
>
> if (bestPred->rdCost < MAX_INT64)
> @@ -1473,6 +1512,12 @@
> bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);
> bestPred->cu.setPURefIdx(1,
> (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);
> }
> +
> + if (m_param->analysisMode == X265_ANALYSIS_SAVE)
> + {
> + *reuseBestMergeCand = bestPred->cu.m_mvpIdx[0][0];
> + reuseBestMergeCand++;
> + }
> }
>
> void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom,
> PartSize partSize)
> diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Jan 30 11:54:22 2015 -0600
> +++ b/source/encoder/analysis.h Mon Feb 02 10:23:10 2015 +0530
> @@ -78,6 +78,7 @@
> analysis_intra_data* m_reuseIntraDataCTU;
> analysis_inter_data* m_reuseInterDataCTU;
> int32_t* reuseRef;
> + uint32_t* reuseBestMergeCand;
> Analysis();
> bool create(ThreadLocalData* tld);
> void destroy();
> diff -r 6c5156500d6d -r db56dc779466 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Fri Jan 30 11:54:22 2015 -0600
> +++ b/source/encoder/encoder.cpp Mon Feb 02 10:23:10 2015 +0530
> @@ -1628,6 +1628,7 @@
> CHECKED_MALLOC_ZERO(interData->ref, int32_t,
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);
> CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions
> * analysis->numCUsInFrame);
> CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions
> * analysis->numCUsInFrame);
> + CHECKED_MALLOC_ZERO(interData->bestMergeCand, uint32_t,
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS);
> analysis->interData = interData;
> }
> return;
> @@ -1651,6 +1652,7 @@
> X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
> X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
> X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
> +
> X265_FREE(((analysis_inter_data*)analysis->interData)->bestMergeCand);
> X265_FREE(analysis->interData);
> }
> }
> @@ -1716,6 +1718,7 @@
> X265_FREAD(((analysis_inter_data *)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU,
> m_analysisFile);
> X265_FREAD(((analysis_inter_data *)analysis->interData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> X265_FREAD(((analysis_inter_data *)analysis->interData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> + X265_FREAD(((analysis_inter_data
> *)analysis->interData)->bestMergeCand, sizeof(uint32_t),
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
> consumedBytes += frameRecordSize;
> totalConsumedBytes = consumedBytes;
> }
> @@ -1724,6 +1727,7 @@
> X265_FREAD(((analysis_inter_data *)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2,
> m_analysisFile);
> X265_FREAD(((analysis_inter_data *)analysis->interData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> X265_FREAD(((analysis_inter_data *)analysis->interData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> + X265_FREAD(((analysis_inter_data
> *)analysis->interData)->bestMergeCand, sizeof(uint32_t),
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
> consumedBytes += frameRecordSize;
> }
> #undef X265_FREAD
> @@ -1750,11 +1754,13 @@
> {
> analysis->frameRecordSize += sizeof(int32_t) *
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
> analysis->frameRecordSize += sizeof(uint8_t) *
> analysis->numCUsInFrame * analysis->numPartitions * 2;
> + analysis->frameRecordSize += sizeof(uint32_t) *
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS;
> }
> else
> {
> analysis->frameRecordSize += sizeof(int32_t) *
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
> analysis->frameRecordSize += sizeof(uint8_t) *
> analysis->numCUsInFrame * analysis->numPartitions * 2;
> + analysis->frameRecordSize += sizeof(uint32_t) *
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS;
> }
>
> X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1,
> m_analysisFile);
> @@ -1774,12 +1780,14 @@
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU,
> m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand,
> sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS,
> m_analysisFile);
> }
> else
> {
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2,
> m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand,
> sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS,
> m_analysisFile);
> }
> #undef X265_FWRITE
> }
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150203/d4223ca2/attachment-0001.html>
More information about the x265-devel
mailing list