[x265] [PATCH] analysis: dump and reuse bestmergeCand for skip and merge mode
Steve Borho
steve at borho.org
Fri Jan 30 18:40:10 CET 2015
On 01/30, gopu at multicorewareinc.com wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1422614706 -19800
> # Fri Jan 30 16:15:06 2015 +0530
> # Node ID f732981763c90cd9bf7db88fae6e526932bf596d
> # Parent 5e5dc3763f6386da9722903033a2b9dd263a5226
> analysis: dump and reuse bestmergeCand for skip and merge mode
>
> diff -r 5e5dc3763f63 -r f732981763c9 source/common/common.h
> --- a/source/common/common.h Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/common/common.h Fri Jan 30 16:15:06 2015 +0530
> @@ -318,6 +318,7 @@
> #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
> #define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
> +#define MAX_RECURSIVE_PERCTU 85
This value is already defined as MAX_GEOMS
> namespace x265 {
>
> @@ -375,6 +376,7 @@
> int32_t* ref;
> uint8_t* depth;
> uint8_t* modes;
> + uint32_t* bestMergeCand;
> };
>
> /* Stores intra analysis data for a single frame. This struct needs better packing */
> diff -r 5e5dc3763f63 -r f732981763c9 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/analysis.cpp Fri Jan 30 16:15:06 2015 +0530
> @@ -140,6 +140,7 @@
> int numPredDir = m_slice->isInterP() ? 1 : 2;
> m_reuseInterDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData;
> reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> + reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * MAX_RECURSIVE_PERCTU];
> }
> }
>
> @@ -1066,21 +1067,6 @@
> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
> checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
>
> - if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&
> - (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
> - {
> - md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
> - checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
> - checkBestMode(md.pred[PRED_INTRA], depth);
> -
> - if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
> - {
> - md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
> - checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder]);
> - checkBestMode(md.pred[PRED_INTRA_NxN], depth);
> - }
> - }
> -
> if (m_bTryLossless)
> tryLossless(cuGeom);
>
> @@ -1388,29 +1374,10 @@
> bool foundCbf0Merge = false;
> bool triedPZero = false, triedBZero = false;
> bestPred->rdCost = MAX_INT64;
> - for (uint32_t i = 0; i < maxNumMergeCand; i++)
> +
> + if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> {
> - if (m_bFrameParallel &&
> - (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
> - mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
> - continue;
> -
> - /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */
> - if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
> - {
> - if (triedPZero)
> - continue;
> - triedPZero = true;
> - }
> - else if (interDirNeighbours[i] == 3 &&
> - !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&
> - !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)
> - {
> - if (triedBZero)
> - continue;
> - triedBZero = true;
> - }
> -
> + uint32_t i = *reuseBestMergeCand;
> tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */
> tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> @@ -1424,24 +1391,20 @@
>
> uint8_t hasCbf = true;
> bool swapped = false;
> - if (!foundCbf0Merge)
> +
> + /* if the best prediction has CBF (not a skip) then try merge with residual */
> + encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> + hasCbf = tempPred->cu.getQtRootCbf(0);
> + foundCbf0Merge = !hasCbf;
> +
> + if (tempPred->rdCost < bestPred->rdCost)
> {
> - /* if the best prediction has CBF (not a skip) then try merge with residual */
> -
> - encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> - hasCbf = tempPred->cu.getQtRootCbf(0);
> - foundCbf0Merge = !hasCbf;
> -
> - if (tempPred->rdCost < bestPred->rdCost)
> - {
> - std::swap(tempPred, bestPred);
> - swapped = true;
> - }
> + std::swap(tempPred, bestPred);
> + swapped = true;
> }
> if (!m_param->bLossless && hasCbf)
> {
> /* try merge without residual (skip), if not lossless coding */
> -
> if (swapped)
> {
> tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
> @@ -1453,12 +1416,88 @@
> tempPred->cu.setPredModeSubParts(MODE_INTER);
> tempPred->predYuv.copyFromYuv(bestPred->predYuv);
> }
> -
> +
> encodeResAndCalcRdSkipCU(*tempPred);
>
> if (tempPred->rdCost < bestPred->rdCost)
> std::swap(tempPred, bestPred);
> }
> + reuseBestMergeCand++;
> + }
> + else
> + {
> + for (uint32_t i = 0; i < maxNumMergeCand; i++)
> + {
> + if (m_bFrameParallel &&
> + (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
> + mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
> + continue;
> +
> + /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */
> + if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
> + {
> + if (triedPZero)
> + continue;
> + triedPZero = true;
> + }
> + else if (interDirNeighbours[i] == 3 &&
> + !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&
> + !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)
> + {
> + if (triedBZero)
> + continue;
> + triedBZero = true;
> + }
> +
> + tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */
> + tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> + tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> + tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
> + tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
> + tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
> + tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */
> +
> + prepMotionCompensation(tempPred->cu, cuGeom, 0);
> + motionCompensation(tempPred->predYuv, true, true);
> +
> + uint8_t hasCbf = true;
> + bool swapped = false;
> + if (!foundCbf0Merge)
> + {
> + /* if the best prediction has CBF (not a skip) then try merge with residual */
> +
> + encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> + hasCbf = tempPred->cu.getQtRootCbf(0);
> + foundCbf0Merge = !hasCbf;
> +
> + if (tempPred->rdCost < bestPred->rdCost)
> + {
> + std::swap(tempPred, bestPred);
> + swapped = true;
> + }
> + }
> + if (!m_param->bLossless && hasCbf)
> + {
> + /* try merge without residual (skip), if not lossless coding */
> +
> + if (swapped)
> + {
> + tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
> + tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> + tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> + tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
> + tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
> + tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
> + tempPred->cu.setPredModeSubParts(MODE_INTER);
> + tempPred->predYuv.copyFromYuv(bestPred->predYuv);
> + }
> +
> + encodeResAndCalcRdSkipCU(*tempPred);
> +
> + if (tempPred->rdCost < bestPred->rdCost)
> + std::swap(tempPred, bestPred);
> + }
> + }
> }
>
> if (bestPred->rdCost < MAX_INT64)
> @@ -1473,6 +1512,12 @@
> bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);
> bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);
> }
> +
> + if (m_param->analysisMode == X265_ANALYSIS_SAVE)
> + {
> + *reuseBestMergeCand = bestPred->cu.m_mvpIdx[0][0];
> + reuseBestMergeCand++;
> + }
> }
>
> void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)
> diff -r 5e5dc3763f63 -r f732981763c9 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/analysis.h Fri Jan 30 16:15:06 2015 +0530
> @@ -78,6 +78,7 @@
> analysis_intra_data* m_reuseIntraDataCTU;
> analysis_inter_data* m_reuseInterDataCTU;
> int32_t* reuseRef;
> + uint32_t* reuseBestMergeCand;
> Analysis();
> bool create(ThreadLocalData* tld);
> void destroy();
> diff -r 5e5dc3763f63 -r f732981763c9 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/encoder.cpp Fri Jan 30 16:15:06 2015 +0530
> @@ -1634,6 +1634,7 @@
> CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);
> CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
> CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
> + CHECKED_MALLOC_ZERO(interData->bestMergeCand, uint32_t, analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU);
> analysis->interData = interData;
> }
> return;
> @@ -1657,6 +1658,7 @@
> X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
> X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
> X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
> + X265_FREE(((analysis_inter_data*)analysis->interData)->bestMergeCand);
> X265_FREE(analysis->interData);
> }
> }
> @@ -1722,6 +1724,7 @@
> X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
> X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> + X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU, m_analysisFile);
> consumedBytes += frameRecordSize;
> totalConsumedBytes = consumedBytes;
> }
> @@ -1730,6 +1733,7 @@
> X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
> X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> + X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU, m_analysisFile);
> consumedBytes += frameRecordSize;
> }
> #undef X265_FREAD
> @@ -1756,11 +1760,13 @@
> {
> analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
> analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
> + analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU;
> }
> else
> {
> analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
> analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
> + analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU;
> }
>
> X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);
> @@ -1780,12 +1786,14 @@
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> + X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU, m_analysisFile);
> }
> else
> {
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> + X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU, m_analysisFile);
> }
> #undef X265_FWRITE
> }
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list