[x265] [PATCH] analysis: dump and reuse the bestMergeCand for skip and merge mode
gopu at multicorewareinc.com
gopu at multicorewareinc.com
Mon Feb 2 05:53:22 CET 2015
# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1422852790 -19800
# Mon Feb 02 10:23:10 2015 +0530
# Node ID db56dc779466c5b54a55b5dadbcd04e882011729
# Parent 6c5156500d6d4fa655acaf7a8b77f2ba3a0f794b
analysis: dump and reuse the bestMergeCand for skip and merge mode
diff -r 6c5156500d6d -r db56dc779466 source/common/common.h
--- a/source/common/common.h Fri Jan 30 11:54:22 2015 -0600
+++ b/source/common/common.h Mon Feb 02 10:23:10 2015 +0530
@@ -376,6 +376,7 @@
int32_t* ref;
uint8_t* depth;
uint8_t* modes;
+ uint32_t* bestMergeCand;
};
/* Stores intra analysis data for a single frame. This struct needs better packing */
diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Jan 30 11:54:22 2015 -0600
+++ b/source/encoder/analysis.cpp Mon Feb 02 10:23:10 2015 +0530
@@ -140,6 +140,7 @@
int numPredDir = m_slice->isInterP() ? 1 : 2;
m_reuseInterDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData;
reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
+ reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
}
}
@@ -1066,21 +1067,6 @@
md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
- if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&
- (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
- {
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
- checkBestMode(md.pred[PRED_INTRA], depth);
-
- if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
- {
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
- checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder]);
- checkBestMode(md.pred[PRED_INTRA_NxN], depth);
- }
- }
-
if (m_bTryLossless)
tryLossless(cuGeom);
@@ -1388,29 +1374,10 @@
bool foundCbf0Merge = false;
bool triedPZero = false, triedBZero = false;
bestPred->rdCost = MAX_INT64;
- for (uint32_t i = 0; i < maxNumMergeCand; i++)
+
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD)
{
- if (m_bFrameParallel &&
- (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
- mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
- continue;
-
- /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */
- if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
- {
- if (triedPZero)
- continue;
- triedPZero = true;
- }
- else if (interDirNeighbours[i] == 3 &&
- !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&
- !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)
- {
- if (triedBZero)
- continue;
- triedBZero = true;
- }
-
+ uint32_t i = *reuseBestMergeCand;
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */
tempPred->cu.m_interDir[0] = interDirNeighbours[i];
tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
@@ -1424,24 +1391,20 @@
uint8_t hasCbf = true;
bool swapped = false;
- if (!foundCbf0Merge)
+
+ /* if the best prediction has CBF (not a skip) then try merge with residual */
+ encodeResAndCalcRdInterCU(*tempPred, cuGeom);
+ hasCbf = tempPred->cu.getQtRootCbf(0);
+ foundCbf0Merge = !hasCbf;
+
+ if (tempPred->rdCost < bestPred->rdCost)
{
- /* if the best prediction has CBF (not a skip) then try merge with residual */
-
- encodeResAndCalcRdInterCU(*tempPred, cuGeom);
- hasCbf = tempPred->cu.getQtRootCbf(0);
- foundCbf0Merge = !hasCbf;
-
- if (tempPred->rdCost < bestPred->rdCost)
- {
- std::swap(tempPred, bestPred);
- swapped = true;
- }
+ std::swap(tempPred, bestPred);
+ swapped = true;
}
if (!m_param->bLossless && hasCbf)
{
/* try merge without residual (skip), if not lossless coding */
-
if (swapped)
{
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
@@ -1453,12 +1416,88 @@
tempPred->cu.setPredModeSubParts(MODE_INTER);
tempPred->predYuv.copyFromYuv(bestPred->predYuv);
}
-
+
encodeResAndCalcRdSkipCU(*tempPred);
if (tempPred->rdCost < bestPred->rdCost)
std::swap(tempPred, bestPred);
}
+ reuseBestMergeCand++;
+ }
+ else
+ {
+ for (uint32_t i = 0; i < maxNumMergeCand; i++)
+ {
+ if (m_bFrameParallel &&
+ (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
+ mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
+ continue;
+
+ /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */
+ if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
+ {
+ if (triedPZero)
+ continue;
+ triedPZero = true;
+ }
+ else if (interDirNeighbours[i] == 3 &&
+ !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&
+ !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)
+ {
+ if (triedBZero)
+ continue;
+ triedBZero = true;
+ }
+
+ tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */
+ tempPred->cu.m_interDir[0] = interDirNeighbours[i];
+ tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
+ tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
+ tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
+ tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
+ tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */
+
+ prepMotionCompensation(tempPred->cu, cuGeom, 0);
+ motionCompensation(tempPred->predYuv, true, true);
+
+ uint8_t hasCbf = true;
+ bool swapped = false;
+ if (!foundCbf0Merge)
+ {
+ /* if the best prediction has CBF (not a skip) then try merge with residual */
+
+ encodeResAndCalcRdInterCU(*tempPred, cuGeom);
+ hasCbf = tempPred->cu.getQtRootCbf(0);
+ foundCbf0Merge = !hasCbf;
+
+ if (tempPred->rdCost < bestPred->rdCost)
+ {
+ std::swap(tempPred, bestPred);
+ swapped = true;
+ }
+ }
+ if (!m_param->bLossless && hasCbf)
+ {
+ /* try merge without residual (skip), if not lossless coding */
+
+ if (swapped)
+ {
+ tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
+ tempPred->cu.m_interDir[0] = interDirNeighbours[i];
+ tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
+ tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
+ tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
+ tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
+ tempPred->cu.setPredModeSubParts(MODE_INTER);
+ tempPred->predYuv.copyFromYuv(bestPred->predYuv);
+ }
+
+ encodeResAndCalcRdSkipCU(*tempPred);
+
+ if (tempPred->rdCost < bestPred->rdCost)
+ std::swap(tempPred, bestPred);
+ }
+ }
}
if (bestPred->rdCost < MAX_INT64)
@@ -1473,6 +1512,12 @@
bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);
bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);
}
+
+ if (m_param->analysisMode == X265_ANALYSIS_SAVE)
+ {
+ *reuseBestMergeCand = bestPred->cu.m_mvpIdx[0][0];
+ reuseBestMergeCand++;
+ }
}
void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)
diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.h
--- a/source/encoder/analysis.h Fri Jan 30 11:54:22 2015 -0600
+++ b/source/encoder/analysis.h Mon Feb 02 10:23:10 2015 +0530
@@ -78,6 +78,7 @@
analysis_intra_data* m_reuseIntraDataCTU;
analysis_inter_data* m_reuseInterDataCTU;
int32_t* reuseRef;
+ uint32_t* reuseBestMergeCand;
Analysis();
bool create(ThreadLocalData* tld);
void destroy();
diff -r 6c5156500d6d -r db56dc779466 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Jan 30 11:54:22 2015 -0600
+++ b/source/encoder/encoder.cpp Mon Feb 02 10:23:10 2015 +0530
@@ -1628,6 +1628,7 @@
CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);
CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
+ CHECKED_MALLOC_ZERO(interData->bestMergeCand, uint32_t, analysis->numCUsInFrame * CUGeom::MAX_GEOMS);
analysis->interData = interData;
}
return;
@@ -1651,6 +1652,7 @@
X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
+ X265_FREE(((analysis_inter_data*)analysis->interData)->bestMergeCand);
X265_FREE(analysis->interData);
}
}
@@ -1716,6 +1718,7 @@
X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
consumedBytes += frameRecordSize;
totalConsumedBytes = consumedBytes;
}
@@ -1724,6 +1727,7 @@
X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
consumedBytes += frameRecordSize;
}
#undef X265_FREAD
@@ -1750,11 +1754,13 @@
{
analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
+ analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * CUGeom::MAX_GEOMS;
}
else
{
analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
+ analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * CUGeom::MAX_GEOMS;
}
X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);
@@ -1774,12 +1780,14 @@
X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
}
else
{
X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
}
#undef X265_FWRITE
}
More information about the x265-devel mailing list