<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Feb 2, 2015 at 10:23 AM, <span dir="ltr"><<a href="mailto:gopu@multicorewareinc.com" target="_blank">gopu@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Gopu Govindaswamy <<a href="mailto:gopu@multicorewareinc.com">gopu@multicorewareinc.com</a>><br>
# Date 1422852790 -19800<br>
# Mon Feb 02 10:23:10 2015 +0530<br>
# Node ID db56dc779466c5b54a55b5dadbcd04e882011729<br>
# Parent 6c5156500d6d4fa655acaf7a8b77f2ba3a0f794b<br>
analysis: dump and reuse the bestmergeCand for skip and merge mode<br>
<br>
diff -r 6c5156500d6d -r db56dc779466 source/common/common.h<br>
--- a/source/common/common.h Fri Jan 30 11:54:22 2015 -0600<br>
+++ b/source/common/common.h Mon Feb 02 10:23:10 2015 +0530<br>
@@ -376,6 +376,7 @@<br>
int32_t* ref;<br>
uint8_t* depth;<br>
uint8_t* modes;<br>
+ uint32_t* bestMergeCand;<br>
};<br>
<br>
/* Stores intra analysis data for a single frame. This struct needs better packing */<br>
diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.cpp<br>
--- a/source/encoder/analysis.cpp Fri Jan 30 11:54:22 2015 -0600<br>
+++ b/source/encoder/analysis.cpp Mon Feb 02 10:23:10 2015 +0530<br>
@@ -140,6 +140,7 @@<br>
int numPredDir = m_slice->isInterP() ? 1 : 2;<br>
m_reuseInterDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData;<br>
reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];<br>
+ reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];<br>
}<br>
}<br>
<br>
@@ -1066,21 +1067,6 @@<br>
md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);<br>
checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);<br>
<br>
- if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&<br>
- (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))<br>
- {<br>
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);<br>
- checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);<br>
- checkBestMode(md.pred[PRED_INTRA], depth);<br>
-<br>
- if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)<br>
- {<br>
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);<br>
- checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder]);<br>
- checkBestMode(md.pred[PRED_INTRA_NxN], depth);<br>
- }<br>
- }<br>
-<br>
if (m_bTryLossless)<br>
tryLossless(cuGeom);<br>
<br>
@@ -1388,29 +1374,10 @@<br>
bool foundCbf0Merge = false;<br>
bool triedPZero = false, triedBZero = false;<br>
bestPred->rdCost = MAX_INT64;<br>
- for (uint32_t i = 0; i < maxNumMergeCand; i++)<br>
+<br>
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD)<br>
{<br>
- if (m_bFrameParallel &&<br>
- (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||<br>
- mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))<br>
- continue;<br>
-<br>
- /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */<br>
- if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)<br>
- {<br>
- if (triedPZero)<br>
- continue;<br>
- triedPZero = true;<br>
- }<br>
- else if (interDirNeighbours[i] == 3 &&<br>
- !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&<br>
- !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)<br>
- {<br>
- if (triedBZero)<br>
- continue;<br>
- triedBZero = true;<br>
- }<br>
-<br>
+ uint32_t i = *reuseBestMergeCand;<br>
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */<br>
tempPred->cu.m_interDir[0] = interDirNeighbours[i];<br>
tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;<br>
@@ -1424,24 +1391,20 @@<br>
<br>
uint8_t hasCbf = true;<br>
bool swapped = false;<br>
- if (!foundCbf0Merge)<br>
+<br>
+ /* if the best prediction has CBF (not a skip) then try merge with residual */<br>
+ encodeResAndCalcRdInterCU(*tempPred, cuGeom);<br>
+ hasCbf = tempPred->cu.getQtRootCbf(0);<br>
+ foundCbf0Merge = !hasCbf;<br>
+<br>
+ if (tempPred->rdCost < bestPred->rdCost)<br>
{<br>
- /* if the best prediction has CBF (not a skip) then try merge with residual */<br>
-<br>
- encodeResAndCalcRdInterCU(*tempPred, cuGeom);<br>
- hasCbf = tempPred->cu.getQtRootCbf(0);<br>
- foundCbf0Merge = !hasCbf;<br>
-<br>
- if (tempPred->rdCost < bestPred->rdCost)<br>
- {<br>
- std::swap(tempPred, bestPred);<br>
- swapped = true;<br>
- }<br>
+ std::swap(tempPred, bestPred);<br>
+ swapped = true;<br>
}<br>
if (!m_param->bLossless && hasCbf)<br>
{<br>
/* try merge without residual (skip), if not lossless coding */<br>
-<br>
if (swapped)<br>
{<br>
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;<br>
@@ -1453,12 +1416,88 @@<br>
tempPred->cu.setPredModeSubParts(MODE_INTER);<br>
tempPred->predYuv.copyFromYuv(bestPred->predYuv);<br>
}<br>
-<br>
+<br>
encodeResAndCalcRdSkipCU(*tempPred);<br>
<br>
if (tempPred->rdCost < bestPred->rdCost)<br>
std::swap(tempPred, bestPred);<br>
}<br>
+ reuseBestMergeCand++;<br>
+ }<br></blockquote><div><br></div><div>This is way too much code duplication. Let's fold this in by changing maxNumMergeCand based on analysis-mode. <br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ else<br>
+ {<br>
+ for (uint32_t i = 0; i < maxNumMergeCand; i++)<br>
+ {<br>
+ if (m_bFrameParallel &&<br>
+ (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||<br>
+ mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))<br>
+ continue;<br>
+<br>
+ /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */<br>
+ if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)<br>
+ {<br>
+ if (triedPZero)<br>
+ continue;<br>
+ triedPZero = true;<br>
+ }<br>
+ else if (interDirNeighbours[i] == 3 &&<br>
+ !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&<br>
+ !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)<br>
+ {<br>
+ if (triedBZero)<br>
+ continue;<br>
+ triedBZero = true;<br>
+ }<br>
+<br>
+ tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; /* merge candidate ID is stored in L0 MVP idx */<br>
+ tempPred->cu.m_interDir[0] = interDirNeighbours[i];<br>
+ tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;<br>
+ tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;<br>
+ tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;<br>
+ tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;<br>
+ tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */<br>
+<br>
+ prepMotionCompensation(tempPred->cu, cuGeom, 0);<br>
+ motionCompensation(tempPred->predYuv, true, true);<br>
+<br>
+ uint8_t hasCbf = true;<br>
+ bool swapped = false;<br>
+ if (!foundCbf0Merge)<br>
+ {<br>
+ /* if the best prediction has CBF (not a skip) then try merge with residual */<br>
+<br>
+ encodeResAndCalcRdInterCU(*tempPred, cuGeom);<br>
+ hasCbf = tempPred->cu.getQtRootCbf(0);<br>
+ foundCbf0Merge = !hasCbf;<br>
+<br>
+ if (tempPred->rdCost < bestPred->rdCost)<br>
+ {<br>
+ std::swap(tempPred, bestPred);<br>
+ swapped = true;<br>
+ }<br>
+ }<br>
+ if (!m_param->bLossless && hasCbf)<br>
+ {<br>
+ /* try merge without residual (skip), if not lossless coding */<br>
+<br>
+ if (swapped)<br>
+ {<br>
+ tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;<br>
+ tempPred->cu.m_interDir[0] = interDirNeighbours[i];<br>
+ tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;<br>
+ tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;<br>
+ tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;<br>
+ tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;<br>
+ tempPred->cu.setPredModeSubParts(MODE_INTER);<br>
+ tempPred->predYuv.copyFromYuv(bestPred->predYuv);<br>
+ }<br>
+<br>
+ encodeResAndCalcRdSkipCU(*tempPred);<br>
+<br>
+ if (tempPred->rdCost < bestPred->rdCost)<br>
+ std::swap(tempPred, bestPred);<br>
+ }<br>
+ }<br>
}<br>
<br>
if (bestPred->rdCost < MAX_INT64)<br>
@@ -1473,6 +1512,12 @@<br>
bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);<br>
bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);<br>
}<br>
+<br>
+ if (m_param->analysisMode == X265_ANALYSIS_SAVE)<br>
+ {<br>
+ *reuseBestMergeCand = bestPred->cu.m_mvpIdx[0][0];<br>
+ reuseBestMergeCand++;<br>
+ }<br>
}<br>
<br>
void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)<br>
diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.h<br>
--- a/source/encoder/analysis.h Fri Jan 30 11:54:22 2015 -0600<br>
+++ b/source/encoder/analysis.h Mon Feb 02 10:23:10 2015 +0530<br>
@@ -78,6 +78,7 @@<br>
analysis_intra_data* m_reuseIntraDataCTU;<br>
analysis_inter_data* m_reuseInterDataCTU;<br>
int32_t* reuseRef;<br>
+ uint32_t* reuseBestMergeCand;<br>
Analysis();<br>
bool create(ThreadLocalData* tld);<br>
void destroy();<br>
diff -r 6c5156500d6d -r db56dc779466 source/encoder/encoder.cpp<br>
--- a/source/encoder/encoder.cpp Fri Jan 30 11:54:22 2015 -0600<br>
+++ b/source/encoder/encoder.cpp Mon Feb 02 10:23:10 2015 +0530<br>
@@ -1628,6 +1628,7 @@<br>
CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);<br>
CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>
CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>
+ CHECKED_MALLOC_ZERO(interData->bestMergeCand, uint32_t, analysis->numCUsInFrame * CUGeom::MAX_GEOMS);<br>
analysis->interData = interData;<br>
}<br>
return;<br>
@@ -1651,6 +1652,7 @@<br>
X265_FREE(((analysis_inter_data*)analysis->interData)->ref);<br>
X265_FREE(((analysis_inter_data*)analysis->interData)->depth);<br>
X265_FREE(((analysis_inter_data*)analysis->interData)->modes);<br>
+ X265_FREE(((analysis_inter_data*)analysis->interData)->bestMergeCand);<br>
X265_FREE(analysis->interData);<br>
}<br>
}<br>
@@ -1716,6 +1718,7 @@<br>
X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);<br>
X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
+ X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);<br>
consumedBytes += frameRecordSize;<br>
totalConsumedBytes = consumedBytes;<br>
}<br>
@@ -1724,6 +1727,7 @@<br>
X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);<br>
X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
+ X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);<br>
consumedBytes += frameRecordSize;<br>
}<br>
#undef X265_FREAD<br>
@@ -1750,11 +1754,13 @@<br>
{<br>
analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;<br>
analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;<br>
+ analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * CUGeom::MAX_GEOMS;<br>
}<br>
else<br>
{<br>
analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;<br>
analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;<br>
+ analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * CUGeom::MAX_GEOMS;<br>
}<br>
<br>
X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);<br>
@@ -1774,12 +1780,14 @@<br>
X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);<br>
X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
+ X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);<br>
}<br>
else<br>
{<br>
X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);<br>
X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
+ X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);<br>
}<br>
#undef X265_FWRITE<br>
}<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div></div>