[x265] [PATCH] analysis: dump the best depth and re-use it for analysis-mode=load
gopu at multicorewareinc.com
gopu at multicorewareinc.com
Mon Dec 22 12:29:12 CET 2014
# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1419247700 -19800
# Mon Dec 22 16:58:20 2014 +0530
# Node ID 8606c4019f6b962bec47398ac8f876642ecab747
# Parent 8d2f418829c894c25da79daa861f16c61e5060d7
analysis: dump the best depth and re-use it for analysis-mode=load
For inter frames, we currently dump the best ref and re-use it. In addition to
that, share the best depth and re-use it for analysis-mode=load. The best depth
can be shared only when the mode is MODE_SKIP; otherwise it is ignored.
diff -r 8d2f418829c8 -r 8606c4019f6b source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Sat Dec 20 21:27:14 2014 +0900
+++ b/source/encoder/analysis.cpp Mon Dec 22 16:58:20 2014 +0530
@@ -138,9 +138,9 @@
m_reuseInterDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData + ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir;
}
+ uint32_t zOrder = 0;
if (m_slice->m_sliceType == I_SLICE)
{
- uint32_t zOrder = 0;
compressIntraCU(ctu, cuGeom, m_reuseIntraDataCTU, zOrder);
if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
{
@@ -158,7 +158,7 @@
* they are available for intra predictions */
m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
- compressInterCU_rd0_4(ctu, cuGeom);
+ compressInterCU_rd0_4(ctu, cuGeom, m_reuseIntraDataCTU, zOrder);
/* generate residual for entire CTU at once and copy to reconPic */
encodeResidue(ctu, cuGeom);
@@ -166,9 +166,17 @@
else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
compressInterCU_dist(ctu, cuGeom);
else if (m_param->rdLevel <= 4)
- compressInterCU_rd0_4(ctu, cuGeom);
+ compressInterCU_rd0_4(ctu, cuGeom, m_reuseIntraDataCTU, zOrder);
else
- compressInterCU_rd5_6(ctu, cuGeom);
+ {
+ compressInterCU_rd5_6(ctu, cuGeom, m_reuseIntraDataCTU, zOrder);
+ if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
+ {
+ CUData *bestCU = &m_modeDepth[0].bestMode->cu;
+ memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
+ memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_predMode, sizeof(uint8_t) * numPartition);
+ }
+ }
}
return *m_modeDepth[0].bestMode;
@@ -748,7 +756,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.encodeIdx);
}
-void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom)
+void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, analysis_intra_data* reuseIntraData, uint32_t& zOrder)
{
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -982,7 +990,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd0_4(parentCTU, childGeom);
+ compressInterCU_rd0_4(parentCTU, childGeom, reuseIntraData, zOrder);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1033,7 +1041,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.encodeIdx);
}
-void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom)
+void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, analysis_intra_data* reuseIntraData, uint32_t& zOrder)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
@@ -1042,6 +1050,50 @@
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+ {
+ uint8_t* reuseDepth = &reuseIntraData->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+ uint8_t* reuseModes = &reuseIntraData->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+ if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.encodeIdx && reuseModes[zOrder] == MODE_SKIP)
+ {
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
+ checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+ if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&
+ (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
+ {
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+ checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
+ checkBestMode(md.pred[PRED_INTRA], depth);
+
+ if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
+ {
+ md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
+ checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder]);
+ checkBestMode(md.pred[PRED_INTRA_NxN], depth);
+ }
+ }
+
+ if (m_bTryLossless)
+ tryLossless(cuGeom);
+
+ if (mightSplit)
+ addSplitFlagCost(*md.bestMode, cuGeom.depth);
+
+ mightSplit = false;
+ mightNotSplit = false;
+
+ // increment zOrder offset to point to next best depth in sharedDepth buffer
+ zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
+
+ int numPredDir = m_slice->isInterP() ? 1 : 2;
+ for (int i = 0; i < md.bestMode->cu.getNumPartInter(); i++)
+ for (int l = 0; l < numPredDir; l++)
+ m_reuseInterDataCTU++;
+ }
+ }
+
if (mightNotSplit)
{
md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
@@ -1173,7 +1225,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd5_6(parentCTU, childGeom);
+ compressInterCU_rd5_6(parentCTU, childGeom, reuseIntraData, zOrder);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1182,7 +1234,10 @@
nextContext = &nd.bestMode->contexts;
}
else
+ {
splitCU->setEmptyPart(childGeom, subPartIdx);
+ zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
+ }
}
nextContext->store(splitPred->contexts);
if (mightNotSplit)
diff -r 8d2f418829c8 -r 8606c4019f6b source/encoder/analysis.h
--- a/source/encoder/analysis.h Sat Dec 20 21:27:14 2014 +0900
+++ b/source/encoder/analysis.h Mon Dec 22 16:58:20 2014 +0530
@@ -99,8 +99,8 @@
/* full analysis for a P or B slice CU */
void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom);
- void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
- void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom);
+ void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, analysis_intra_data* sdata, uint32_t &zOrder);
+ void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, analysis_intra_data* sdata, uint32_t &zOrder);
/* measure merge and skip */
void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
diff -r 8d2f418829c8 -r 8606c4019f6b source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Sat Dec 20 21:27:14 2014 +0900
+++ b/source/encoder/encoder.cpp Mon Dec 22 16:58:20 2014 +0530
@@ -1639,12 +1639,16 @@
else if (analysis->sliceType == X265_TYPE_P)
{
X265_FREAD(analysis->interData, sizeof(analysis_inter_data), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
+ X265_FREAD(((analysis_intra_data *)analysis->intraData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
consumedBytes += frameRecordSize;
totalConsumedBytes = consumedBytes;
}
else
{
X265_FREAD(analysis->interData, sizeof(analysis_inter_data), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
+ X265_FREAD(((analysis_intra_data *)analysis->intraData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
consumedBytes += frameRecordSize;
}
#undef X265_FREAD
@@ -1668,9 +1672,15 @@
if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)
analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 3;
else if (analysis->sliceType == X265_TYPE_P)
+ {
analysis->frameRecordSize += sizeof(analysis_inter_data) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
+ analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
+ }
else
+ {
analysis->frameRecordSize += sizeof(analysis_inter_data) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
+ analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
+ }
X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);
X265_FWRITE(&analysis->poc, sizeof(int), 1, m_analysisFile);
@@ -1687,10 +1697,14 @@
else if (analysis->sliceType == X265_TYPE_P)
{
X265_FWRITE(analysis->interData, sizeof(analysis_inter_data), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
+ X265_FWRITE(((analysis_intra_data*)analysis->intraData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FWRITE(((analysis_intra_data*)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
}
else
{
X265_FWRITE(analysis->interData, sizeof(analysis_inter_data), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
+ X265_FWRITE(((analysis_intra_data*)analysis->intraData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FWRITE(((analysis_intra_data*)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
}
#undef X265_FWRITE
}
More information about the x265-devel
mailing list