[x265] [PATCH] analysis: reuse the bestSadCand in rd level 0 to 4 for skip and merge modes
gopu at multicorewareinc.com
gopu at multicorewareinc.com
Tue Feb 3 08:59:00 CET 2015
# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1422950320 -19800
# Tue Feb 03 13:28:40 2015 +0530
# Node ID 420bb1251dc7b9bf1dbb9e6f8e2655ea8f18fe1c
# Parent c01267c2280b33047bed11d812880d17153040ed
analysis: reuse the bestSadCand in rd level 0 to 4 for skip and merge modes
diff -r c01267c2280b -r 420bb1251dc7 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Tue Feb 03 11:22:12 2015 +0530
+++ b/source/encoder/analysis.cpp Tue Feb 03 13:28:40 2015 +0530
@@ -166,7 +166,7 @@
* they are available for intra predictions */
m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
- compressInterCU_rd0_4(ctu, cuGeom);
+ compressInterCU_rd0_4(ctu, cuGeom, zOrder);
/* generate residual for entire CTU at once and copy to reconPic */
encodeResidue(ctu, cuGeom);
@@ -174,16 +174,15 @@
else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
compressInterCU_dist(ctu, cuGeom);
else if (m_param->rdLevel <= 4)
- compressInterCU_rd0_4(ctu, cuGeom);
+ compressInterCU_rd0_4(ctu, cuGeom, zOrder);
else
+ compressInterCU_rd5_6(ctu, cuGeom, zOrder);
+
+ if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData && !m_param->bDistributeModeAnalysis)
{
- compressInterCU_rd5_6(ctu, cuGeom, zOrder);
- if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData)
- {
- CUData *bestCU = &m_modeDepth[0].bestMode->cu;
- memcpy(&m_reuseInterDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
- memcpy(&m_reuseInterDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_predMode, sizeof(uint8_t) * numPartition);
- }
+ CUData *bestCU = &m_modeDepth[0].bestMode->cu;
+ memcpy(&m_reuseInterDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
+ memcpy(&m_reuseInterDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_predMode, sizeof(uint8_t) * numPartition);
}
}
@@ -789,7 +788,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.encodeIdx);
}
-void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom)
+void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
{
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -800,6 +799,30 @@
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+ {
+ uint8_t* reuseDepth = &m_reuseInterDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+ uint8_t* reuseModes = &m_reuseInterDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+ if (mightNotSplit && depth >= minDepth && depth == reuseDepth[zOrder] && zOrder == cuGeom.encodeIdx && reuseModes[zOrder] == MODE_SKIP)
+ {
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+ if (m_bTryLossless)
+ tryLossless(cuGeom);
+
+ if (mightSplit)
+ addSplitFlagCost(*md.bestMode, cuGeom.depth);
+
+ // increment zOrder offset to point to next best depth in sharedDepth buffer
+ zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
+
+ mightSplit = false;
+ mightNotSplit = false;
+ }
+ }
+
if (mightNotSplit && depth >= minDepth)
{
bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
@@ -1022,7 +1045,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.encodeIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd0_4(parentCTU, childGeom);
+ compressInterCU_rd0_4(parentCTU, childGeom, zOrder);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1036,7 +1059,10 @@
nextContext = &nd.bestMode->contexts;
}
else
+ {
splitCU->setEmptyPart(childGeom, subPartIdx);
+ zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
+ }
}
nextContext->store(splitPred->contexts);
@@ -1300,10 +1326,18 @@
for (uint32_t i = 0; i < maxNumMergeCand; ++i)
{
- if (m_bFrameParallel &&
- (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
- mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
- continue;
+ if (m_param->analysisMode == X265_ANALYSIS_LOAD)
+ {
+ i = (int)*reuseBestMergeCand;
+ maxNumMergeCand = 1;
+ }
+ else
+ {
+ if (m_bFrameParallel &&
+ (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
+ mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
+ continue;
+ }
tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i; // merge candidate ID is stored in L0 MVP idx
tempPred->cu.m_interDir[0] = interDirNeighbours[i];
@@ -1335,6 +1369,11 @@
if (bestSadCand < 0)
return;
+ if (m_param->analysisMode == X265_ANALYSIS_SAVE)
+ *reuseBestMergeCand = bestSadCand;
+ if (m_param->analysisMode)
+ reuseBestMergeCand++;
+
/* calculate the motion compensation for chroma for the best mode selected */
if (!m_bChromaSa8d) /* Chroma MC was done above */
{
diff -r c01267c2280b -r 420bb1251dc7 source/encoder/analysis.h
--- a/source/encoder/analysis.h Tue Feb 03 11:22:12 2015 +0530
+++ b/source/encoder/analysis.h Tue Feb 03 13:28:40 2015 +0530
@@ -101,7 +101,7 @@
/* full analysis for a P or B slice CU */
void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom);
- void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
+ void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
/* measure merge and skip */
More information about the x265-devel mailing list