[x265] [PATCH] analysis: use AVC full CU analysis-info for analysis-reuselevel 8
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Fri Nov 17 12:47:07 CET 2017
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1510908391 -19800
# Fri Nov 17 14:16:31 2017 +0530
# Node ID 9723e8812e63ce51e38ede41f7d5edf73cad0849
# Parent c001df275b946f337ef10b988f2d0397b2c86ef3
analysis: use AVC full CU analysis-info for analysis-reuselevel 8
This patch work implements the functionality for anlysis-reuselevel 8, here we want
to incorpated AVC analysis data from offload in our enocde, HEVC speific sizes
(32x32 and 64x64) CU analysis still will be caluated as per x265 CLI option provided.
diff -r c001df275b94 -r 9723e8812e63 doc/reST/cli.rst
--- a/doc/reST/cli.rst Mon Nov 13 13:54:43 2017 +0530
+++ b/doc/reST/cli.rst Fri Nov 17 14:16:31 2017 +0530
@@ -894,10 +894,12 @@
+--------------+------------------------------------------+
| 2 to 4 | Level 1 + intra/inter modes, ref's |
+--------------+------------------------------------------+
- | 5,6,8 and 9 | Level 2 + rect-amp |
+ | 5,6 and 9 | Level 2 + rect-amp |
+--------------+------------------------------------------+
| 7 | Level 5 + AVC size CU refinement |
+--------------+------------------------------------------+
+ | 8 | Level 5 + AVC size Full CU analysis-info |
+ +--------------+------------------------------------------+
| 10 | Level 5 + Full CU analysis-info |
+--------------+------------------------------------------+
diff -r c001df275b94 -r 9723e8812e63 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Mon Nov 13 13:54:43 2017 +0530
+++ b/source/encoder/analysis.cpp Fri Nov 17 14:16:31 2017 +0530
@@ -235,6 +235,29 @@
}
else
{
+ bool bCopyAnalysis = ((m_param->analysisReuseMode == X265_ANALYSIS_LOAD && m_param->analysisReuseLevel == 10) || (m_param->bMVType && m_param->analysisReuseLevel >= 7 && ctu.m_numPartitions <= 16));
+ bool BCompressInterCUrd0_4 = (m_param->bMVType && m_param->analysisReuseLevel >= 7 && m_param->rdLevel <= 4);
+ bool BCompressInterCUrd5_6 = (m_param->bMVType && m_param->analysisReuseLevel >= 7 && m_param->rdLevel >= 5 && m_param->rdLevel <= 6);
+ bCopyAnalysis = bCopyAnalysis || BCompressInterCUrd0_4 || BCompressInterCUrd5_6;
+
+ if (bCopyAnalysis)
+ {
+ analysis_inter_data* interDataCTU = (analysis_inter_data*)m_frame->m_analysisData.interData;
+ int posCTU = ctu.m_cuAddr * numPartition;
+ memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU], sizeof(uint8_t) * numPartition);
+ memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
+ memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU], sizeof(uint8_t) * numPartition);
+ if ((m_slice->m_sliceType == P_SLICE || m_param->bIntraInBFrames) && !m_param->bMVType)
+ {
+ analysis_intra_data* intraDataCTU = (analysis_intra_data*)m_frame->m_analysisData.intraData;
+ memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
+ memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[posCTU], sizeof(uint8_t) * numPartition);
+ }
+ //Calculate log2CUSize from depth
+ for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+ ctu.m_log2CUSize[i] = (uint8_t)m_param->maxLog2CUSize - ctu.m_cuDepth[i];
+ }
+
if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
ctu.m_cuPelX / m_param->maxCUSize >= frame.m_encData->m_pir.pirStartCol
&& ctu.m_cuPelX / m_param->maxCUSize < frame.m_encData->m_pir.pirEndCol)
@@ -1124,7 +1147,7 @@
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
ModeDepth& md = m_modeDepth[depth];
- md.bestMode = NULL;
+
if (m_param->searchMethod == X265_SEA)
{
@@ -1137,427 +1160,917 @@
}
PicYuv& reconPic = *m_frame->m_reconPic;
-
- bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
- bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
- uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
- bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
- bool skipModes = false; /* Skip any remaining mode analyses at current depth */
- bool skipRecursion = false; /* Skip recursion */
- bool splitIntra = true;
- bool skipRectAmp = false;
- bool chooseMerge = false;
- bool bCtuInfoCheck = false;
- int sameContentRef = 0;
-
- if (m_evaluateInter)
+ SplitData splitCUData;
+
+ if ((m_param->bMVType && cuGeom.numPartitions > 16) || !m_param->bMVType)
{
- if (m_param->interRefine == 2)
+ md.bestMode = NULL;
+ bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
+ bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
+ uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
+ bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
+ bool skipModes = false; /* Skip any remaining mode analyses at current depth */
+ bool skipRecursion = false; /* Skip recursion */
+ bool splitIntra = true;
+ bool skipRectAmp = false;
+ bool chooseMerge = false;
+ bool bCtuInfoCheck = false;
+ int sameContentRef = 0;
+
+ if (m_evaluateInter)
{
- if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP)
+ if (m_param->interRefine == 2)
+ {
+ if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP)
+ skipModes = true;
+ if (parentCTU.m_partSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
+ skipRectAmp = true;
+ }
+ mightSplit &= false;
+ minDepth = depth;
+ }
+
+ if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+ m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
+
+ SplitData splitData[4];
+ splitData[0].initSplitCUData();
+ splitData[1].initSplitCUData();
+ splitData[2].initSplitCUData();
+ splitData[3].initSplitCUData();
+
+ // avoid uninitialize value in below reference
+ if (m_param->limitModes)
+ {
+ md.pred[PRED_2Nx2N].bestME[0][0].mvCost = 0; // L0
+ md.pred[PRED_2Nx2N].bestME[0][1].mvCost = 0; // L1
+ md.pred[PRED_2Nx2N].sa8dCost = 0;
+ }
+
+ if (m_param->bCTUInfo && depth <= parentCTU.m_cuDepth[cuGeom.absPartIdx])
+ {
+ if (bDecidedDepth && m_additionalCtuInfo[cuGeom.absPartIdx])
+ sameContentRef = findSameContentRefCount(parentCTU, cuGeom);
+ if (depth < parentCTU.m_cuDepth[cuGeom.absPartIdx])
+ {
+ mightNotSplit &= bDecidedDepth;
+ bCtuInfoCheck = skipRecursion = false;
skipModes = true;
- if (parentCTU.m_partSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
- skipRectAmp = true;
- }
- mightSplit &= false;
- minDepth = depth;
- }
-
- if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
- m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
-
- SplitData splitData[4];
- splitData[0].initSplitCUData();
- splitData[1].initSplitCUData();
- splitData[2].initSplitCUData();
- splitData[3].initSplitCUData();
-
- // avoid uninitialize value in below reference
- if (m_param->limitModes)
- {
- md.pred[PRED_2Nx2N].bestME[0][0].mvCost = 0; // L0
- md.pred[PRED_2Nx2N].bestME[0][1].mvCost = 0; // L1
- md.pred[PRED_2Nx2N].sa8dCost = 0;
- }
-
- if (m_param->bCTUInfo && depth <= parentCTU.m_cuDepth[cuGeom.absPartIdx])
- {
- if (bDecidedDepth && m_additionalCtuInfo[cuGeom.absPartIdx])
- sameContentRef = findSameContentRefCount(parentCTU, cuGeom);
- if (depth < parentCTU.m_cuDepth[cuGeom.absPartIdx])
- {
- mightNotSplit &= bDecidedDepth;
- bCtuInfoCheck = skipRecursion = false;
- skipModes = true;
- }
- else if (mightNotSplit && bDecidedDepth)
- {
- if (m_additionalCtuInfo[cuGeom.absPartIdx])
+ }
+ else if (mightNotSplit && bDecidedDepth)
{
- bCtuInfoCheck = skipRecursion = true;
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
- if (!sameContentRef)
+ if (m_additionalCtuInfo[cuGeom.absPartIdx])
{
- if ((m_param->bCTUInfo & 2) && (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth))
+ bCtuInfoCheck = skipRecursion = true;
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+ if (!sameContentRef)
{
- qp -= int32_t(0.04 * qp);
- setLambdaFromQP(parentCTU, qp);
+ if ((m_param->bCTUInfo & 2) && (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth))
+ {
+ qp -= int32_t(0.04 * qp);
+ setLambdaFromQP(parentCTU, qp);
+ }
+ if (m_param->bCTUInfo & 4)
+ skipModes = false;
}
- if (m_param->bCTUInfo & 4)
- skipModes = false;
+ if (sameContentRef || (!sameContentRef && !(m_param->bCTUInfo & 4)))
+ {
+ if (m_param->rdLevel)
+ skipModes = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0);
+ if ((m_param->bCTUInfo & 4) && sameContentRef)
+ skipModes = md.bestMode && true;
+ }
}
- if (sameContentRef || (!sameContentRef && !(m_param->bCTUInfo & 4)))
+ else
{
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
if (m_param->rdLevel)
skipModes = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0);
- if ((m_param->bCTUInfo & 4) && sameContentRef)
- skipModes = md.bestMode && true;
}
+ mightSplit &= !bDecidedDepth;
}
- else
+ }
+ if ((m_param->analysisReuseMode == X265_ANALYSIS_LOAD && m_param->analysisReuseLevel > 1 && m_param->analysisReuseLevel != 10))
+ {
+ if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
{
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
- if (m_param->rdLevel)
- skipModes = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0);
- }
- mightSplit &= !bDecidedDepth;
- }
- }
- if ((m_param->analysisReuseMode == X265_ANALYSIS_LOAD && m_param->analysisReuseLevel > 1 && m_param->analysisReuseLevel != 10))
- {
- if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
- {
- if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
- {
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
-
- skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
- if (m_param->rdLevel)
- skipModes = m_param->bEnableEarlySkip && md.bestMode;
- }
- if (m_param->analysisReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
- {
- if (m_reuseModes[cuGeom.absPartIdx] != MODE_INTRA && m_reuseModes[cuGeom.absPartIdx] != 4)
+ if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
{
- skipRectAmp = true && !!md.bestMode;
- chooseMerge = !!m_reuseMergeFlag[cuGeom.absPartIdx] && !!md.bestMode;
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+ skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
+ if (m_param->rdLevel)
+ skipModes = m_param->bEnableEarlySkip && md.bestMode;
+ }
+ if (m_param->analysisReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
+ {
+ if (m_reuseModes[cuGeom.absPartIdx] != MODE_INTRA && m_reuseModes[cuGeom.absPartIdx] != 4)
+ {
+ skipRectAmp = true && !!md.bestMode;
+ chooseMerge = !!m_reuseMergeFlag[cuGeom.absPartIdx] && !!md.bestMode;
+ }
}
}
}
- }
- if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
- {
- if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx])
+ if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
{
- if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP)
+ if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx])
{
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
-
- skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
- if (m_param->rdLevel)
- skipModes = m_param->bEnableEarlySkip && md.bestMode;
+ if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP)
+ {
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+ skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
+ if (m_param->rdLevel)
+ skipModes = m_param->bEnableEarlySkip && md.bestMode;
+ }
+ }
+ }
+
+ /* Step 1. Evaluate Merge/Skip candidates for likely early-outs, if skip mode was not set above */
+ if (mightNotSplit && depth >= minDepth && !md.bestMode && !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */
+ {
+ /* Compute Merge Cost */
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+ if (m_param->rdLevel)
+ skipModes = (m_param->bEnableEarlySkip || m_param->interRefine == 2)
+ && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
+ }
+
+ if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck)
+ {
+ skipRecursion = md.bestMode->cu.isSkipped(0);
+ if (mightSplit && depth >= minDepth && !skipRecursion)
+ {
+ if (depth)
+ skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
+ if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)
+ skipRecursion = complexityCheckCU(*md.bestMode);
+ }
+ }
+
+ /* Step 2. Evaluate each of the 4 split sub-blocks in series */
+ if (mightSplit && !skipRecursion)
+ {
+ if (bCtuInfoCheck && m_param->bCTUInfo & 2)
+ qp = int((1 / 0.96) * qp + 0.5);
+ Mode* splitPred = &md.pred[PRED_SPLIT];
+ splitPred->initCosts();
+ CUData* splitCU = &splitPred->cu;
+ splitCU->initSubCU(parentCTU, cuGeom, qp);
+
+ uint32_t nextDepth = depth + 1;
+ ModeDepth& nd = m_modeDepth[nextDepth];
+ invalidateContexts(nextDepth);
+ Entropy* nextContext = &m_rqt[depth].cur;
+ int nextQP = qp;
+ splitIntra = false;
+
+ for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
+ {
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
+ {
+ m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
+ m_rqt[nextDepth].cur.load(*nextContext);
+
+ if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
+ nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
+
+ splitData[subPartIdx] = compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
+
+ // Save best CU and pred data for this sub CU
+ splitIntra |= nd.bestMode->cu.isIntra(0);
+ splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
+ splitPred->addSubCosts(*nd.bestMode);
+
+ if (m_param->rdLevel)
+ nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
+ else
+ nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childGeom.numPartitions * subPartIdx);
+ if (m_param->rdLevel > 1)
+ nextContext = &nd.bestMode->contexts;
+ }
+ else
+ splitCU->setEmptyPart(childGeom, subPartIdx);
+ }
+ nextContext->store(splitPred->contexts);
+
+ if (mightNotSplit)
+ addSplitFlagCost(*splitPred, cuGeom.depth);
+ else if (m_param->rdLevel > 1)
+ updateModeCost(*splitPred);
+ else
+ splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion, splitPred->sa8dBits);
+ }
+
+ /* Split CUs
+ * 0 1
+ * 2 3 */
+ uint32_t allSplitRefs = splitData[0].splitRefs | splitData[1].splitRefs | splitData[2].splitRefs | splitData[3].splitRefs;
+ /* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
+ if (mightNotSplit && (depth >= minDepth || (m_param->bCTUInfo && !md.bestMode)))
+ {
+ if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
+ setLambdaFromQP(parentCTU, qp);
+
+ if (!skipModes)
+ {
+ uint32_t refMasks[2];
+ refMasks[0] = allSplitRefs;
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
+
+ if (m_param->limitReferences & X265_REF_LIMIT_CU)
+ {
+ CUData& cu = md.pred[PRED_2Nx2N].cu;
+ uint32_t refMask = cu.getBestRefIdx(0);
+ allSplitRefs = splitData[0].splitRefs = splitData[1].splitRefs = splitData[2].splitRefs = splitData[3].splitRefs = refMask;
+ }
+
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
+ }
+
+ Mode *bestInter = &md.pred[PRED_2Nx2N];
+ if (!skipRectAmp)
+ {
+ if (m_param->bEnableRectInter)
+ {
+ uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
+ uint32_t threshold_2NxN, threshold_Nx2N;
+
+ if (m_slice->m_sliceType == P_SLICE)
+ {
+ threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0];
+ threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
+ }
+ else
+ {
+ threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
+ + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
+ threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
+ + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
+ }
+
+ int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
+ if (try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
+ refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
+ if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_2NxN];
+ }
+
+ if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_Nx2N)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
+ refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
+ md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
+ if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_Nx2N];
+ }
+
+ if (!try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
+ refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
+ if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_2NxN];
+ }
+ }
+
+ if (m_slice->m_sps->maxAMPDepth > depth)
+ {
+ uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
+ uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N;
+
+ if (m_slice->m_sliceType == P_SLICE)
+ {
+ threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0];
+ threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0];
+
+ threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
+ threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0];
+ }
+ else
+ {
+ threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
+ + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
+ threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0]
+ + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
+
+ threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
+ + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
+ threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0]
+ + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
+ }
+
+ bool bHor = false, bVer = false;
+ if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
+ bHor = true;
+ else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
+ bVer = true;
+ else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
+ md.bestMode && md.bestMode->cu.getQtRootCbf(0))
+ {
+ bHor = true;
+ bVer = true;
+ }
+
+ if (bHor)
+ {
+ int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU;
+ if (try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
+ {
+ refMasks[0] = allSplitRefs; /* 75% top */
+ refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
+ if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_2NxnD];
+ }
+
+ if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnU)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
+ refMasks[1] = allSplitRefs; /* 75% bot */
+ md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
+ if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_2NxnU];
+ }
+
+ if (!try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
+ {
+ refMasks[0] = allSplitRefs; /* 75% top */
+ refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
+ if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_2NxnD];
+ }
+ }
+ if (bVer)
+ {
+ int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N;
+ if (try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
+ {
+ refMasks[0] = allSplitRefs; /* 75% left */
+ refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
+ if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_nRx2N];
+ }
+
+ if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nLx2N)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */
+ refMasks[1] = allSplitRefs; /* 75% right */
+ md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
+ if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_nLx2N];
+ }
+
+ if (!try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
+ {
+ refMasks[0] = allSplitRefs; /* 75% left */
+ refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
+ if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
+ bestInter = &md.pred[PRED_nRx2N];
+ }
+ }
+ }
+ }
+ bool bTryIntra = (m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) && cuGeom.log2CUSize != MAX_LOG2_CU_SIZE && !((m_param->bCTUInfo & 4) && bCtuInfoCheck);
+ if (m_param->rdLevel >= 3)
+ {
+ /* Calculate RD cost of best inter option */
+ if ((!m_bChromaSa8d && (m_csp != X265_CSP_I400)) || (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
+ {
+ uint32_t numPU = bestInter->cu.getNumPartInter(0);
+ for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+ {
+ PredictionUnit pu(bestInter->cu, cuGeom, puIdx);
+ motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true);
+ }
+ }
+
+ if (!chooseMerge)
+ {
+ encodeResAndCalcRdInterCU(*bestInter, cuGeom);
+ checkBestMode(*bestInter, depth);
+
+ /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
+ if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
+ md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+ {
+ uint32_t numPU = md.pred[PRED_BIDIR].cu.getNumPartInter(0);
+ if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
+ for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+ {
+ PredictionUnit pu(md.pred[PRED_BIDIR].cu, cuGeom, puIdx);
+ motionCompensation(md.pred[PRED_BIDIR].cu, pu, md.pred[PRED_BIDIR].predYuv, true, true);
+ }
+ encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+ checkBestMode(md.pred[PRED_BIDIR], depth);
+ }
+ }
+
+ if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
+ md.bestMode->sa8dCost == MAX_INT64)
+ {
+ if (!m_param->limitReferences || splitIntra)
+ {
+ ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
+ encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
+ checkBestMode(md.pred[PRED_INTRA], depth);
+ }
+ else
+ {
+ ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
+ }
+ }
+ }
+ else
+ {
+ /* SA8D choice between merge/skip, inter, bidir, and intra */
+ if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = bestInter;
+
+ if (m_slice->m_sliceType == B_SLICE &&
+ md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = &md.pred[PRED_BIDIR];
+
+ if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
+ {
+ if (!m_param->limitReferences || splitIntra)
+ {
+ ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
+ md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
+ if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = &md.pred[PRED_INTRA];
+ }
+ else
+ {
+ ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
+ }
+ }
+
+ /* finally code the best mode selected by SA8D costs:
+ * RD level 2 - fully encode the best mode
+ * RD level 1 - generate recon pixels
+ * RD level 0 - generate chroma prediction */
+ if (md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)
+ {
+ /* prediction already generated for this CU, and if rd level
+ * is not 0, it is already fully encoded */
+ }
+ else if (md.bestMode->cu.isInter(0))
+ {
+ uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
+ if (m_csp != X265_CSP_I400)
+ {
+ for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+ {
+ PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
+ motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true);
+ }
+ }
+ if (m_param->rdLevel == 2)
+ encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
+ else if (m_param->rdLevel == 1)
+ {
+ /* generate recon pixels with no rate distortion considerations */
+ CUData& cu = md.bestMode->cu;
+
+ uint32_t tuDepthRange[2];
+ cu.getInterTUQtDepthRange(tuDepthRange, 0);
+ m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize, m_frame->m_fencPic->m_picCsp);
+ residualTransformQuantInter(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
+ if (cu.getQtRootCbf(0))
+ md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0], m_frame->m_fencPic->m_picCsp);
+ else
+ {
+ md.bestMode->reconYuv.copyFromYuv(md.bestMode->predYuv);
+ if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N)
+ cu.setPredModeSubParts(MODE_SKIP);
+ }
+ }
+ }
+ else
+ {
+ if (m_param->rdLevel == 2)
+ encodeIntraInInter(*md.bestMode, cuGeom);
+ else if (m_param->rdLevel == 1)
+ {
+ /* generate recon pixels with no rate distortion considerations */
+ CUData& cu = md.bestMode->cu;
+
+ uint32_t tuDepthRange[2];
+ cu.getIntraTUQtDepthRange(tuDepthRange, 0);
+
+ residualTransformQuantIntra(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
+ if (m_csp != X265_CSP_I400)
+ {
+ getBestIntraModeChroma(*md.bestMode, cuGeom);
+ residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
+ }
+ md.bestMode->reconYuv.copyFromPicYuv(reconPic, cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
+ }
+ }
+ }
+ } // !earlyskip
+
+ if (m_bTryLossless)
+ tryLossless(cuGeom);
+
+ if (mightSplit)
+ addSplitFlagCost(*md.bestMode, cuGeom.depth);
+ }
+
+ if (mightSplit && !skipRecursion)
+ {
+ Mode* splitPred = &md.pred[PRED_SPLIT];
+ if (!md.bestMode)
+ md.bestMode = splitPred;
+ else if (m_param->rdLevel > 1)
+ checkBestMode(*splitPred, cuGeom.depth);
+ else if (splitPred->sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = splitPred;
+
+ checkDQPForSplitPred(*md.bestMode, cuGeom);
+ }
+
+ /* determine which motion references the parent CU should search */
+ splitCUData.initSplitCUData();
+
+ if (m_param->limitReferences & X265_REF_LIMIT_DEPTH)
+ {
+ if (md.bestMode == &md.pred[PRED_SPLIT])
+ splitCUData.splitRefs = allSplitRefs;
+ else
+ {
+ /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
+ CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
+ uint32_t numPU = cu.getNumPartInter(0);
+ for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
+ splitCUData.splitRefs |= cu.getBestRefIdx(subPartIdx);
+ }
+ }
+
+ if (m_param->limitModes)
+ {
+ splitCUData.mvCost[0] = md.pred[PRED_2Nx2N].bestME[0][0].mvCost; // L0
+ splitCUData.mvCost[1] = md.pred[PRED_2Nx2N].bestME[0][1].mvCost; // L1
+ splitCUData.sa8dCost = md.pred[PRED_2Nx2N].sa8dCost;
+ }
+
+ if (mightNotSplit && md.bestMode->cu.isSkipped(0))
+ {
+ FrameData& curEncData = *m_frame->m_encData;
+ FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
+ uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
+ cuStat.count[depth] += 1;
+ cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
+ }
+
+ /* Copy best data to encData CTU and recon */
+ md.bestMode->cu.copyToPic(depth);
+ if (m_param->rdLevel)
+ md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx);
+
+ if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+ {
+ if (mightNotSplit)
+ {
+ CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+ int8_t maxTUDepth = -1;
+ for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+ maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
+ ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
}
}
}
-
- /* Step 1. Evaluate Merge/Skip candidates for likely early-outs, if skip mode was not set above */
- if (mightNotSplit && depth >= minDepth && !md.bestMode && !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */
+ else
{
- /* Compute Merge Cost */
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
- if (m_param->rdLevel)
- skipModes = (m_param->bEnableEarlySkip || m_param->interRefine == 2)
- && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
- }
-
- if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck)
- {
- skipRecursion = md.bestMode->cu.isSkipped(0);
- if (mightSplit && depth >= minDepth && !skipRecursion)
+ if (m_param->bMVType && cuGeom.numPartitions <= 16)
{
- if (depth)
- skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
- if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)
- skipRecursion = complexityCheckCU(*md.bestMode);
+ qprdRefine(parentCTU, cuGeom, qp, qp);
+
+ SplitData splitData[4];
+ splitData[0].initSplitCUData();
+ splitData[1].initSplitCUData();
+ splitData[2].initSplitCUData();
+ splitData[3].initSplitCUData();
+
+ uint32_t allSplitRefs = splitData[0].splitRefs | splitData[1].splitRefs | splitData[2].splitRefs | splitData[3].splitRefs;
+
+ splitCUData.initSplitCUData();
+
+ if (m_param->limitReferences & X265_REF_LIMIT_DEPTH)
+ {
+ if (md.bestMode == &md.pred[PRED_SPLIT])
+ splitCUData.splitRefs = allSplitRefs;
+ else
+ {
+ /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
+ CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
+ uint32_t numPU = cu.getNumPartInter(0);
+ for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
+ splitCUData.splitRefs |= cu.getBestRefIdx(subPartIdx);
+ }
+ }
+
+ if (m_param->limitModes)
+ {
+ splitCUData.mvCost[0] = md.pred[PRED_2Nx2N].bestME[0][0].mvCost; // L0
+ splitCUData.mvCost[1] = md.pred[PRED_2Nx2N].bestME[0][1].mvCost; // L1
+ splitCUData.sa8dCost = md.pred[PRED_2Nx2N].sa8dCost;
+ }
}
}
- /* Step 2. Evaluate each of the 4 split sub-blocks in series */
- if (mightSplit && !skipRecursion)
+ return splitCUData;
+}
+
+SplitData Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
+{
+ if (parentCTU.m_vbvAffected && !calculateQpforCuSize(parentCTU, cuGeom, 1))
+ return compressInterCU_rd0_4(parentCTU, cuGeom, qp);
+
+ uint32_t depth = cuGeom.depth;
+ ModeDepth& md = m_modeDepth[depth];
+ md.bestMode = NULL;
+
+ if (m_param->searchMethod == X265_SEA)
{
- if (bCtuInfoCheck && m_param->bCTUInfo & 2)
- qp = int((1 / 0.96) * qp + 0.5);
- Mode* splitPred = &md.pred[PRED_SPLIT];
- splitPred->initCosts();
- CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom, qp);
-
- uint32_t nextDepth = depth + 1;
- ModeDepth& nd = m_modeDepth[nextDepth];
- invalidateContexts(nextDepth);
- Entropy* nextContext = &m_rqt[depth].cur;
- int nextQP = qp;
- splitIntra = false;
-
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
+ int numPredDir = m_slice->isInterP() ? 1 : 2;
+ int offset = (int)(m_frame->m_reconPic->m_cuOffsetY[parentCTU.m_cuAddr] + m_frame->m_reconPic->m_buOffsetY[cuGeom.absPartIdx]);
+ for (int list = 0; list < numPredDir; list++)
+ for (int i = 0; i < m_frame->m_encData->m_slice->m_numRefIdx[list]; i++)
+ for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
+ m_modeDepth[depth].fencYuv.m_integral[list][i][planes] = m_frame->m_encData->m_slice->m_refFrameList[list][i]->m_encData->m_meIntegral[planes] + offset;
+ }
+
+ SplitData splitCUData;
+ if ((m_param->bMVType && cuGeom.numPartitions > 16) || !m_param->bMVType)
+ {
+ bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
+ bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
+ bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
+ bool skipRecursion = false;
+ bool skipModes = false;
+ bool splitIntra = true;
+ bool skipRectAmp = false;
+ bool bCtuInfoCheck = false;
+ int sameContentRef = 0;
+
+ if (m_evaluateInter)
{
- const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
- if (childGeom.flags & CUGeom::PRESENT)
+ if (m_param->interRefine == 2)
{
- m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
- m_rqt[nextDepth].cur.load(*nextContext);
-
- if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
- nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
-
- splitData[subPartIdx] = compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
-
- // Save best CU and pred data for this sub CU
- splitIntra |= nd.bestMode->cu.isIntra(0);
- splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
- splitPred->addSubCosts(*nd.bestMode);
-
- if (m_param->rdLevel)
- nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
+ if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP)
+ skipModes = true;
+ if (parentCTU.m_partSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
+ skipRectAmp = true;
+ }
+ mightSplit &= false;
+ }
+
+ // avoid uninitialize value in below reference
+ if (m_param->limitModes)
+ {
+ md.pred[PRED_2Nx2N].bestME[0][0].mvCost = 0; // L0
+ md.pred[PRED_2Nx2N].bestME[0][1].mvCost = 0; // L1
+ md.pred[PRED_2Nx2N].rdCost = 0;
+ }
+
+ if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+ m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
+
+ SplitData splitData[4];
+ splitData[0].initSplitCUData();
+ splitData[1].initSplitCUData();
+ splitData[2].initSplitCUData();
+ splitData[3].initSplitCUData();
+ uint32_t allSplitRefs = splitData[0].splitRefs | splitData[1].splitRefs | splitData[2].splitRefs | splitData[3].splitRefs;
+ uint32_t refMasks[2];
+ if (m_param->bCTUInfo && depth <= parentCTU.m_cuDepth[cuGeom.absPartIdx])
+ {
+ if (bDecidedDepth && m_additionalCtuInfo[cuGeom.absPartIdx])
+ sameContentRef = findSameContentRefCount(parentCTU, cuGeom);
+ if (depth < parentCTU.m_cuDepth[cuGeom.absPartIdx])
+ {
+ mightNotSplit &= bDecidedDepth;
+ bCtuInfoCheck = skipRecursion = false;
+ skipModes = true;
+ }
+ else if (mightNotSplit && bDecidedDepth)
+ {
+ if (m_additionalCtuInfo[cuGeom.absPartIdx])
+ {
+ bCtuInfoCheck = skipRecursion = true;
+ refMasks[0] = allSplitRefs;
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
+ checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+ if (!sameContentRef)
+ {
+ if ((m_param->bCTUInfo & 2) && (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth))
+ {
+ qp -= int32_t(0.04 * qp);
+ setLambdaFromQP(parentCTU, qp);
+ }
+ if (m_param->bCTUInfo & 4)
+ skipModes = false;
+ }
+ if (sameContentRef || (!sameContentRef && !(m_param->bCTUInfo & 4)))
+ {
+ if (m_param->rdLevel)
+ skipModes = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0);
+ if ((m_param->bCTUInfo & 4) && sameContentRef)
+ skipModes = md.bestMode && true;
+ }
+ }
else
- nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childGeom.numPartitions * subPartIdx);
- if (m_param->rdLevel > 1)
- nextContext = &nd.bestMode->contexts;
+ {
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+ skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
+ refMasks[0] = allSplitRefs;
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
+ checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+ }
+ mightSplit &= !bDecidedDepth;
}
- else
- splitCU->setEmptyPart(childGeom, subPartIdx);
}
- nextContext->store(splitPred->contexts);
-
- if (mightNotSplit)
- addSplitFlagCost(*splitPred, cuGeom.depth);
- else if (m_param->rdLevel > 1)
- updateModeCost(*splitPred);
- else
- splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion, splitPred->sa8dBits);
- }
-
- /* Split CUs
- * 0 1
- * 2 3 */
- uint32_t allSplitRefs = splitData[0].splitRefs | splitData[1].splitRefs | splitData[2].splitRefs | splitData[3].splitRefs;
- /* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
- if (mightNotSplit && (depth >= minDepth || (m_param->bCTUInfo && !md.bestMode)))
- {
- if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
- setLambdaFromQP(parentCTU, qp);
-
- if (!skipModes)
+ if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && m_param->analysisReuseLevel > 1 && m_param->analysisReuseLevel != 10)
{
- uint32_t refMasks[2];
+ if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
+ {
+ if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
+ {
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+ skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
+ refMasks[0] = allSplitRefs;
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
+ checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+
+ if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
+ skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
+ }
+ if (m_param->analysisReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
+ skipRectAmp = true && !!md.bestMode;
+ }
+ }
+
+ if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
+ {
+ if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx])
+ {
+ if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP)
+ {
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+
+ skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
+ refMasks[0] = allSplitRefs;
+ md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
+ checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+
+ if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
+ skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
+ }
+ }
+ }
+
+ /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
+ if (mightNotSplit && !md.bestMode && !bCtuInfoCheck)
+ {
+ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+ skipModes = (m_param->bEnableEarlySkip || m_param->interRefine == 2) &&
+ md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
refMasks[0] = allSplitRefs;
md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
-
- if (m_param->limitReferences & X265_REF_LIMIT_CU)
+ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
+ checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+
+ if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
+ skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
+ }
+
+ // estimate split cost
+ /* Step 2. Evaluate each of the 4 split sub-blocks in series */
+ if (mightSplit && !skipRecursion)
+ {
+ if (bCtuInfoCheck && m_param->bCTUInfo & 2)
+ qp = int((1 / 0.96) * qp + 0.5);
+ Mode* splitPred = &md.pred[PRED_SPLIT];
+ splitPred->initCosts();
+ CUData* splitCU = &splitPred->cu;
+ splitCU->initSubCU(parentCTU, cuGeom, qp);
+
+ uint32_t nextDepth = depth + 1;
+ ModeDepth& nd = m_modeDepth[nextDepth];
+ invalidateContexts(nextDepth);
+ Entropy* nextContext = &m_rqt[depth].cur;
+ int nextQP = qp;
+ splitIntra = false;
+
+ for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
{
- CUData& cu = md.pred[PRED_2Nx2N].cu;
- uint32_t refMask = cu.getBestRefIdx(0);
- allSplitRefs = splitData[0].splitRefs = splitData[1].splitRefs = splitData[2].splitRefs = splitData[3].splitRefs = refMask;
- }
-
- if (m_slice->m_sliceType == B_SLICE)
- {
- md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
- checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
- }
-
- Mode *bestInter = &md.pred[PRED_2Nx2N];
- if (!skipRectAmp)
- {
- if (m_param->bEnableRectInter)
+ const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
+ if (childGeom.flags & CUGeom::PRESENT)
{
- uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
- uint32_t threshold_2NxN, threshold_Nx2N;
-
- if (m_slice->m_sliceType == P_SLICE)
- {
- threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0];
- threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
- }
- else
- {
- threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
- + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
- threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
- + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
- }
-
- int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
- if (try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
- refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
- if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_2NxN];
- }
-
- if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_Nx2N)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
- refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
- if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_Nx2N];
- }
-
- if (!try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
- refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
- if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_2NxN];
- }
+ m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
+ m_rqt[nextDepth].cur.load(*nextContext);
+
+ if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
+ nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
+
+ splitData[subPartIdx] = compressInterCU_rd5_6(parentCTU, childGeom, nextQP);
+
+ // Save best CU and pred data for this sub CU
+ splitIntra |= nd.bestMode->cu.isIntra(0);
+ splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
+ splitPred->addSubCosts(*nd.bestMode);
+ nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
+ nextContext = &nd.bestMode->contexts;
}
-
- if (m_slice->m_sps->maxAMPDepth > depth)
+ else
{
- uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
- uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N;
-
- if (m_slice->m_sliceType == P_SLICE)
- {
- threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0];
- threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0];
-
- threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
- threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0];
- }
- else
- {
- threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
- + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
- threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0]
- + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
-
- threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
- + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
- threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0]
- + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
- }
-
- bool bHor = false, bVer = false;
- if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
- bHor = true;
- else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
- bVer = true;
- else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
- md.bestMode && md.bestMode->cu.getQtRootCbf(0))
- {
- bHor = true;
- bVer = true;
- }
-
- if (bHor)
- {
- int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU;
- if (try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
- {
- refMasks[0] = allSplitRefs; /* 75% top */
- refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
- if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_2NxnD];
- }
-
- if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnU)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
- refMasks[1] = allSplitRefs; /* 75% bot */
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
- if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_2NxnU];
- }
-
- if (!try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
- {
- refMasks[0] = allSplitRefs; /* 75% top */
- refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
- if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_2NxnD];
- }
- }
- if (bVer)
- {
- int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N;
- if (try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
- {
- refMasks[0] = allSplitRefs; /* 75% left */
- refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
- if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_nRx2N];
- }
-
- if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nLx2N)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */
- refMasks[1] = allSplitRefs; /* 75% right */
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
- if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_nLx2N];
- }
-
- if (!try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
- {
- refMasks[0] = allSplitRefs; /* 75% left */
- refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
- if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
- bestInter = &md.pred[PRED_nRx2N];
- }
- }
+ splitCU->setEmptyPart(childGeom, subPartIdx);
}
}
- bool bTryIntra = (m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) && cuGeom.log2CUSize != MAX_LOG2_CU_SIZE && !((m_param->bCTUInfo & 4) && bCtuInfoCheck);
- if (m_param->rdLevel >= 3)
+ nextContext->store(splitPred->contexts);
+ if (mightNotSplit)
+ addSplitFlagCost(*splitPred, cuGeom.depth);
+ else
+ updateModeCost(*splitPred);
+
+ checkDQPForSplitPred(*splitPred, cuGeom);
+ }
+
+ /* Split CUs
+ * 0 1
+ * 2 3 */
+ allSplitRefs = splitData[0].splitRefs | splitData[1].splitRefs | splitData[2].splitRefs | splitData[3].splitRefs;
+ /* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
+ if (mightNotSplit)
+ {
+ if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
+ setLambdaFromQP(parentCTU, qp);
+
+ if (!skipModes)
{
- /* Calculate RD cost of best inter option */
- if ((!m_bChromaSa8d && (m_csp != X265_CSP_I400)) || (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)) /* When m_bChromaSa8d is enabled, chroma MC has already been done */
+ refMasks[0] = allSplitRefs;
+
+ if (m_param->limitReferences & X265_REF_LIMIT_CU)
{
- uint32_t numPU = bestInter->cu.getNumPartInter(0);
- for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
- {
- PredictionUnit pu(bestInter->cu, cuGeom, puIdx);
- motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true);
- }
+ CUData& cu = md.pred[PRED_2Nx2N].cu;
+ uint32_t refMask = cu.getBestRefIdx(0);
+ allSplitRefs = splitData[0].splitRefs = splitData[1].splitRefs = splitData[2].splitRefs = splitData[3].splitRefs = refMask;
}
- if (!chooseMerge)
+ if (m_slice->m_sliceType == B_SLICE)
{
- encodeResAndCalcRdInterCU(*bestInter, cuGeom);
- checkBestMode(*bestInter, depth);
-
- /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
- if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
- md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
+ if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
{
uint32_t numPU = md.pred[PRED_BIDIR].cu.getNumPartInter(0);
if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
@@ -1567,20 +2080,176 @@
motionCompensation(md.pred[PRED_BIDIR].cu, pu, md.pred[PRED_BIDIR].predYuv, true, true);
}
encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
- checkBestMode(md.pred[PRED_BIDIR], depth);
+ checkBestMode(md.pred[PRED_BIDIR], cuGeom.depth);
}
}
- if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
- md.bestMode->sa8dCost == MAX_INT64)
+ if (!skipRectAmp)
+ {
+ if (m_param->bEnableRectInter)
+ {
+ uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
+ uint32_t threshold_2NxN, threshold_Nx2N;
+
+ if (m_slice->m_sliceType == P_SLICE)
+ {
+ threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0];
+ threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
+ }
+ else
+ {
+ threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
+ + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
+ threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
+ + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
+ }
+
+ int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
+ if (try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
+ refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
+ checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
+ }
+
+ if (splitCost < md.bestMode->rdCost + threshold_Nx2N)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
+ refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
+ md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
+ checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
+ }
+
+ if (!try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
+ refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
+ md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
+ checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
+ }
+ }
+
+ // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
+ if (m_slice->m_sps->maxAMPDepth > depth)
+ {
+ uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
+ uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N;
+
+ if (m_slice->m_sliceType == P_SLICE)
+ {
+ threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0];
+ threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0];
+
+ threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
+ threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0];
+ }
+ else
+ {
+ threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
+ + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
+ threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0]
+ + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
+
+ threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
+ + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
+ threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0]
+ + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
+ }
+
+ bool bHor = false, bVer = false;
+ if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
+ bHor = true;
+ else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
+ bVer = true;
+ else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0])
+ {
+ bHor = true;
+ bVer = true;
+ }
+
+ if (bHor)
+ {
+ int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU;
+ if (try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD)
+ {
+ refMasks[0] = allSplitRefs; /* 75% top */
+ refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
+ checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
+ }
+
+ if (splitCost < md.bestMode->rdCost + threshold_2NxnU)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
+ refMasks[1] = allSplitRefs; /* 75% bot */
+ md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
+ checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
+ }
+
+ if (!try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD)
+ {
+ refMasks[0] = allSplitRefs; /* 75% top */
+ refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
+ md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
+ checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
+ }
+ }
+
+ if (bVer)
+ {
+ int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N;
+ if (try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N)
+ {
+ refMasks[0] = allSplitRefs; /* 75% left */
+ refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
+ checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
+ }
+
+ if (splitCost < md.bestMode->rdCost + threshold_nLx2N)
+ {
+ refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */
+ refMasks[1] = allSplitRefs; /* 75% right */
+ md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
+ checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
+ }
+
+ if (!try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N)
+ {
+ refMasks[0] = allSplitRefs; /* 75% left */
+ refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
+ md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
+ checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
+ }
+ }
+ }
+ }
+
+ if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) && (cuGeom.log2CUSize != MAX_LOG2_CU_SIZE) && !((m_param->bCTUInfo & 4) && bCtuInfoCheck))
{
if (!m_param->limitReferences || splitIntra)
{
ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
- checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
- encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
+ checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N);
checkBestMode(md.pred[PRED_INTRA], depth);
+
+ if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
+ {
+ md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN);
+ checkBestMode(md.pred[PRED_INTRA_NxN], depth);
+ }
}
else
{
@@ -1588,676 +2257,112 @@
}
}
}
+
+ if ((md.bestMode->cu.isInter(0) && !(md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)) && (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400))
+ {
+ uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
+
+ for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
+ {
+ PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
+ motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, m_csp != X265_CSP_I400);
+ }
+ encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
+ }
+ if (m_bTryLossless)
+ tryLossless(cuGeom);
+
+ if (mightSplit)
+ addSplitFlagCost(*md.bestMode, cuGeom.depth);
+ }
+
+ if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+ {
+ if (mightNotSplit)
+ {
+ CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+ int8_t maxTUDepth = -1;
+ for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+ maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
+ ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
+ }
+ }
+
+ /* compare split RD cost against best cost */
+ if (mightSplit && !skipRecursion)
+ checkBestMode(md.pred[PRED_SPLIT], depth);
+
+ if (m_param->bEnableRdRefine && depth <= m_slice->m_pps->maxCuDQPDepth)
+ {
+ int cuIdx = (cuGeom.childOffset - 1) / 3;
+ cacheCost[cuIdx] = md.bestMode->rdCost;
+ }
+
+ /* determine which motion references the parent CU should search */
+ splitCUData.initSplitCUData();
+ if (m_param->limitReferences & X265_REF_LIMIT_DEPTH)
+ {
+ if (md.bestMode == &md.pred[PRED_SPLIT])
+ splitCUData.splitRefs = allSplitRefs;
else
{
- /* SA8D choice between merge/skip, inter, bidir, and intra */
- if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
- md.bestMode = bestInter;
-
- if (m_slice->m_sliceType == B_SLICE &&
- md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
- md.bestMode = &md.pred[PRED_BIDIR];
-
- if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
- {
- if (!m_param->limitReferences || splitIntra)
- {
- ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
- checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
- if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
- md.bestMode = &md.pred[PRED_INTRA];
- }
- else
- {
- ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
- }
- }
-
- /* finally code the best mode selected by SA8D costs:
- * RD level 2 - fully encode the best mode
- * RD level 1 - generate recon pixels
- * RD level 0 - generate chroma prediction */
- if (md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)
- {
- /* prediction already generated for this CU, and if rd level
- * is not 0, it is already fully encoded */
- }
- else if (md.bestMode->cu.isInter(0))
- {
- uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
- if (m_csp != X265_CSP_I400)
- {
- for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
- {
- PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
- motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, true);
- }
- }
- if (m_param->rdLevel == 2)
- encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
- else if (m_param->rdLevel == 1)
- {
- /* generate recon pixels with no rate distortion considerations */
- CUData& cu = md.bestMode->cu;
-
- uint32_t tuDepthRange[2];
- cu.getInterTUQtDepthRange(tuDepthRange, 0);
- m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize, m_frame->m_fencPic->m_picCsp);
- residualTransformQuantInter(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
- if (cu.getQtRootCbf(0))
- md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0], m_frame->m_fencPic->m_picCsp);
- else
- {
- md.bestMode->reconYuv.copyFromYuv(md.bestMode->predYuv);
- if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N)
- cu.setPredModeSubParts(MODE_SKIP);
- }
- }
- }
+ /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
+ CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
+ uint32_t numPU = cu.getNumPartInter(0);
+ for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
+ splitCUData.splitRefs |= cu.getBestRefIdx(subPartIdx);
+ }
+ }
+
+ if (m_param->limitModes)
+ {
+ splitCUData.mvCost[0] = md.pred[PRED_2Nx2N].bestME[0][0].mvCost; // L0
+ splitCUData.mvCost[1] = md.pred[PRED_2Nx2N].bestME[0][1].mvCost; // L1
+ splitCUData.sa8dCost = md.pred[PRED_2Nx2N].rdCost;
+ }
+
+ /* Copy best data to encData CTU and recon */
+ md.bestMode->cu.copyToPic(depth);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
+ }
+ else
+ {
+ if (m_param->bMVType && cuGeom.numPartitions <= 16)
+ {
+ qprdRefine(parentCTU, cuGeom, qp, qp);
+
+ SplitData splitData[4];
+ splitData[0].initSplitCUData();
+ splitData[1].initSplitCUData();
+ splitData[2].initSplitCUData();
+ splitData[3].initSplitCUData();
+
+ uint32_t allSplitRefs = splitData[0].splitRefs | splitData[1].splitRefs | splitData[2].splitRefs | splitData[3].splitRefs;
+
+ splitCUData.initSplitCUData();
+ if (m_param->limitReferences & X265_REF_LIMIT_DEPTH)
+ {
+ if (md.bestMode == &md.pred[PRED_SPLIT])
+ splitCUData.splitRefs = allSplitRefs;
else
{
- if (m_param->rdLevel == 2)
- encodeIntraInInter(*md.bestMode, cuGeom);
- else if (m_param->rdLevel == 1)
- {
- /* generate recon pixels with no rate distortion considerations */
- CUData& cu = md.bestMode->cu;
-
- uint32_t tuDepthRange[2];
- cu.getIntraTUQtDepthRange(tuDepthRange, 0);
-
- residualTransformQuantIntra(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
- if (m_csp != X265_CSP_I400)
- {
- getBestIntraModeChroma(*md.bestMode, cuGeom);
- residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
- }
- md.bestMode->reconYuv.copyFromPicYuv(reconPic, cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
- }
+ /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
+ CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
+ uint32_t numPU = cu.getNumPartInter(0);
+ for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
+ splitCUData.splitRefs |= cu.getBestRefIdx(subPartIdx);
}
}
- } // !earlyskip
-
- if (m_bTryLossless)
- tryLossless(cuGeom);
-
- if (mightSplit)
- addSplitFlagCost(*md.bestMode, cuGeom.depth);
- }
-
- if (mightSplit && !skipRecursion)
- {
- Mode* splitPred = &md.pred[PRED_SPLIT];
- if (!md.bestMode)
- md.bestMode = splitPred;
- else if (m_param->rdLevel > 1)
- checkBestMode(*splitPred, cuGeom.depth);
- else if (splitPred->sa8dCost < md.bestMode->sa8dCost)
- md.bestMode = splitPred;
-
- checkDQPForSplitPred(*md.bestMode, cuGeom);
- }
-
- /* determine which motion references the parent CU should search */
- SplitData splitCUData;
- splitCUData.initSplitCUData();
-
- if (m_param->limitReferences & X265_REF_LIMIT_DEPTH)
- {
- if (md.bestMode == &md.pred[PRED_SPLIT])
- splitCUData.splitRefs = allSplitRefs;
- else
- {
- /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
- CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
- uint32_t numPU = cu.getNumPartInter(0);
- for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
- splitCUData.splitRefs |= cu.getBestRefIdx(subPartIdx);
- }
- }
-
- if (m_param->limitModes)
- {
- splitCUData.mvCost[0] = md.pred[PRED_2Nx2N].bestME[0][0].mvCost; // L0
- splitCUData.mvCost[1] = md.pred[PRED_2Nx2N].bestME[0][1].mvCost; // L1
- splitCUData.sa8dCost = md.pred[PRED_2Nx2N].sa8dCost;
- }
-
- if (mightNotSplit && md.bestMode->cu.isSkipped(0))
- {
- FrameData& curEncData = *m_frame->m_encData;
- FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];
- uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];
- cuStat.count[depth] += 1;
- cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
- }
-
- /* Copy best data to encData CTU and recon */
- md.bestMode->cu.copyToPic(depth);
- if (m_param->rdLevel)
- md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx);
-
- if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
- {
- if (mightNotSplit)
- {
- CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
- int8_t maxTUDepth = -1;
- for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
- maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
- ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
- }
- }
-
- return splitCUData;
-}
-
-SplitData Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
-{
- if (parentCTU.m_vbvAffected && !calculateQpforCuSize(parentCTU, cuGeom, 1))
- return compressInterCU_rd0_4(parentCTU, cuGeom, qp);
-
- uint32_t depth = cuGeom.depth;
- ModeDepth& md = m_modeDepth[depth];
- md.bestMode = NULL;
-
- if (m_param->searchMethod == X265_SEA)
- {
- int numPredDir = m_slice->isInterP() ? 1 : 2;
- int offset = (int)(m_frame->m_reconPic->m_cuOffsetY[parentCTU.m_cuAddr] + m_frame->m_reconPic->m_buOffsetY[cuGeom.absPartIdx]);
- for (int list = 0; list < numPredDir; list++)
- for (int i = 0; i < m_frame->m_encData->m_slice->m_numRefIdx[list]; i++)
- for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
- m_modeDepth[depth].fencYuv.m_integral[list][i][planes] = m_frame->m_encData->m_slice->m_refFrameList[list][i]->m_encData->m_meIntegral[planes] + offset;
- }
-
- bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
- bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
- bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
- bool skipRecursion = false;
- bool skipModes = false;
- bool splitIntra = true;
- bool skipRectAmp = false;
- bool bCtuInfoCheck = false;
- int sameContentRef = 0;
-
- if (m_evaluateInter)
- {
- if (m_param->interRefine == 2)
- {
- if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP)
- skipModes = true;
- if (parentCTU.m_partSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
- skipRectAmp = true;
- }
- mightSplit &= false;
- }
-
- // avoid uninitialize value in below reference
- if (m_param->limitModes)
- {
- md.pred[PRED_2Nx2N].bestME[0][0].mvCost = 0; // L0
- md.pred[PRED_2Nx2N].bestME[0][1].mvCost = 0; // L1
- md.pred[PRED_2Nx2N].rdCost = 0;
- }
-
- if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
- m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
-
- SplitData splitData[4];
- splitData[0].initSplitCUData();
- splitData[1].initSplitCUData();
- splitData[2].initSplitCUData();
- splitData[3].initSplitCUData();
- uint32_t allSplitRefs = splitData[0].splitRefs | splitData[1].splitRefs | splitData[2].splitRefs | splitData[3].splitRefs;
- uint32_t refMasks[2];
- if (m_param->bCTUInfo && depth <= parentCTU.m_cuDepth[cuGeom.absPartIdx])
- {
- if (bDecidedDepth && m_additionalCtuInfo[cuGeom.absPartIdx])
- sameContentRef = findSameContentRefCount(parentCTU, cuGeom);
- if (depth < parentCTU.m_cuDepth[cuGeom.absPartIdx])
- {
- mightNotSplit &= bDecidedDepth;
- bCtuInfoCheck = skipRecursion = false;
- skipModes = true;
- }
- else if (mightNotSplit && bDecidedDepth)
- {
- if (m_additionalCtuInfo[cuGeom.absPartIdx])
+
+ if (m_param->limitModes)
{
- bCtuInfoCheck = skipRecursion = true;
- refMasks[0] = allSplitRefs;
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
- checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
- if (!sameContentRef)
- {
- if ((m_param->bCTUInfo & 2) && (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth))
- {
- qp -= int32_t(0.04 * qp);
- setLambdaFromQP(parentCTU, qp);
- }
- if (m_param->bCTUInfo & 4)
- skipModes = false;
- }
- if (sameContentRef || (!sameContentRef && !(m_param->bCTUInfo & 4)))
- {
- if (m_param->rdLevel)
- skipModes = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0);
- if ((m_param->bCTUInfo & 4) && sameContentRef)
- skipModes = md.bestMode && true;
- }
- }
- else
- {
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
- skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
- refMasks[0] = allSplitRefs;
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
- checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
- }
- mightSplit &= !bDecidedDepth;
- }
- }
- if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && m_param->analysisReuseLevel > 1 && m_param->analysisReuseLevel != 10)
- {
- if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
- {
- if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
- {
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
- skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
- refMasks[0] = allSplitRefs;
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
- checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
-
- if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
- skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
- }
- if (m_param->analysisReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
- skipRectAmp = true && !!md.bestMode;
- }
- }
-
- if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis)
- {
- if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx])
- {
- if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP)
- {
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
-
- skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
- refMasks[0] = allSplitRefs;
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
- checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
-
- if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
- skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
+ splitCUData.mvCost[0] = md.pred[PRED_2Nx2N].bestME[0][0].mvCost; // L0
+ splitCUData.mvCost[1] = md.pred[PRED_2Nx2N].bestME[0][1].mvCost; // L1
+ splitCUData.sa8dCost = md.pred[PRED_2Nx2N].rdCost;
}
}
}
- /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
- if (mightNotSplit && !md.bestMode && !bCtuInfoCheck)
- {
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
- skipModes = (m_param->bEnableEarlySkip || m_param->interRefine == 2) &&
- md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
- refMasks[0] = allSplitRefs;
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
- checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
-
- if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
- skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
- }
-
- // estimate split cost
- /* Step 2. Evaluate each of the 4 split sub-blocks in series */
- if (mightSplit && !skipRecursion)
- {
- if (bCtuInfoCheck && m_param->bCTUInfo & 2)
- qp = int((1 / 0.96) * qp + 0.5);
- Mode* splitPred = &md.pred[PRED_SPLIT];
- splitPred->initCosts();
- CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom, qp);
-
- uint32_t nextDepth = depth + 1;
- ModeDepth& nd = m_modeDepth[nextDepth];
- invalidateContexts(nextDepth);
- Entropy* nextContext = &m_rqt[depth].cur;
- int nextQP = qp;
- splitIntra = false;
-
- for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
- {
- const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
- if (childGeom.flags & CUGeom::PRESENT)
- {
- m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
- m_rqt[nextDepth].cur.load(*nextContext);
-
- if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
- nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
-
- splitData[subPartIdx] = compressInterCU_rd5_6(parentCTU, childGeom, nextQP);
-
- // Save best CU and pred data for this sub CU
- splitIntra |= nd.bestMode->cu.isIntra(0);
- splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
- splitPred->addSubCosts(*nd.bestMode);
- nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
- nextContext = &nd.bestMode->contexts;
- }
- else
- {
- splitCU->setEmptyPart(childGeom, subPartIdx);
- }
- }
- nextContext->store(splitPred->contexts);
- if (mightNotSplit)
- addSplitFlagCost(*splitPred, cuGeom.depth);
- else
- updateModeCost(*splitPred);
-
- checkDQPForSplitPred(*splitPred, cuGeom);
- }
-
- /* Split CUs
- * 0 1
- * 2 3 */
- allSplitRefs = splitData[0].splitRefs | splitData[1].splitRefs | splitData[2].splitRefs | splitData[3].splitRefs;
- /* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
- if (mightNotSplit)
- {
- if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)
- setLambdaFromQP(parentCTU, qp);
-
- if (!skipModes)
- {
- refMasks[0] = allSplitRefs;
-
- if (m_param->limitReferences & X265_REF_LIMIT_CU)
- {
- CUData& cu = md.pred[PRED_2Nx2N].cu;
- uint32_t refMask = cu.getBestRefIdx(0);
- allSplitRefs = splitData[0].splitRefs = splitData[1].splitRefs = splitData[2].splitRefs = splitData[3].splitRefs = refMask;
- }
-
- if (m_slice->m_sliceType == B_SLICE)
- {
- md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
- checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
- if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
- {
- uint32_t numPU = md.pred[PRED_BIDIR].cu.getNumPartInter(0);
- if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)
- for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
- {
- PredictionUnit pu(md.pred[PRED_BIDIR].cu, cuGeom, puIdx);
- motionCompensation(md.pred[PRED_BIDIR].cu, pu, md.pred[PRED_BIDIR].predYuv, true, true);
- }
- encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
- checkBestMode(md.pred[PRED_BIDIR], cuGeom.depth);
- }
- }
-
- if (!skipRectAmp)
- {
- if (m_param->bEnableRectInter)
- {
- uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
- uint32_t threshold_2NxN, threshold_Nx2N;
-
- if (m_slice->m_sliceType == P_SLICE)
- {
- threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0];
- threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
- }
- else
- {
- threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
- + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
- threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
- + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
- }
-
- int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
- if (try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
- refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
- checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
- }
-
- if (splitCost < md.bestMode->rdCost + threshold_Nx2N)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
- refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
- checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
- }
-
- if (!try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
- refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
- checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
- }
- }
-
- // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
- if (m_slice->m_sps->maxAMPDepth > depth)
- {
- uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
- uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N;
-
- if (m_slice->m_sliceType == P_SLICE)
- {
- threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0];
- threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0];
-
- threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
- threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0];
- }
- else
- {
- threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
- + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
- threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0]
- + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
-
- threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
- + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
- threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0]
- + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
- }
-
- bool bHor = false, bVer = false;
- if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
- bHor = true;
- else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
- bVer = true;
- else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0])
- {
- bHor = true;
- bVer = true;
- }
-
- if (bHor)
- {
- int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU;
- if (try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD)
- {
- refMasks[0] = allSplitRefs; /* 75% top */
- refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
- checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
- }
-
- if (splitCost < md.bestMode->rdCost + threshold_2NxnU)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
- refMasks[1] = allSplitRefs; /* 75% bot */
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
- checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
- }
-
- if (!try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD)
- {
- refMasks[0] = allSplitRefs; /* 75% top */
- refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
- checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
- }
- }
-
- if (bVer)
- {
- int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N;
- if (try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N)
- {
- refMasks[0] = allSplitRefs; /* 75% left */
- refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
- checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
- }
-
- if (splitCost < md.bestMode->rdCost + threshold_nLx2N)
- {
- refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */
- refMasks[1] = allSplitRefs; /* 75% right */
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
- checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
- }
-
- if (!try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N)
- {
- refMasks[0] = allSplitRefs; /* 75% left */
- refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
- checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
- checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
- }
- }
- }
- }
-
- if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) && (cuGeom.log2CUSize != MAX_LOG2_CU_SIZE) && !((m_param->bCTUInfo & 4) && bCtuInfoCheck))
- {
- if (!m_param->limitReferences || splitIntra)
- {
- ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
- checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N);
- checkBestMode(md.pred[PRED_INTRA], depth);
-
- if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
- {
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
- checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN);
- checkBestMode(md.pred[PRED_INTRA_NxN], depth);
- }
- }
- else
- {
- ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
- }
- }
- }
-
- if ((md.bestMode->cu.isInter(0) && !(md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)) && (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400))
- {
- uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
-
- for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
- {
- PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
- motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, m_csp != X265_CSP_I400);
- }
- encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
- }
- if (m_bTryLossless)
- tryLossless(cuGeom);
-
- if (mightSplit)
- addSplitFlagCost(*md.bestMode, cuGeom.depth);
- }
-
- if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
- {
- if (mightNotSplit)
- {
- CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
- int8_t maxTUDepth = -1;
- for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
- maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
- ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
- }
- }
-
- /* compare split RD cost against best cost */
- if (mightSplit && !skipRecursion)
- checkBestMode(md.pred[PRED_SPLIT], depth);
-
- if (m_param->bEnableRdRefine && depth <= m_slice->m_pps->maxCuDQPDepth)
- {
- int cuIdx = (cuGeom.childOffset - 1) / 3;
- cacheCost[cuIdx] = md.bestMode->rdCost;
- }
-
- /* determine which motion references the parent CU should search */
- SplitData splitCUData;
- splitCUData.initSplitCUData();
- if (m_param->limitReferences & X265_REF_LIMIT_DEPTH)
- {
- if (md.bestMode == &md.pred[PRED_SPLIT])
- splitCUData.splitRefs = allSplitRefs;
- else
- {
- /* use best merge/inter mode, in case of intra use 2Nx2N inter references */
- CUData& cu = md.bestMode->cu.isIntra(0) ? md.pred[PRED_2Nx2N].cu : md.bestMode->cu;
- uint32_t numPU = cu.getNumPartInter(0);
- for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, 0))
- splitCUData.splitRefs |= cu.getBestRefIdx(subPartIdx);
- }
- }
-
- if (m_param->limitModes)
- {
- splitCUData.mvCost[0] = md.pred[PRED_2Nx2N].bestME[0][0].mvCost; // L0
- splitCUData.mvCost[1] = md.pred[PRED_2Nx2N].bestME[0][1].mvCost; // L1
- splitCUData.sa8dCost = md.pred[PRED_2Nx2N].rdCost;
- }
-
- /* Copy best data to encData CTU and recon */
- md.bestMode->cu.copyToPic(depth);
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
-
return splitCUData;
}
More information about the x265-devel
mailing list