[x265] [PATCH 02 of 10 RFC] analysis: at RD 5/6 avoid motion references if not used by split blocks
Steve Borho
steve at borho.org
Tue Mar 31 03:29:38 CEST 2015
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1426555173 18000
# Mon Mar 16 20:19:33 2015 -0500
# Node ID 7e7bb565c9776e5c46dcbcd25df27e0e0dbfe071
# Parent af259ac3d304009043f95f72d6d5b7d1519a1838
analysis: at RD 5/6 avoid motion references if not used by split blocks
diff -r af259ac3d304 -r 7e7bb565c977 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Mon Mar 16 20:19:33 2015 -0500
+++ b/source/encoder/analysis.cpp Mon Mar 16 20:19:33 2015 -0500
@@ -432,6 +432,8 @@
}
else
{
+ uint32_t refMasks[2] = { 0, 0 };
+
switch (pmode.modes[task])
{
case PRED_INTRA:
@@ -441,7 +443,7 @@
break;
case PRED_2Nx2N:
- slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, false);
+ slave.checkInter_rd5_6(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, false, refMasks);
md.pred[PRED_BIDIR].rdCost = MAX_INT64;
if (m_slice->m_sliceType == B_SLICE)
{
@@ -452,27 +454,27 @@
break;
case PRED_Nx2N:
- slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, false);
+ slave.checkInter_rd5_6(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, false, refMasks);
break;
case PRED_2NxN:
- slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, false);
+ slave.checkInter_rd5_6(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, false, refMasks);
break;
case PRED_2NxnU:
- slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, bMergeOnly);
+ slave.checkInter_rd5_6(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, bMergeOnly, refMasks);
break;
case PRED_2NxnD:
- slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, bMergeOnly);
+ slave.checkInter_rd5_6(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, bMergeOnly, refMasks);
break;
case PRED_nLx2N:
- slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, bMergeOnly);
+ slave.checkInter_rd5_6(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, bMergeOnly, refMasks);
break;
case PRED_nRx2N:
- slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, bMergeOnly);
+ slave.checkInter_rd5_6(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, bMergeOnly, refMasks);
break;
default:
@@ -1025,7 +1027,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
}
-void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
+uint32_t Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
@@ -1058,6 +1060,7 @@
}
}
+ uint32_t splitRefs[4] = { 0, 0, 0, 0 };
bool foundSkip = false;
/* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
@@ -1089,7 +1092,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd5_6(parentCTU, childGeom, zOrder);
+ splitRefs[subPartIdx] = compressInterCU_rd5_6(parentCTU, childGeom, zOrder);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1112,11 +1115,19 @@
checkDQPForSplitPred(splitPred->cu, cuGeom);
}
+ /* Split CUs
+ * 0 1
+ * 2 3 */
+ uint32_t allSplitRefs = splitRefs[0] | splitRefs[1] | splitRefs[2] | splitRefs[3];
+
/* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
if (mightNotSplit && !(foundSkip && m_param->bEnableEarlySkip))
{
+ uint32_t refMasks[2];
+
+ refMasks[0] = allSplitRefs;
md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
+ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false, refMasks);
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
if (m_slice->m_sliceType == B_SLICE)
@@ -1133,12 +1144,16 @@
if (m_param->bEnableRectInter)
{
+ refMasks[0] = splitRefs[0] | splitRefs[2]; /* left */
+ refMasks[1] = splitRefs[1] | splitRefs[3]; /* right */
md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
+ checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false, refMasks);
checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
+ refMasks[0] = splitRefs[0] | splitRefs[1]; /* top */
+ refMasks[1] = splitRefs[2] | splitRefs[3]; /* bot */
md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
+ checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false, refMasks);
checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
}
@@ -1160,22 +1175,30 @@
if (bHor)
{
+ refMasks[0] = splitRefs[0] | splitRefs[1]; /* 25% top */
+ refMasks[1] = allSplitRefs; /* 75% bot */
md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
+ checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly, refMasks);
checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
+ refMasks[0] = allSplitRefs; /* 75% top */
+ refMasks[1] = splitRefs[2] | splitRefs[3]; /* 25% bot */
md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
+ checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly, refMasks);
checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
}
if (bVer)
{
+ refMasks[0] = splitRefs[0] | splitRefs[2]; /* 25% left */
+ refMasks[1] = allSplitRefs; /* 75% right */
md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
+ checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly, refMasks);
checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
+ refMasks[0] = allSplitRefs; /* 75% left */
+ refMasks[1] = splitRefs[1] | splitRefs[3]; /* 25% right */
md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
+ checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly, refMasks);
checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
}
}
@@ -1206,9 +1229,63 @@
checkBestMode(md.pred[PRED_SPLIT], depth);
+ /* determine which motion references the parent CU should search */
+ uint32_t refMask;
+ if (md.bestMode == &md.pred[PRED_SPLIT])
+ refMask = allSplitRefs;
+ else if (md.bestMode->cu.isIntra(0))
+ {
+ /* use 2Nx2N inter references */
+ CUData& cu = md.pred[PRED_2Nx2N].cu;
+ switch (cu.m_interDir[0])
+ {
+ case 1:
+ refMask = 1 << cu.m_refIdx[0][0];
+ break;
+ case 2:
+ refMask = 1 << (cu.m_refIdx[1][0] + 16);
+ break;
+ case 3:
+ refMask = 1 << cu.m_refIdx[0][0];
+ refMask |= 1 << (cu.m_refIdx[1][0] + 16);
+ break;
+ }
+ }
+ else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)
+ {
+ /* use best merge/inter 2Nx2N mode */
+ CUData& cu = md.bestMode->cu;
+ switch (cu.m_interDir[0])
+ {
+ case 1:
+ refMask = 1 << cu.m_refIdx[0][0];
+ break;
+ case 2:
+ refMask = 1 << (cu.m_refIdx[1][0] + 16);
+ break;
+ case 3:
+ refMask = 1 << cu.m_refIdx[0][0];
+ refMask |= 1 << (cu.m_refIdx[1][0] + 16);
+ break;
+ }
+ }
+ else
+ {
+ /* Else this CU has two inter parts */
+ Mode& m = *md.bestMode;
+ refMask = (1 << m.bestME[0][0].ref) | (1 << m.bestME[1][0].ref);
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ refMask |= 1 << (m.bestME[0][1].ref + 16);
+ refMask |= 1 << (m.bestME[1][1].ref + 16);
+ }
+ }
+
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
+
+ return refMask;
}
/* sets md.bestMode if a valid merge candidate is found, else leaves it NULL */
@@ -1479,8 +1556,8 @@
}
}
}
-
- predInterSearch(interMode, cuGeom, false, m_bChromaSa8d);
+ uint32_t refMask[2] = { 0, 0 };
+ predInterSearch(interMode, cuGeom, false, m_bChromaSa8d, refMask);
/* predInterSearch sets interMode.sa8dBits */
const Yuv& fencYuv = *interMode.fencYuv;
@@ -1508,7 +1585,7 @@
}
}
-void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly)
+void Analysis::checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly, uint32_t refMask[2])
{
interMode.initCosts();
interMode.cu.setPartSizeSubParts(partSize);
@@ -1528,7 +1605,7 @@
}
}
- predInterSearch(interMode, cuGeom, bMergeOnly, true);
+ predInterSearch(interMode, cuGeom, bMergeOnly, true, refMask);
/* predInterSearch sets interMode.sa8dBits, but this is ignored */
encodeResAndCalcRdInterCU(interMode, cuGeom);
diff -r af259ac3d304 -r 7e7bb565c977 source/encoder/analysis.h
--- a/source/encoder/analysis.h Mon Mar 16 20:19:33 2015 -0500
+++ b/source/encoder/analysis.h Mon Mar 16 20:19:33 2015 -0500
@@ -114,7 +114,7 @@
/* full analysis for a P or B slice CU */
void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom);
void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
- void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
+ uint32_t compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
/* measure merge and skip */
void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
@@ -122,7 +122,7 @@
/* measure inter options */
void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
- void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly);
+ void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly, uint32_t refmask[2]);
void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
diff -r af259ac3d304 -r 7e7bb565c977 source/encoder/search.cpp
--- a/source/encoder/search.cpp Mon Mar 16 20:19:33 2015 -0500
+++ b/source/encoder/search.cpp Mon Mar 16 20:19:33 2015 -0500
@@ -1977,7 +1977,7 @@
}
/* find the best inter prediction for each PU of specified mode */
-void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChromaSA8D)
+void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChromaSA8D, uint32_t refMasks[2])
{
ProfileCUScope(interMode.cu, motionEstimationElapsedTime, countMotionEstimate);
@@ -2124,10 +2124,15 @@
}
if (bDoUnidir)
{
+ uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;
+
for (int list = 0; list < numPredDir; list++)
{
for (int ref = 0; ref < numRefIdx[list]; ref++)
{
+ if (!(refMask & (1 << ref)))
+ continue;
+
uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
bits += getTUBits(ref, numRefIdx[list]);
@@ -2182,6 +2187,9 @@
bestME[list].bits = bits;
}
}
+
+ /* the second list ref bits start at bit 16 */
+ refMask >>= 16;
}
}
diff -r af259ac3d304 -r 7e7bb565c977 source/encoder/search.h
--- a/source/encoder/search.h Mon Mar 16 20:19:33 2015 -0500
+++ b/source/encoder/search.h Mon Mar 16 20:19:33 2015 -0500
@@ -301,7 +301,7 @@
void encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
// estimation inter prediction (non-skip)
- void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma);
+ void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma, uint32_t masks[2]);
// encode residual and compute rd-cost for inter mode
void encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
More information about the x265-devel mailing list