[x265] [PATCH] analysis: RDO based BIDIR decisions
Steve Borho
steve at borho.org
Sun Nov 9 07:44:16 CET 2014
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1415484653 21600
# Sat Nov 08 16:10:53 2014 -0600
# Node ID 3bb94141e18be23d86a35b9fd3a3f800b9a2fb54
# Parent 1e04e178a349ff3a27ed0207cca7bdd9f0db4ff8
analysis: RDO based BIDIR decisions
diff -r 1e04e178a349 -r 3bb94141e18b source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/analysis.cpp Sat Nov 08 16:10:53 2014 -0600
@@ -399,6 +399,8 @@
case 1:
slave->checkInter_rd0_4(md.pred[PRED_2Nx2N], *m_curGeom, SIZE_2Nx2N);
+ if (m_slice->m_sliceType == B_SLICE)
+ slave->checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], *m_curGeom);
break;
case 2:
@@ -449,6 +451,13 @@
case 1:
slave->checkInter_rd5_6(md.pred[PRED_2Nx2N], *m_curGeom, SIZE_2Nx2N, false);
+ md.pred[PRED_BIDIR].rdCost = MAX_INT64;
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ slave->checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], *m_curGeom);
+ if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
+ encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], *m_curGeom);
+ }
break;
case 2:
@@ -504,6 +513,7 @@
/* Initialize all prediction CUs based on parentCTU */
md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
if (m_param->bEnableRectInter)
@@ -595,16 +605,22 @@
if (m_param->rdLevel > 2)
{
- /* encode best inter */
+ /* RD selection between merge, inter, bidir and intra */
for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
{
prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
motionCompensation(bestInter->predYuv, false, true);
}
encodeResAndCalcRdInterCU(*bestInter, cuGeom);
+ checkBestMode(*bestInter, depth);
- /* RD selection between merge, inter and intra */
- checkBestMode(*bestInter, depth);
+ /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
+ if (m_slice->m_sliceType == B_SLICE &&
+ md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+ {
+ encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+ checkBestMode(md.pred[PRED_BIDIR], depth);
+ }
#if MATCH_NON_PMODE
if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) || md.bestMode->sa8dCost == MAX_INT64)
@@ -618,6 +634,9 @@
if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
md.bestMode = bestInter;
+ if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = &md.pred[PRED_BIDIR];
+
if (bTryIntra && md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
{
md.bestMode = &md.pred[PRED_INTRA];
@@ -641,6 +660,7 @@
m_modeCompletionEvent.wait();
checkBestMode(md.pred[PRED_2Nx2N], depth);
+ checkBestMode(md.pred[PRED_BIDIR], depth);
if (m_param->bEnableRectInter)
{
@@ -790,8 +810,14 @@
{
md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
+
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+ checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
+ }
+
Mode *bestInter = &md.pred[PRED_2Nx2N];
-
if (m_param->bEnableRectInter)
{
md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
@@ -853,11 +879,18 @@
prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
motionCompensation(bestInter->predYuv, false, true);
}
+ encodeResAndCalcRdInterCU(*bestInter, cuGeom);
+ checkBestMode(*bestInter, depth);
- encodeResAndCalcRdInterCU(*bestInter, cuGeom);
-
- if (!md.bestMode || bestInter->rdCost < md.bestMode->rdCost)
- md.bestMode = bestInter;
+ /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ if (md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+ {
+ encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+ checkBestMode(md.pred[PRED_BIDIR], depth);
+ }
+ }
if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
md.bestMode->sa8dCost == MAX_INT64)
@@ -865,16 +898,19 @@
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
- if (md.pred[PRED_INTRA].rdCost < md.bestMode->rdCost)
- md.bestMode = &md.pred[PRED_INTRA];
+ checkBestMode(md.pred[PRED_INTRA], depth);
}
}
else
{
- /* SA8D choice between merge/skip, inter, and intra */
+ /* SA8D choice between merge/skip, inter, bidir, and intra */
if (!md.bestMode || bestInter->sa8dCost < md.bestMode->sa8dCost)
md.bestMode = bestInter;
+ if (m_slice->m_sliceType == B_SLICE &&
+ md.pred[PRED_BIDIR].sa8dCost < md.bestMode->sa8dCost)
+ md.bestMode = &md.pred[PRED_BIDIR];
+
if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
@@ -1052,9 +1088,19 @@
checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
+ if (m_slice->m_sliceType == B_SLICE)
+ {
+ md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+ checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
+ if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
+ {
+ encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+ checkBestMode(md.pred[PRED_BIDIR], cuGeom.depth);
+ }
+ }
+
if (m_param->bEnableRectInter)
{
- // Nx2N rect
if (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0))
{
md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
@@ -1486,6 +1532,110 @@
}
}
+void Analysis::checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom)
+{ /* Builds a 2Nx2N bi-predicted candidate in bidir2Nx2N from the bestME[0]/bestME[1] results of inter2Nx2N and estimates its SA8D cost; no residual is coded here */
+ CUData& cu = bidir2Nx2N.cu;
+ /* Bail out with both costs set to MAX_INT64 when bi-prediction is restricted for this CU or either bestME[] cost is still MAX_UINT; callers test sa8dCost against MAX_INT64 */
+ if (cu.isBipredRestriction() || inter2Nx2N.bestME[0].cost == MAX_UINT || inter2Nx2N.bestME[1].cost == MAX_UINT)
+ {
+ bidir2Nx2N.sa8dCost = MAX_INT64;
+ bidir2Nx2N.rdCost = MAX_INT64;
+ return;
+ }
+
+ const Yuv& fencYuv = *bidir2Nx2N.fencYuv;
+ MV mvzero(0, 0);
+ int partEnum = cuGeom.log2CUSize - 2; // index used below for the primitives.sa8d[] and primitives.pixelavg_pp[] tables
+ /* Start from copies of the uni-directional results; mvp0/mvp1 and mvpIdx0/mvpIdx1 are passed to checkBestMVP() in the zero-MV trial below */
+ bidir2Nx2N.bestME[0] = inter2Nx2N.bestME[0];
+ bidir2Nx2N.bestME[1] = inter2Nx2N.bestME[1];
+ int ref0 = bidir2Nx2N.bestME[0].ref;
+ MV mvp0 = bidir2Nx2N.bestME[0].mvp;
+ int mvpIdx0 = bidir2Nx2N.bestME[0].mvpIdx;
+ int ref1 = bidir2Nx2N.bestME[1].ref;
+ MV mvp1 = bidir2Nx2N.bestME[1].mvp;
+ int mvpIdx1 = bidir2Nx2N.bestME[1].mvpIdx;
+ /* Describe the CU as a single non-merge inter 2Nx2N PU carrying a reference index and MVP index for each list */
+ bidir2Nx2N.initCosts();
+ cu.setPartSizeSubParts(SIZE_2Nx2N);
+ cu.setPredModeSubParts(MODE_INTER);
+ cu.setPUInterDir(3, 0, 0); // NOTE(review): 3 presumably selects the bi-directional (both lists) inter direction - confirm against setPUInterDir
+ cu.setPURefIdx(0, (char)ref0, 0, 0);
+ cu.setPURefIdx(1, (char)ref1, 0, 0);
+ cu.m_mvpIdx[0][0] = (uint8_t)mvpIdx0;
+ cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;
+ cu.m_mergeFlag[0] = 0;
+
+ /* Estimate cost of BIDIR using best 2Nx2N L0 and L1 motion vectors */
+ cu.setPUMv(0, bidir2Nx2N.bestME[0].mv, 0, 0);
+ cu.m_mvd[0][0] = bidir2Nx2N.bestME[0].mv - mvp0;
+
+ cu.setPUMv(1, bidir2Nx2N.bestME[1].mv, 0, 0);
+ cu.m_mvd[1][0] = bidir2Nx2N.bestME[1].mv - mvp1;
+ /* Generate the prediction for PU 0 into bidir2Nx2N.predYuv; the (true, true) flags presumably request both luma and chroma - TODO confirm */
+ prepMotionCompensation(cu, cuGeom, 0);
+ motionCompensation(bidir2Nx2N.predYuv, true, true);
+ /* Cost estimate = SA8D of plane 0 (m_buf[0]) + cost of the bits: both lists' bits, with the two single-list selection costs swapped for m_listSelBits[2] */
+ int sa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size);
+ bidir2Nx2N.sa8dBits = bidir2Nx2N.bestME[0].bits + bidir2Nx2N.bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
+ bidir2Nx2N.sa8dCost = sa8d + m_rdCost.getCost(bidir2Nx2N.sa8dBits);
+ /* A zero-MV pair is only tried when at least one of the chosen MVs is non-zero */
+ bool bTryZero = bidir2Nx2N.bestME[0].mv.notZero() || bidir2Nx2N.bestME[1].mv.notZero();
+ if (bTryZero)
+ {
+ /* Do not try zero MV if unidir motion predictors are beyond
+ * valid search area */
+ MV mvmin, mvmax;
+ int merange = X265_MAX(m_param->sourceWidth, m_param->sourceHeight);
+ setSearchRange(cu, mvzero, merange, mvmin, mvmax);
+ mvmax.y += 2; // there is some pad for subpel refine
+ mvmin <<= 2; // NOTE(review): presumably converts the full-pel range to quarter-pel units to match mvp - confirm
+ mvmax <<= 2;
+
+ bTryZero &= bidir2Nx2N.bestME[0].mvp.checkRange(mvmin, mvmax);
+ bTryZero &= bidir2Nx2N.bestME[1].mvp.checkRange(mvmin, mvmax);
+ }
+ if (bTryZero)
+ {
+ /* Estimate cost of BIDIR using coincident blocks */
+ Yuv& tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
+ pixel *fref0 = m_slice->m_mref[0][ref0].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx);
+ pixel *fref1 = m_slice->m_mref[1][ref1].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx);
+ intptr_t refStride = m_slice->m_mref[0][0].lumaStride; // NOTE(review): stride of m_mref[0][0] is used for both references - assumes every reference shares one luma stride
+
+ primitives.pixelavg_pp[partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, fref0, refStride, fref1, refStride, 32); // NOTE(review): final argument 32 presumably means equal weighting - confirm
+ int sa8dCost = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
+ /* Rebase each list's bit count: remove the bit cost of the searched MV and add that of the zero MV against the same predictor */
+ uint32_t bits0 = bidir2Nx2N.bestME[0].bits - m_me.bitcost(bidir2Nx2N.bestME[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
+ uint32_t bits1 = bidir2Nx2N.bestME[1].bits - m_me.bitcost(bidir2Nx2N.bestME[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
+ uint32_t zcost = sa8dCost + m_rdCost.getCost(bits0) + m_rdCost.getCost(bits1);
+
+ /* refine MVP selection for zero mv, updates: mvp, mvpidx, bits, cost */
+ checkBestMVP(inter2Nx2N.amvpCand[0][ref0], mvzero, mvp0, mvpIdx0, bits0, zcost);
+ checkBestMVP(inter2Nx2N.amvpCand[1][ref1], mvzero, mvp1, mvpIdx1, bits1, zcost);
+ /* The final zero-MV estimate is recomputed from the combined bit count, replacing the provisional zcost above */
+ uint32_t zbits = bits0 + bits1 + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
+ zcost = sa8dCost + m_rdCost.getCost(zbits);
+ /* Adopt the zero-MV candidate only when it is strictly cheaper, then regenerate predYuv so it matches the MVs stored in cu */
+ if (zcost < bidir2Nx2N.sa8dCost)
+ {
+ bidir2Nx2N.sa8dBits = zbits;
+ bidir2Nx2N.sa8dCost = zcost;
+
+ cu.setPUMv(0, mvzero, 0, 0);
+ cu.m_mvd[0][0] = mvzero - mvp0;
+ cu.m_mvpIdx[0][0] = (uint8_t)mvpIdx0;
+
+ cu.setPUMv(1, mvzero, 0, 0);
+ cu.m_mvd[1][0] = mvzero - mvp1;
+ cu.m_mvpIdx[1][0] = (uint8_t)mvpIdx1;
+
+ prepMotionCompensation(cu, cuGeom, 0);
+ motionCompensation(bidir2Nx2N.predYuv, true, true);
+ }
+ }
+}
+
void Analysis::encodeResidue(const CUData& ctu, const CUGeom& cuGeom)
{
if (cuGeom.depth < ctu.m_cuDepth[cuGeom.encodeIdx] && cuGeom.depth < g_maxCUDepth)
diff -r 1e04e178a349 -r 3bb94141e18b source/encoder/analysis.h
--- a/source/encoder/analysis.h Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/analysis.h Sat Nov 08 16:10:53 2014 -0600
@@ -49,6 +49,7 @@
PRED_SKIP,
PRED_INTRA,
PRED_2Nx2N,
+ PRED_BIDIR,
PRED_Nx2N,
PRED_2NxN,
PRED_SPLIT,
@@ -105,6 +106,8 @@
void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly);
+ void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
+
/* encode current bestMode losslessly, pick best RD cost */
void tryLossless(const CUGeom& cuGeom);
diff -r 1e04e178a349 -r 3bb94141e18b source/encoder/search.cpp
--- a/source/encoder/search.cpp Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/search.cpp Sat Nov 08 16:10:53 2014 -0600
@@ -1984,10 +1984,6 @@
}
}
- MotionData bidir[2];
- uint32_t bidirCost = MAX_UINT;
- int bidirBits = 0;
-
interMode.bestME[0].cost = MAX_UINT;
interMode.bestME[1].cost = MAX_UINT;
@@ -2162,12 +2158,18 @@
}
/* Bi-directional prediction */
- if (slice->isInterB() && !cu.isBipredRestriction() && interMode.bestME[0].cost != MAX_UINT && interMode.bestME[1].cost != MAX_UINT)
+ MotionData bidir[2];
+ uint32_t bidirCost = MAX_UINT;
+ int bidirBits = 0;
+
+ if (slice->isInterB() && !cu.isBipredRestriction() && /* biprediction is possible for this PU */
+ cu.m_partSize[m_puAbsPartIdx] != SIZE_2Nx2N && /* 2Nx2N biprediction is handled elsewhere */
+ interMode.bestME[0].cost != MAX_UINT && interMode.bestME[1].cost != MAX_UINT)
{
bidir[0] = interMode.bestME[0];
bidir[1] = interMode.bestME[1];
- /* Generate reference subpels */
+ /* Generate reference subpels. TODO: This ignores reference slice weights */
PicYuv* refPic0 = slice->m_refPicList[0][interMode.bestME[0].ref]->m_reconPic;
PicYuv* refPic1 = slice->m_refPicList[1][interMode.bestME[1].ref]->m_reconPic;
Yuv* bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv;
@@ -2235,11 +2237,6 @@
bidirBits = bits0 + bits1 + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
}
}
-
- /* Ugly hack - since BIDIR is not yet an RD decision, add a penalty
- * if psy-rd is enabled */
- if (m_rdCost.m_psyRd)
- bidirCost += (m_rdCost.m_psyRd * bidirCost) >> 8;
}
/* select best option and store into CU */
More information about the x265-devel
mailing list