[x265] [PATCH] analysis: at RD 0/4 avoid motion references if not used by split blocks
ashok at multicorewareinc.com
Mon Apr 13 15:31:38 CEST 2015
# HG changeset patch
# User Ashok Kumar Mishra <ashok at multicorewareinc.com>
# Date 1428931239 -19800
# Mon Apr 13 18:50:39 2015 +0530
# Node ID c4e0725a3a687438eab8185a9f3fe38dab72ca43
# Parent 79f68b7eda3fb6ab2c25982c362159f9a07cb472
analysis: at RD 0/4 avoid motion references if not used by split blocks
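[Reviewer note, not part of the patch] A minimal standalone sketch of the bit convention the new refMasks[2] / splitRefs hints appear to use, as implied by the hunks below: one 32-bit mask per PU, list-0 reference indices in bits 0..15, list-1 indices in bits 16..31, and a zero mask meaning "no hint, search every reference". The helper names (addRef, shouldSearchRef) are hypothetical and for illustration only; the logic roughly mirrors the check added to predInterSearch().

#include <cstdint>

/* Sketch only: L0 refIdx r sets bit r, L1 refIdx r sets bit r + 16. */
static inline uint32_t addRef(uint32_t mask, int list, int refIdx)
{
    return mask | (1u << (refIdx + (list ? 16 : 0)));
}

/* An empty mask disables filtering; otherwise only flagged references
 * are searched (equivalent to the refMask >>= 16 handling of list 1). */
static inline bool shouldSearchRef(uint32_t mask, int list, int refIdx)
{
    if (!mask)
        return true;
    uint32_t listBits = list ? (mask >> 16) : (mask & 0xFFFF);
    return ((listBits >> refIdx) & 1) != 0;
}

int main()
{
    uint32_t mask = 0;
    mask = addRef(mask, 0, 2);  /* L0 ref 2 -> bit 2  */
    mask = addRef(mask, 1, 0);  /* L1 ref 0 -> bit 16 */
    return shouldSearchRef(mask, 0, 2) && !shouldSearchRef(mask, 0, 1) ? 0 : 1;
}
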
diff -r 79f68b7eda3f -r c4e0725a3a68 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Sat Apr 11 23:57:37 2015 +0530
+++ b/source/encoder/analysis.cpp Mon Apr 13 18:50:39 2015 +0530
@@ -385,6 +385,8 @@
{
if (m_param->rdLevel <= 4)
{
+ uint32_t refMasks[2] = { 0, 0 };
+
switch (pmode.modes[task])
{
case PRED_INTRA:
@@ -396,33 +398,33 @@
break;
case PRED_2Nx2N:
- slave.checkInter_rd0_4(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N);
+ slave.checkInter_rd0_4(md.pred[PRED_2Nx2N], pmode.cuGeom, SIZE_2Nx2N, refMasks);
if (m_slice->m_sliceType == B_SLICE)
slave.checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], pmode.cuGeom);
break;
case PRED_Nx2N:
- slave.checkInter_rd0_4(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N);
+ slave.checkInter_rd0_4(md.pred[PRED_Nx2N], pmode.cuGeom, SIZE_Nx2N, refMasks);
break;
case PRED_2NxN:
- slave.checkInter_rd0_4(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN);
+ slave.checkInter_rd0_4(md.pred[PRED_2NxN], pmode.cuGeom, SIZE_2NxN, refMasks);
break;
case PRED_2NxnU:
- slave.checkInter_rd0_4(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU);
+ slave.checkInter_rd0_4(md.pred[PRED_2NxnU], pmode.cuGeom, SIZE_2NxnU, refMasks);
break;
case PRED_2NxnD:
- slave.checkInter_rd0_4(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD);
+ slave.checkInter_rd0_4(md.pred[PRED_2NxnD], pmode.cuGeom, SIZE_2NxnD, refMasks);
break;
case PRED_nLx2N:
- slave.checkInter_rd0_4(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N);
+ slave.checkInter_rd0_4(md.pred[PRED_nLx2N], pmode.cuGeom, SIZE_nLx2N, refMasks);
break;
case PRED_nRx2N:
- slave.checkInter_rd0_4(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N);
+ slave.checkInter_rd0_4(md.pred[PRED_nRx2N], pmode.cuGeom, SIZE_nRx2N, refMasks);
break;
default:
@@ -741,7 +743,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
}
-void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom)
+uint32_t Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom)
{
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -751,7 +753,10 @@
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
+
bool earlyskip = false;
+ uint32_t splitRefs[4] = { 0, 0, 0, 0 };
+ /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
if (mightNotSplit && depth >= minDepth)
{
/* Compute Merge Cost */
@@ -761,6 +766,7 @@
if (m_param->rdLevel)
earlyskip = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
}
+
bool bNoSplit = false;
if (md.bestMode)
{
@@ -769,6 +775,7 @@
bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
}
+ /* Step 2. Evaluate each of the 4 split sub-blocks in series */
if (mightSplit && !bNoSplit)
{
Mode* splitPred = &md.pred[PRED_SPLIT];
@@ -788,7 +795,7 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd0_4(parentCTU, childGeom);
+ splitRefs[subPartIdx] = compressInterCU_rd0_4(parentCTU, childGeom);
// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -814,12 +821,20 @@
splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
}
+ /* Split CUs
+ * 0 1
+ * 2 3 */
+ uint32_t allSplitRefs = splitRefs[0] | splitRefs[1] | splitRefs[2] | splitRefs[3];
+ /* Step 3. Evaluate ME (2Nx2N, rect, amp) and intra modes at current depth */
if (mightNotSplit && depth >= minDepth)
{
if (!earlyskip)
{
+ uint32_t refMasks[2];
+ refMasks[0] = allSplitRefs;
+
md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
+ checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
if (m_slice->m_sliceType == B_SLICE)
{
@@ -830,13 +845,17 @@
Mode *bestInter = &md.pred[PRED_2Nx2N];
if (m_param->bEnableRectInter)
{
+ refMasks[0] = splitRefs[0] | splitRefs[2]; /* left */
+ refMasks[1] = splitRefs[1] | splitRefs[3]; /* right */
md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
+ checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_Nx2N];
+ refMasks[0] = splitRefs[0] | splitRefs[1]; /* top */
+ refMasks[1] = splitRefs[2] | splitRefs[3]; /* bot */
md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
+ checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxN];
}
@@ -857,29 +876,38 @@
if (bHor)
{
+ refMasks[0] = splitRefs[0] | splitRefs[1]; /* 25% top */
+ refMasks[1] = allSplitRefs; /* 75% bot */
md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU);
+ checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnU];
+ refMasks[0] = allSplitRefs; /* 75% top */
+ refMasks[1] = splitRefs[2] | splitRefs[3]; /* 25% bot */
md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD);
+ checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnD];
}
if (bVer)
{
+ refMasks[0] = splitRefs[0] | splitRefs[2]; /* 25% left */
+ refMasks[1] = allSplitRefs; /* 75% right */
md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N);
+ checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nLx2N];
+ refMasks[0] = allSplitRefs; /* 75% left */
+ refMasks[1] = splitRefs[1] | splitRefs[3]; /* 25% right */
md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
- checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N);
+ checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nRx2N];
}
}
+
bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
if (m_param->rdLevel >= 3)
{
@@ -997,6 +1025,7 @@
if (mightSplit)
addSplitFlagCost(*md.bestMode, cuGeom.depth);
}
+
if (mightNotSplit && md.bestMode)
{
/* early-out statistics */
@@ -1006,6 +1035,7 @@
cuStat.count[depth] += 1;
cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
}
+
if (mightSplit && !bNoSplit)
{
Mode* splitPred = &md.pred[PRED_SPLIT];
@@ -1015,14 +1045,68 @@
checkBestMode(*splitPred, cuGeom.depth);
else if (splitPred->sa8dCost < md.bestMode->sa8dCost)
md.bestMode = splitPred;
+
checkDQPForSplitPred(md.bestMode->cu, cuGeom);
}
+ /* determine which motion references the parent CU should search */
+ uint32_t refMask;
+ if (md.bestMode == &md.pred[PRED_SPLIT])
+ refMask = allSplitRefs;
+ else if (md.bestMode->cu.isIntra(0))
+ {
+ /* use 2Nx2N inter references */
+ CUData& cu = md.pred[PRED_2Nx2N].cu;
+ switch (cu.m_interDir[0])
+ {
+ case 1:
+ refMask = 1 << cu.m_refIdx[0][0];
+ break;
+ case 2:
+ refMask = 1 << (cu.m_refIdx[1][0] + 16);
+ break;
+ case 3:
+ refMask = 1 << cu.m_refIdx[0][0];
+ refMask |= 1 << (cu.m_refIdx[1][0] + 16);
+ break;
+ }
+ }
+ else
+ {
+ /* use best merge/inter mode */
+ CUData& cu = md.bestMode->cu;
+ PartSize partSize = (PartSize)cu.m_partSize[0];
+ uint32_t numPU = (partSize == SIZE_2Nx2N) ? 1 : 2;
+ uint32_t depth = cu.m_cuDepth[0];
+ uint32_t puOffset = (g_puOffset[uint32_t(partSize)] << (g_unitSizeDepth - depth) * 2) >> 4;
+ refMask = 0;
+ for (uint32_t puIdx = 0, subPartIdx = 0; puIdx < numPU; puIdx++, subPartIdx += puOffset)
+ {
+ uint32_t interDir = cu.m_interDir[subPartIdx];
+ switch (interDir)
+ {
+ case 1:
+ refMask |= 1 << cu.m_refIdx[0][subPartIdx];
+ break;
+ case 2:
+ refMask |= 1 << (cu.m_refIdx[1][subPartIdx] + 16);
+ break;
+ case 3:
+ refMask |= 1 << cu.m_refIdx[0][subPartIdx];
+ refMask |= 1 << (cu.m_refIdx[1][subPartIdx] + 16);
+ break;
+ }
+ }
+ }
+
/* Copy best data to encData CTU and recon */
X265_CHECK(md.bestMode->ok(), "best mode is not ok");
md.bestMode->cu.copyToPic(depth);
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
+
+ return refMask;
}
+
void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder)
{
uint32_t depth = cuGeom.depth;
@@ -1450,7 +1534,7 @@
}
}
-void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)
+void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refMask[2])
{
interMode.initCosts();
interMode.cu.setPartSizeSubParts(partSize);
@@ -1470,7 +1554,7 @@
}
}
- predInterSearch(interMode, cuGeom, false, m_bChromaSa8d);
+ predInterSearch(interMode, cuGeom, false, m_bChromaSa8d, refMask);
/* predInterSearch sets interMode.sa8dBits */
const Yuv& fencYuv = *interMode.fencYuv;
@@ -1518,7 +1602,8 @@
}
}
- predInterSearch(interMode, cuGeom, bMergeOnly, true);
+ uint32_t refMask[2] = { 0, 0 };
+ predInterSearch(interMode, cuGeom, bMergeOnly, true, refMask);
/* predInterSearch sets interMode.sa8dBits, but this is ignored */
encodeResAndCalcRdInterCU(interMode, cuGeom);
diff -r 79f68b7eda3f -r c4e0725a3a68 source/encoder/analysis.h
--- a/source/encoder/analysis.h Sat Apr 11 23:57:37 2015 +0530
+++ b/source/encoder/analysis.h Mon Apr 13 18:50:39 2015 +0530
@@ -113,7 +113,7 @@
/* full analysis for a P or B slice CU */
void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom);
- void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
+ uint32_t compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom);
void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder);
/* measure merge and skip */
@@ -121,7 +121,7 @@
void checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom& cuGeom, bool isSkipMode);
/* measure inter options */
- void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize);
+ void checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, uint32_t refmask[2]);
void checkInter_rd5_6(Mode& interMode, const CUGeom& cuGeom, PartSize partSize, bool bMergeOnly);
void checkBidir2Nx2N(Mode& inter2Nx2N, Mode& bidir2Nx2N, const CUGeom& cuGeom);
diff -r 79f68b7eda3f -r c4e0725a3a68 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Sat Apr 11 23:57:37 2015 +0530
+++ b/source/encoder/entropy.cpp Mon Apr 13 18:50:39 2015 +0530
@@ -35,8 +35,6 @@
#define CU_DQP_EG_k 0 // exp-golomb order
#define START_VALUE 8 // start value for dpcm mode
-static const uint32_t g_puOffset[8] = { 0, 8, 4, 4, 2, 10, 1, 5 };
-
namespace x265 {
Entropy::Entropy()
diff -r 79f68b7eda3f -r c4e0725a3a68 source/encoder/entropy.h
--- a/source/encoder/entropy.h Sat Apr 11 23:57:37 2015 +0530
+++ b/source/encoder/entropy.h Mon Apr 13 18:50:39 2015 +0530
@@ -38,6 +38,8 @@
struct EstBitsSbac;
class ScalingList;
+static const uint32_t g_puOffset[8] = { 0, 8, 4, 4, 2, 10, 1, 5 };
+
enum SplitType
{
DONT_SPLIT = 0,
diff -r 79f68b7eda3f -r c4e0725a3a68 source/encoder/search.cpp
--- a/source/encoder/search.cpp Sat Apr 11 23:57:37 2015 +0530
+++ b/source/encoder/search.cpp Mon Apr 13 18:50:39 2015 +0530
@@ -1977,7 +1977,7 @@
}
/* find the best inter prediction for each PU of specified mode */
-void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChromaSA8D)
+void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChromaSA8D, uint32_t refMasks[2])
{
ProfileCUScope(interMode.cu, motionEstimationElapsedTime, countMotionEstimate);
@@ -2124,10 +2124,15 @@
}
if (bDoUnidir)
{
+ uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;
+
for (int list = 0; list < numPredDir; list++)
{
for (int ref = 0; ref < numRefIdx[list]; ref++)
{
+ if (!(refMask & (1 << ref)))
+ continue;
+
uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
bits += getTUBits(ref, numRefIdx[list]);
@@ -2182,6 +2187,8 @@
bestME[list].bits = bits;
}
}
+ /* the second list ref bits start at bit 16 */
+ refMask >>= 16;
}
}
diff -r 79f68b7eda3f -r c4e0725a3a68 source/encoder/search.h
--- a/source/encoder/search.h Sat Apr 11 23:57:37 2015 +0530
+++ b/source/encoder/search.h Mon Apr 13 18:50:39 2015 +0530
@@ -301,7 +301,7 @@
void encodeIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
// estimation inter prediction (non-skip)
- void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma);
+ void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bMergeOnly, bool bChroma, uint32_t masks[2]);
// encode residual and compute rd-cost for inter mode
void encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
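
[Usage illustration, not part of the patch] How the four split sub-block masks (numbered 0 1 / 2 3 in z-order, per the new comment in analysis.cpp) collapse into the two per-PU hints for a vertically split Nx2N partition; the patch builds the analogous pairs for 2NxN and the AMP shapes, and ORs all four for 2Nx2N. The function name below is hypothetical.

#include <cstdint>

static void nx2nRefHints(const uint32_t splitRefs[4], uint32_t refMasks[2])
{
    refMasks[0] = splitRefs[0] | splitRefs[2]; /* left  half: sub-blocks 0 and 2 */
    refMasks[1] = splitRefs[1] | splitRefs[3]; /* right half: sub-blocks 1 and 3 */
}

int main()
{
    uint32_t splitRefs[4] = { 1u << 0, 1u << 1, 1u << 0, 1u << 2 };
    uint32_t refMasks[2];
    nx2nRefHints(splitRefs, refMasks);
    /* refMasks[0] == 0x1 (L0 ref 0 only), refMasks[1] == 0x6 (L0 refs 1 and 2) */
    return 0;
}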