[x265] [PATCH 2 of 2] no-rdo: Move residual encoding at depth 0
deepthidevaki at multicorewareinc.com
deepthidevaki at multicorewareinc.com
Fri Nov 22 12:13:52 CET 2013
# HG changeset patch
# User Deepthi Devaki <deepthidevaki at multicorewareinc.com>
# Date 1385118778 -19800
# Node ID aaa803dfbe6f334db45e943dbfb40a7d4ac38142
# Parent 883d9279fde14aad2f2042c5aaaa2c98700a9b8b
no-rdo: Move residual encoding at depth 0.
During mode decision, residual encoding is done without RDOQ. At depth 0, residual encoding of the final best modes is then done with RDOQ.
Intra residual encoding is not done at depth 0. Hence the changes are disabled with the macro NORESENC.
diff -r 883d9279fde1 -r aaa803dfbe6f source/Lib/TLibEncoder/TEncCu.h
--- a/source/Lib/TLibEncoder/TEncCu.h Fri Nov 22 16:41:56 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.h Fri Nov 22 16:42:58 2013 +0530
@@ -191,6 +191,8 @@
bool &bTestMergeAMP_Hor, bool &bTestMergeAMP_Ver);
void xFillPCMBuffer(TComDataCU* outCU, TComYuv* origYuv);
+
+ void xEncodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, UChar depth, uint32_t partIndex = 0);
};
}
//! \}
diff -r 883d9279fde1 -r aaa803dfbe6f source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Fri Nov 22 16:41:56 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h Fri Nov 22 16:42:58 2013 +0530
@@ -186,6 +186,11 @@
uint32_t xSymbolBitsInter(TComDataCU* cu);
+ void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth,
+ uint64_t &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
+
+ void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth, bool bSpatial);
+
protected:
// --------------------------------------------------------------------------------------------
@@ -249,9 +254,6 @@
// -------------------------------------------------------------------------------------------------------------------
void xEncodeResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bSubdivAndCbf, TextType ttype);
- void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth,
- uint64_t &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
- void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth, bool bSpatial);
void setWpScalingDistParam(TComDataCU* cu, int refIdx, int picList);
};
diff -r 883d9279fde1 -r aaa803dfbe6f source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Fri Nov 22 16:41:56 2013 +0530
+++ b/source/encoder/compress.cpp Fri Nov 22 16:42:58 2013 +0530
@@ -27,6 +27,7 @@
/* Lambda Partition Select adjusts the threshold value for Early Exit in No-RDO flow */
#define LAMBDA_PARTITION_SELECT 0.9
#define EARLY_EXIT 1
+#define NORESENC 0
using namespace x265;
@@ -137,7 +138,7 @@
// Filtered and Unfiltered refAbove and refLeft pointing to above and left.
above = aboveScale;
left = leftScale;
- aboveFiltered = aboveScale;
+ aboveFiltered = aboveScale;
leftFiltered = leftScale;
}
@@ -303,7 +304,11 @@
m_tmpRecoYuv[depth] = yuv;
//Encode with residue
+#if NORESENC
+ m_search->estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false, false);
+#else
m_search->estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false);
+#endif
if (outTempCU->m_totalCost < outBestCU->m_totalCost) //Choose best from no-residue mode and residue mode
{
@@ -458,9 +463,13 @@
m_search->motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, partIdx, false, true);
}
+#if NORESENC
+ m_search->estimateRDInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
+ m_bestResiYuv[depth], m_bestRecoYuv[depth], false, false);
+#else
m_search->estimateRDInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
m_bestResiYuv[depth], m_bestRecoYuv[depth], false);
-
+#endif
if (m_bestMergeCU[depth]->m_totalCost < outBestCU->m_totalCost)
{
@@ -474,7 +483,8 @@
m_bestMergeRecoYuv[depth] = tempYuv;
}
- /* Check for Intra in inter frames only if its a P-slice*/
+#if !NORESENC
+ /*Check for Intra in inter frames only if its a P-slice*/
if (outBestCU->getSlice()->getSliceType() == P_SLICE)
{
/*compute intra cost */
@@ -498,6 +508,7 @@
}
}
}
+#endif // if !NORESENC
}
else
{
@@ -537,7 +548,7 @@
if (bSubBranch && bTrySplitDQP && depth < g_maxCUDepth - g_addCUDepth)
{
#if EARLY_EXIT // turn ON this to enable early exit
- // early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour
+ // early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour
// CU's(above, aboveleft, aboveright, left, colocated) and avg cost of that CU at depth "n" with weightage for each quantity
if (outBestCU != 0)
{
@@ -628,7 +639,12 @@
#endif // if EARLY_EXIT
/* Adding costs from best SUbCUs */
outTempCU->copyPartFrom(subBestPartCU, nextDepth_partIndex, nextDepth, true); // Keep best part data to current temporary data.
+#if !NORESENC
xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * nextDepth_partIndex, nextDepth);
+#endif
+#if NORESENC
+ m_bestPredYuv[nextDepth]->copyToPartYuv(m_tmpPredYuv[depth], subBestPartCU->getTotalNumPart() * nextDepth_partIndex);
+#endif
}
else if (bInSlice)
{
@@ -727,17 +743,29 @@
if (outTempCU->m_totalCost < outBestCU->m_totalCost)
{
outBestCU = outTempCU;
+#if !NORESENC
tempYuv = m_tmpRecoYuv[depth];
m_tmpRecoYuv[depth] = m_bestRecoYuv[depth];
m_bestRecoYuv[depth] = tempYuv;
+#else
+ tempYuv = m_tmpPredYuv[depth];
+ m_tmpPredYuv[depth] = m_bestPredYuv[depth];
+ m_bestPredYuv[depth] = tempYuv;
+#endif
}
}
else
{
outBestCU = outTempCU;
+#if !NORESENC
tempYuv = m_tmpRecoYuv[depth];
m_tmpRecoYuv[depth] = m_bestRecoYuv[depth];
m_bestRecoYuv[depth] = tempYuv;
+#else
+ tempYuv = m_tmpPredYuv[depth];
+ m_tmpPredYuv[depth] = m_bestPredYuv[depth];
+ m_bestPredYuv[depth] = tempYuv;
+#endif
}
}
@@ -782,6 +810,12 @@
/* Copy Best data to Picture for next partition prediction. */
outBestCU->copyToPic((UChar)depth);
+#if NORESENC
+ if (depth == 0)
+ {
+ xEncodeResidue(outBestCU, outBestCU, 0, 0);
+ }
+#endif
/* Copy Yuv data to picture Yuv */
xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpely);
@@ -797,3 +831,130 @@
assert(outBestCU->getPredictionMode(0) != MODE_NONE);
assert(outBestCU->m_totalCost != MAX_DOUBLE);
}
+
+void TEncCu::xEncodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, UChar depth, uint32_t partIndex) // Recursively re-encodes the residual of the best modes recorded in lcu; invoked with depth 0 when NORESENC is enabled
+{ // lcu: CU whose per-partition depth/cbf/merge data drive the walk; cu: CU being coded at this depth; absPartIdx: partition offset inside lcu; partIndex: quadrant index within the parent
+ UChar nextDepth = (UChar)(depth + 1);
+ TComDataCU* subTempPartCU = m_tempCU[nextDepth]; // scratch CU used for each child quadrant
+ TComPic* pic = cu->getPic();
+ TComSlice* slice = cu->getPic()->getSlice(); // NOTE(review): pic fetched above could be reused here
+
+ if (depth != 0) // below the root: seed this depth's CI_CURR_BEST entropy-coder state
+ {
+ if (0 == partIndex) // first quadrant: start from the previous depth's CI_CURR_BEST state
+ {
+ m_rdSbacCoders[depth][CI_CURR_BEST]->load(m_rdSbacCoders[depth - 1][CI_CURR_BEST]);
+ }
+ else // later quadrants: continue from this depth's CI_NEXT_BEST state
+ {
+ m_rdSbacCoders[depth][CI_CURR_BEST]->load(m_rdSbacCoders[depth][CI_NEXT_BEST]);
+ }
+ }
+
+ if (((depth < lcu->getDepth(absPartIdx)) && (depth < (g_maxCUDepth - g_addCUDepth)))) // lcu records a greater depth at absPartIdx: recurse into quadrants instead of coding here
+ {
+ uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2; // partitions per quadrant: (parts at this depth) / 4
+ for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts) // absPartIdx advances by one quadrant per iteration
+ {
+ uint32_t lpelx = lcu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]]; // left pixel column of this quadrant
+ uint32_t tpely = lcu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]]; // top pixel row of this quadrant
+ bool bInSlice = lcu->getSCUAddr() + absPartIdx < slice->getSliceCurEndCUAddr();
+ if (bInSlice && (lpelx < slice->getSPS()->getPicWidthInLumaSamples()) && (tpely < slice->getSPS()->getPicHeightInLumaSamples())) // skip quadrants past the slice end or outside the picture
+ {
+ subTempPartCU->copyToSubCU(cu, partUnitIdx, depth + 1);
+ xEncodeResidue(lcu, subTempPartCU, absPartIdx, depth + 1, partUnitIdx);
+ }
+ }
+
+ return; // all work for a split CU happens in the recursive calls
+ }
+
+ if (lcu->getQtRootCbf(absPartIdx)) // root cbf set for this CU: redo residual coding
+ {
+ uint64_t cost = 0;
+ uint32_t bits = 0;
+ uint32_t distortion = 0;
+
+ m_search->m_rdGoOnSbacCoder->load(m_search->m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);
+
+ // Calculate residue with sub_ps from original (src1) and prediction (src2), one plane at a time
+ Pel* src2 = m_bestPredYuv[0]->getLumaAddr(absPartIdx); // prediction is read from the depth-0 buffer at this CU's offset
+ Pel* src1 = m_origYuv[0]->getLumaAddr(absPartIdx);
+ int16_t* dst = m_tmpResiYuv[depth]->getLumaAddr(0); // residual is written at offset 0 of this depth's scratch buffer
+ uint32_t src2stride = m_bestPredYuv[0]->getStride();
+ uint32_t src1stride = m_origYuv[0]->getStride();
+ uint32_t dststride = m_tmpResiYuv[depth]->m_width;
+ int part = partitionFromSizes(cu->getWidth(0), cu->getWidth(0)); // NOTE(review): width is passed for both arguments — presumably CUs are square; confirm
+ primitives.luma_sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
+
+ src2 = m_bestPredYuv[0]->getCbAddr(absPartIdx); // Cb plane
+ src1 = m_origYuv[0]->getCbAddr(absPartIdx);
+ dst = m_tmpResiYuv[depth]->getCbAddr(0);
+ src2stride = m_bestPredYuv[0]->getCStride();
+ src1stride = m_origYuv[0]->getCStride();
+ dststride = m_tmpResiYuv[depth]->m_cwidth;
+ primitives.chroma[m_cfg->param.internalCsp].sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
+
+ src2 = m_bestPredYuv[0]->getCrAddr(absPartIdx); // Cr plane: source strides are reused from the Cb setup above
+ src1 = m_origYuv[0]->getCrAddr(absPartIdx);
+ dst = m_tmpResiYuv[depth]->getCrAddr(0);
+ dststride = m_tmpResiYuv[depth]->m_cwidth;
+ primitives.chroma[m_cfg->param.internalCsp].sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
+
+ // Residual encoding; cost, bits and distortion are outputs that are not read afterwards in this function
+ uint32_t zeroDistortion = 0;
+ m_search->xEstimateResidualQT(cu, 0, 0, m_tmpResiYuv[depth], cu->getDepth(0), cost, bits, distortion, &zeroDistortion, true); // final argument is curUseRDOQ = true
+ m_search->xSetResidualQTData(cu, 0, 0, NULL, cu->getDepth(0), false); // bSpatial = false, no residual buffer supplied
+
+ if (lcu->getMergeFlag(absPartIdx) && cu->getPartitionSize(0) == SIZE_2Nx2N && !cu->getQtRootCbf(0)) // merge 2Nx2N CU whose root cbf is now clear: flag it as skip
+ {
+ cu->setSkipFlagSubParts(true, 0, depth);
+ cu->copyCodedToPic(depth); // no return here: control falls through to the prediction copy at the end
+ }
+ else
+ {
+ cu->copyCodedToPic(depth);
+ m_search->xSetResidualQTData(cu, 0, 0, m_tmpResiYuv[depth], cu->getDepth(0), true); // bSpatial = true, with m_tmpResiYuv[depth] as the residual buffer
+
+ // Generate recon with add_ps from prediction and residual, written into m_bestRecoYuv[0]
+ Pel* pred = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
+ int16_t* res = m_tmpResiYuv[depth]->getLumaAddr(0);
+ Pel* reco = m_bestRecoYuv[0]->getLumaAddr(absPartIdx);
+ dststride = m_bestRecoYuv[0]->getStride();
+ src1stride = m_bestPredYuv[0]->getStride();
+ src2stride = m_tmpResiYuv[depth]->m_width;
+ primitives.luma_add_ps[part](reco, dststride, pred, res, src1stride, src2stride);
+
+ pred = m_bestPredYuv[0]->getCbAddr(absPartIdx); // Cb plane
+ res = m_tmpResiYuv[depth]->getCbAddr(0);
+ reco = m_bestRecoYuv[0]->getCbAddr(absPartIdx);
+ dststride = m_bestRecoYuv[0]->getCStride();
+ src1stride = m_bestPredYuv[0]->getCStride();
+ src2stride = m_tmpResiYuv[depth]->m_cwidth;
+ primitives.chroma[m_cfg->param.internalCsp].add_ps[part](reco, dststride, pred, res, src1stride, src2stride);
+
+ pred = m_bestPredYuv[0]->getCrAddr(absPartIdx); // Cr plane: all three strides are reused from the Cb setup above
+ res = m_tmpResiYuv[depth]->getCrAddr(0);
+ reco = m_bestRecoYuv[0]->getCrAddr(absPartIdx);
+ primitives.chroma[m_cfg->param.internalCsp].add_ps[part](reco, dststride, pred, res, src1stride, src2stride);
+ return; // recon already written; skip the prediction copy below
+ }
+ }
+ // Generate recon by copying the prediction; reached when lcu's root cbf is clear or when the skip branch above was taken
+ int part = partitionFromSizes(cu->getWidth(0), cu->getWidth(0)); // width is passed for both arguments, as in the residual path
+ Pel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
+ Pel* dst = m_bestRecoYuv[0]->getLumaAddr(absPartIdx);
+ uint32_t srcstride = m_bestPredYuv[0]->getStride();
+ uint32_t dststride = m_bestRecoYuv[0]->getStride();
+ primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
+
+ src = m_bestPredYuv[0]->getCbAddr(absPartIdx); // Cb plane
+ dst = m_bestRecoYuv[0]->getCbAddr(absPartIdx);
+ srcstride = m_bestPredYuv[0]->getCStride();
+ dststride = m_bestRecoYuv[0]->getCStride();
+ primitives.chroma[m_cfg->param.internalCsp].copy_pp[part](dst, dststride, src, srcstride);
+
+ src = m_bestPredYuv[0]->getCrAddr(absPartIdx); // Cr plane: strides are reused from the Cb setup above
+ dst = m_bestRecoYuv[0]->getCrAddr(absPartIdx);
+ primitives.chroma[m_cfg->param.internalCsp].copy_pp[part](dst, dststride, src, srcstride);
+}
More information about the x265-devel
mailing list