[x265] [PATCH] Search: remove redundant encode coefficients in intra for performance
Deepthi Nandakumar
deepthi at multicorewareinc.com
Sun Sep 14 13:05:35 CEST 2014
This significantly changes outputs for P and B frames: higher bitrates and
higher SSIM. Let's do full regression testing on this, and compare the
bitrate/SSIM for all combinations to be reasonably sure there are no bugs.
On Fri, Sep 12, 2014 at 7:47 PM, <ashok at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1410341620 -19800
> # Wed Sep 10 15:03:40 2014 +0530
> # Node ID d8be3c38915d4a628b804522da8946a152041203
> # Parent cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f
> Search: remove redundant encode coefficients in intra for performance
>
> diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Thu Sep 11 17:25:40 2014 -0700
> +++ b/source/encoder/analysis.cpp Wed Sep 10 15:03:40 2014 +0530
> @@ -1840,6 +1840,7 @@
> void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv)
> {
> uint64_t puCost = 0;
> + uint32_t puBits = 0;
> uint32_t depth = cu->getDepth(0);
> uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
>
> @@ -1851,7 +1852,7 @@
> uint32_t tuDepthRange[2];
> cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);
>
> - uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
> predYuv, outResiYuv, false, puCost, tuDepthRange);
> + uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
> predYuv, outResiYuv, false, puCost, puBits, tuDepthRange);
> xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);
>
> //=== update PU data ====
> diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp
> --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700
> +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530
> @@ -111,47 +111,6 @@
> return false;
> }
>
> -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, uint32_t depthRange[2])
> -{
> - uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trMode = cu->getTransformIdx(absPartIdx);
> - uint32_t subdiv = (trMode > trDepth ? 1 : 0);
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> -
> - if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0)
> == SIZE_NxN && trDepth == 0)
> - {
> - X265_CHECK(subdiv, "subdivision not present\n");
> - }
> - else if (log2TrSize > *(depthRange + 1))
> - {
> - X265_CHECK(subdiv, "subdivision not present\n");
> - }
> - else if (log2TrSize == cu->m_slice->m_sps->quadtreeTULog2MinSize)
> - {
> - X265_CHECK(!subdiv, "subdivision present\n");
> - }
> - else if (log2TrSize == *depthRange)
> - {
> - X265_CHECK(!subdiv, "subdivision present\n");
> - }
> - else
> - {
> - X265_CHECK(log2TrSize > *depthRange, "transform size too
> small\n");
> - m_entropyCoder->codeTransformSubdivFlag(subdiv, 5 - log2TrSize);
> - }
> -
> - if (subdiv)
> - {
> - uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth +
> 1) << 1);
> - for (uint32_t part = 0; part < 4; part++)
> - xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part *
> qtPartNum, depthRange);
> -
> - return;
> - }
> -
> - m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
> -}
> -
> void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t
> height)
> {
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> @@ -183,32 +142,6 @@
> }
> }
>
> -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx)
> -{
> - const TextType ttype = TEXT_LUMA;
> -
> - if (!cu->getCbf(absPartIdx, ttype, trDepth))
> - return;
> -
> - uint32_t fullDepth = cu->getDepth(0) + trDepth;
> - uint32_t trMode = cu->getTransformIdx(absPartIdx);
> -
> - if (trMode > trDepth)
> - {
> - uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth +
> 1) << 1);
> - for (uint32_t part = 0; part < 4; part++)
> - xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part *
> qtPartNum);
> -
> - return;
> - }
> -
> - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> - uint32_t qtLayer = log2TrSize - 2;
> - uint32_t coeffOffset = absPartIdx << LOG2_UNIT_SIZE * 2;
> - coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
> - m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize,
> ttype);
> -}
> -
> void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, TextType ttype)
> {
> if (!cu->getCbf(absPartIdx, ttype, trDepth))
> @@ -316,15 +249,6 @@
> }
> }
>
> -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, uint32_t depthRange[2])
> -{
> - m_entropyCoder->resetBits();
> - xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
> - xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange);
> - xEncCoeffQTLuma(cu, trDepth, absPartIdx);
> - return m_entropyCoder->getNumberOfWrittenBits();
> -}
> -
> uint32_t Search::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, uint32_t absPartIdxStep)
> {
> int cuSize = 1 << cu->getLog2CUSize(absPartIdx);
> @@ -340,7 +264,14 @@
> {
> m_entropyCoder->resetBits();
> xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
> - xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange);
> +
> + //Transform subdiv flag
> + if (log2TrSize != *depthRange)
> + m_entropyCoder->codeTransformSubdivFlag(0, 5 - log2TrSize);
> +
> + //===== Cbfs =====
> + uint32_t trMode = cu->getTransformIdx(absPartIdx);
> + m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
>
> if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth))
> m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize,
> TEXT_LUMA);
> @@ -463,7 +394,7 @@
>
> /* returns distortion. TODO reorder params */
> uint32_t Search::xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,
> - ShortYuv* resiYuv, bool
> bAllowRQTSplit, uint64_t& rdCost, uint32_t depthRange[2])
> + ShortYuv* resiYuv, bool
> bAllowRQTSplit, uint64_t& rdCost, uint32_t& rdBits, uint32_t depthRange[2])
> {
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> @@ -490,8 +421,9 @@
> if (!bAllowRQTSplit && noSplitIntraMaxTuSize)
> bCheckSplit = false;
>
> - uint64_t singleCost = MAX_INT64;
> - uint32_t singleDistY = 0;
> + uint64_t singleCost = MAX_INT64;
> + uint32_t singleDistY = 0;
> + uint32_t singleBits = 0;
> uint32_t singlePsyEnergyY = 0;
> uint32_t singleCbfY = 0;
> int bestModeId = 0;
> @@ -580,7 +512,7 @@
> break;
> else
> {
> - uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth,
> absPartIdx, log2TrSize, coeff, depthRange);
> + singleBits = xGetIntraBitsLuma(cu, trDepth,
> absPartIdx, log2TrSize, coeff, depthRange);
> if (m_rdCost.m_psyRd)
> singleCostTmp =
> m_rdCost.calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp);
> else
> @@ -634,7 +566,7 @@
> }
> cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA,
> absPartIdx, fullDepth);
>
> - uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth,
> absPartIdx, log2TrSize, coeffY, depthRange);
> + singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx,
> log2TrSize, coeffY, depthRange);
> if (m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice)
> singleBits *= 4;
>
> @@ -663,23 +595,30 @@
> uint32_t qPartsDiv = cu->m_pic->getNumPartInCU() >>
> ((fullDepth + 1) << 1);
> uint32_t absPartIdxSub = absPartIdx;
> uint32_t splitCbfY = 0;
> + uint32_t splitBits = 0;
>
> for (uint32_t part = 0; part < 4; part++, absPartIdxSub +=
> qPartsDiv)
> {
> cu->m_psyEnergy = 0;
> - splitDistY += xRecurIntraCodingQT(cu, trDepth + 1,
> absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost,
> depthRange);
> + splitDistY += xRecurIntraCodingQT(cu, trDepth + 1,
> absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost,
> splitBits, depthRange);
> splitPsyEnergyY += cu->m_psyEnergy;
> splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth +
> 1);
> }
> +
> + if (bCheckFull)
> + {
> + m_entropyCoder->resetBits();
> +
> + //subdiv
> + if (log2TrSize != *depthRange)
> + m_entropyCoder->codeTransformSubdivFlag(1, 5 -
> log2TrSize);
> +
> + splitBits += m_entropyCoder->getNumberOfWrittenBits();
> + }
>
> for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)
> cu->getCbf(TEXT_LUMA)[absPartIdx + offs] |= (splitCbfY <<
> trDepth);
>
> - // restore context states
> -
> m_entropyCoder->load(m_rdEntropyCoders[fullDepth][CI_QT_TRAFO_ROOT]);
> -
> - // determine rate and r-d cost
> - uint32_t splitBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx,
> depthRange);
> if (m_rdCost.m_psyRd)
> splitCost = m_rdCost.calcPsyRdCost(splitDistY, splitBits,
> splitPsyEnergyY);
> else
> @@ -689,6 +628,7 @@
> {
> outDist += splitDistY;
> rdCost += splitCost;
> + rdBits += splitBits;
> cu->m_psyEnergy = splitPsyEnergyY;
> return outDist;
> }
> @@ -717,6 +657,7 @@
> }
>
> rdCost += singleCost;
> + rdBits += singleBits;
> cu->m_psyEnergy = singlePsyEnergyY;
> return outDist + singleDistY;
> }
> @@ -1416,6 +1357,7 @@
> uint32_t bestPUDistY = 0;
> uint64_t bestPUCost = MAX_INT64;
> uint32_t puDistY;
> + uint32_t puBits;
> uint64_t puCost;
> for (int mode = 0; mode < numModesForFullRD; mode++)
> {
> @@ -1427,7 +1369,8 @@
>
> // determine residual for partition
> puCost = 0;
> - puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset,
> fencYuv, predYuv, resiYuv, false, puCost, depthRange);
> + puBits = 0;
> + puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset,
> fencYuv, predYuv, resiYuv, false, puCost, puBits, depthRange);
>
> // check r-d cost
> if (puCost < bestPUCost)
> @@ -1446,7 +1389,8 @@
>
> // determine residual for partition
> puCost = 0;
> - puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset,
> fencYuv, predYuv, resiYuv, true, puCost, depthRange);
> + puBits = 0;
> + puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset,
> fencYuv, predYuv, resiYuv, true, puCost, puBits, depthRange);
>
> overallDistY += (puCost >= bestPUCost) ? bestPUDistY : puDistY;
>
> diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.h
> --- a/source/encoder/search.h Thu Sep 11 17:25:40 2014 -0700
> +++ b/source/encoder/search.h Wed Sep 10 15:03:40 2014 +0530
> @@ -129,14 +129,11 @@
> void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx,
> ShortYuv* resiYuv, uint32_t depth, bool bSpatial);
> void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, TComYuv* reconYuv);
>
> - void xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, uint32_t depthRange[2]);
> void xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height);
> -
> - void xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx);
> void xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, TextType ttype);
> void xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx);
> void xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx);
> - uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, uint32_t depthRange[2]);
> +
> uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, uint32_t absPartIdxStep);
> uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2]);
> uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);
> @@ -147,7 +144,7 @@
> uint64_t &rdCost, uint32_t &outBits,
> uint32_t *zeroDist, uint32_t tuDepthRange[2]);
>
> uint32_t xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,
> - ShortYuv* resiYuv, bool bAllowRQTSplit,
> uint64_t& dRDCost, uint32_t depthRange[2]);
> + ShortYuv* resiYuv, bool bAllowRQTSplit,
> uint64_t& dRDCost, uint32_t& puBits, uint32_t depthRange[2]);
>
> uint32_t xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv);
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140914/e5d61405/attachment-0001.html>
More information about the x265-devel mailing list