[x265] [PATCH] search: made a function for null cost calculation in xEstimateResidualQT()
Steve Borho
steve at borho.org
Wed Nov 5 21:49:35 CET 2014
On 11/05, ashok at multicorewareinc.com wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1415184822 -19800
> # Wed Nov 05 16:23:42 2014 +0530
> # Node ID 18344f74ded0e192bc7177a217e9112c9de31983
> # Parent 2a8f3d5820a6ebe0937ce73fa81154c263df2ae9
> search: made a function for null cost calculation in xEstimateResidualQT()
This patch changes outputs quite a bit, and unambiguously for the
better. It seems there was a bug that was fixed by this.
parkjoy medium preset before:
(2.55 fps), 16644.94 kb/s, SSIM Mean Y: 0.8785235 ( 9.155 dB)
after:
(2.52 fps), 16425.97 kb/s, SSIM Mean Y: 0.8801361 ( 9.213 dB)
This needs to be understood; there's a good chance that there are
similar bugs still lurking in this key function.
> diff -r 2a8f3d5820a6 -r 18344f74ded0 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Tue Nov 04 09:46:14 2014 +0530
> +++ b/source/encoder/search.cpp Wed Nov 05 16:23:42 2014 +0530
> @@ -2714,6 +2714,17 @@
> }
> }
>
> +uint64_t Search::deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId)
> +{
> + m_entropyCoder.resetBits();
> + m_entropyCoder.codeQtCbfZero(compId, tuDepth);
> + const uint32_t nullBits = m_entropyCoder.getNumberOfWrittenBits();
> + if (m_rdCost.m_psyRd)
> + return m_rdCost.calcPsyRdCost(dist, nullBits, psyEnergy);
> + else
> + return m_rdCost.calcRdCost(dist, nullBits);
> +}
FWIW: this would be better as a macro to avoid extra function overhead,
or at least an inline function.
Would it be possible to do something like this and get the same costs?
int cbf0Bits[3];
m_entropyCoder.resetBits();
m_entropyCoder.codeQtCbfZero(0, tuDepth);
cbf0Bits[0] = m_entropyCoder.getNumberOfWrittenBits();
m_entropyCoder.codeQtCbfZero(1, tuDepth);
cbf0Bits[1] = m_entropyCoder.getNumberOfWrittenBits() - cbf0Bits[0];
m_entropyCoder.codeQtCbfZero(2, tuDepth);
cbf0Bits[2] = m_entropyCoder.getNumberOfWrittenBits() - (cbf0Bits[0] + cbf0Bits[1]);
Also, see how Entropy::bitsIntraModeMPM() measures bits without changing
the state, so it doesn't require re-loading the contexts. That might be
generally useful in a few places here.
> void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, uint32_t depthRange[2])
> {
> CUData& cu = mode.cu;
> @@ -2828,9 +2839,6 @@
> }
> }
>
> - const uint32_t numCoeffY = 1 << (log2TrSize * 2);
> - const uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
> -
> X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
> uint32_t distY = primitives.ssd_s[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size);
> uint32_t psyEnergyY = 0;
> @@ -2861,19 +2869,15 @@
> singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0], nonZeroPsyEnergyY);
> else
> singleCostY = m_rdCost.calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]);
> - m_entropyCoder.resetBits();
> - m_entropyCoder.codeQtCbfZero(TEXT_LUMA, tuDepth);
> - const uint32_t nullBitsY = m_entropyCoder.getNumberOfWrittenBits();
> - uint64_t nullCostY = 0;
> - if (m_rdCost.m_psyRd)
> - nullCostY = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY);
> - else
> - nullCostY = m_rdCost.calcRdCost(distY, nullBitsY);
> +
> + uint64_t nullCostY = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
> if (nullCostY < singleCostY)
> {
> cbfFlag[TEXT_LUMA][0] = 0;
> #if CHECKED_BUILD || _DEBUG
> + uint32_t numCoeffY = 1 << (log2TrSize << 1);
> memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
> + primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
> #endif
> if (checkTransformSkipY)
> minCost[TEXT_LUMA][0] = nullCostY;
> @@ -2887,21 +2891,16 @@
> }
> }
> }
> - else if (checkTransformSkipY)
> + else
> {
> - m_entropyCoder.resetBits();
> - m_entropyCoder.codeQtCbfZero(TEXT_LUMA, tuDepth);
> - const uint32_t nullBitsY = m_entropyCoder.getNumberOfWrittenBits();
> - if (m_rdCost.m_psyRd)
> - minCost[TEXT_LUMA][0] = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY);
> - else
> - minCost[TEXT_LUMA][0] = m_rdCost.calcRdCost(distY, nullBitsY);
> + if (checkTransformSkipY)
> + minCost[TEXT_LUMA][0] = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
> + primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
> }
>
> singleDistComp[TEXT_LUMA][0] = distY;
> singlePsyEnergyComp[TEXT_LUMA][0] = psyEnergyY;
> - if (!cbfFlag[TEXT_LUMA][0])
> - primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
> +
Was it these changes that altered the outputs?
> cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
>
> if (bCodeChroma)
> @@ -2945,19 +2944,16 @@
> singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section], nonZeroPsyEnergyC);
> else
> singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section]);
> - m_entropyCoder.resetBits();
> - m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepth);
> - const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
> - uint64_t nullCostC = 0;
> - if (m_rdCost.m_psyRd)
> - nullCostC = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
> - else
> - nullCostC = m_rdCost.calcRdCost(distC, nullBitsC);
> +
> + uint64_t nullCostC = deriveNullCost(distC, psyEnergyC, tuDepth, (TextType)chromaId);
> +
> if (nullCostC < singleCostC)
> {
> cbfFlag[chromaId][tuIterator.section] = 0;
> #if CHECKED_BUILD || _DEBUG
> + uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
> memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
> + primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
> #endif
> if (checkTransformSkipC)
> minCost[chromaId][tuIterator.section] = nullCostC;
> @@ -2971,23 +2967,16 @@
> }
> }
> }
> - else if (checkTransformSkipC)
> + else
> {
> - m_entropyCoder.resetBits();
> - m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepthC);
> - const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
> - if (m_rdCost.m_psyRd)
> - minCost[chromaId][tuIterator.section] = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
> - else
> - minCost[chromaId][tuIterator.section] = m_rdCost.calcRdCost(distC, nullBitsC);
> + if (checkTransformSkipC)
> + minCost[chromaId][tuIterator.section] = deriveNullCost(distC, psyEnergyC, tuDepthC, (TextType)chromaId);
> + primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
> }
>
> singleDistComp[chromaId][tuIterator.section] = distC;
> singlePsyEnergyComp[chromaId][tuIterator.section] = psyEnergyC;
>
> - if (!cbfFlag[chromaId][tuIterator.section])
> - primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
> -
> cu.setCbfPartRange(cbfFlag[chromaId][tuIterator.section] << tuDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
> }
> while (tuIterator.isNextSection());
> @@ -3042,6 +3031,7 @@
> singlePsyEnergyComp[TEXT_LUMA][0] = nonZeroPsyEnergyY;
> cbfFlag[TEXT_LUMA][0] = !!numSigTSkipY;
> bestTransformMode[TEXT_LUMA][0] = 1;
> + uint32_t numCoeffY = 1 << (log2TrSize << 1);
> memcpy(coeffCurY, tsCoeffY, sizeof(coeff_t) * numCoeffY);
> primitives.square_copy_ss[partSize](curResiY, strideResiY, tsResiY, trSize);
> }
> @@ -3112,6 +3102,7 @@
> singlePsyEnergyComp[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
> cbfFlag[chromaId][tuIterator.section] = !!numSigTSkipC;
> bestTransformMode[chromaId][tuIterator.section] = 1;
> + uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
> memcpy(coeffCurC + subTUOffset, tsCoeffC, sizeof(coeff_t) * numCoeffC);
> primitives.square_copy_ss[partSizeC](curResiC, strideResiC, tsResiC, trSizeC);
> }
> diff -r 2a8f3d5820a6 -r 18344f74ded0 source/encoder/search.h
> --- a/source/encoder/search.h Tue Nov 04 09:46:14 2014 +0530
> +++ b/source/encoder/search.h Wed Nov 05 16:23:42 2014 +0530
> @@ -217,6 +217,7 @@
> Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
> };
>
> + uint64_t deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
> void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, uint32_t depthRange[2]);
>
> // estimate bit cost of residual QT
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list