[x265] [PATCH] search: made a function for null cost calculation in xEstimateResidualQT()
Steve Borho
steve at borho.org
Thu Nov 6 06:29:48 CET 2014
On Wed, Nov 5, 2014 at 2:49 PM, Steve Borho <steve at borho.org> wrote:
> On 11/05, ashok at multicorewareinc.com wrote:
>> # HG changeset patch
>> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
>> # Date 1415184822 -19800
>> # Wed Nov 05 16:23:42 2014 +0530
>> # Node ID 18344f74ded0e192bc7177a217e9112c9de31983
>> # Parent 2a8f3d5820a6ebe0937ce73fa81154c263df2ae9
>> search: made a function for null cost calculation in xEstimateResidualQT()
>
> this patch changes outputs quite a bit, and unambiguously for the
> better. it seems there was a bug that was fixed by this.
>
> parkjoy medium preset before:
>
> (2.55 fps), 16644.94 kb/s, SSIM Mean Y: 0.8785235 ( 9.155 dB)
>
> after:
>
> (2.52 fps), 16425.97 kb/s, SSIM Mean Y: 0.8801361 ( 9.213 dB)
>
> This needs to be understood; there's a good chance that there are
> similar bugs still lurking in this key function.
>
>> diff -r 2a8f3d5820a6 -r 18344f74ded0 source/encoder/search.cpp
>> --- a/source/encoder/search.cpp Tue Nov 04 09:46:14 2014 +0530
>> +++ b/source/encoder/search.cpp Wed Nov 05 16:23:42 2014 +0530
>> @@ -2714,6 +2714,17 @@
>> }
>> }
>>
>> +uint64_t Search::deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId)
>> +{
>> + m_entropyCoder.resetBits();
>> + m_entropyCoder.codeQtCbfZero(compId, tuDepth);
>> + const uint32_t nullBits = m_entropyCoder.getNumberOfWrittenBits();
>> + if (m_rdCost.m_psyRd)
>> + return m_rdCost.calcPsyRdCost(dist, nullBits, psyEnergy);
>> + else
>> + return m_rdCost.calcRdCost(dist, nullBits);
>> +}
>
> FWIW: this would be better as a macro to avoid extra function overhead,
> or at least an inline function.
>
> Would it be possible to do something like this and get the same costs?
>
> int cbf0Bits[3];
> m_entropyCoder.resetBits();
> m_entropyCoder.codeQtCbfZero(0, tuDepth);
> cbf0Bits[0] = m_entropyCoder.getNumberOfWrittenBits();
> m_entropyCoder.codeQtCbfZero(1, tuDepth);
> cbf0Bits[1] = m_entropyCoder.getNumberOfWrittenBits() - cbf0Bits[0];
> m_entropyCoder.codeQtCbfZero(2, tuDepth);
> cbf0Bits[2] = m_entropyCoder.getNumberOfWrittenBits() - (cbf0Bits[0] + cbf0Bits[1]);
>
> Also, see how Entropy::bitsIntraModeMPM() measures bits without changing
> the state, so it doesn't require re-loading the contexts. That might be
> generally useful in a few places here.
You can ignore these comments, you were obviously trying to do things
like this in the last patch. But we do need to understand why
behavior changed, and see if the same logic bug is in other places in
search.cpp. This file has a lot of repetition in it
>> void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, uint32_t depthRange[2])
>> {
>> CUData& cu = mode.cu;
>> @@ -2828,9 +2839,6 @@
>> }
>> }
>>
>> - const uint32_t numCoeffY = 1 << (log2TrSize * 2);
>> - const uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
>> -
>> X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
>> uint32_t distY = primitives.ssd_s[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size);
>> uint32_t psyEnergyY = 0;
>> @@ -2861,19 +2869,15 @@
>> singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0], nonZeroPsyEnergyY);
>> else
>> singleCostY = m_rdCost.calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]);
>> - m_entropyCoder.resetBits();
>> - m_entropyCoder.codeQtCbfZero(TEXT_LUMA, tuDepth);
>> - const uint32_t nullBitsY = m_entropyCoder.getNumberOfWrittenBits();
>> - uint64_t nullCostY = 0;
>> - if (m_rdCost.m_psyRd)
>> - nullCostY = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY);
>> - else
>> - nullCostY = m_rdCost.calcRdCost(distY, nullBitsY);
>> +
>> + uint64_t nullCostY = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
>> if (nullCostY < singleCostY)
>> {
>> cbfFlag[TEXT_LUMA][0] = 0;
>> #if CHECKED_BUILD || _DEBUG
>> + uint32_t numCoeffY = 1 << (log2TrSize << 1);
>> memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
>> + primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
>> #endif
>> if (checkTransformSkipY)
>> minCost[TEXT_LUMA][0] = nullCostY;
>> @@ -2887,21 +2891,16 @@
>> }
>> }
>> }
>> - else if (checkTransformSkipY)
>> + else
>> {
>> - m_entropyCoder.resetBits();
>> - m_entropyCoder.codeQtCbfZero(TEXT_LUMA, tuDepth);
>> - const uint32_t nullBitsY = m_entropyCoder.getNumberOfWrittenBits();
>> - if (m_rdCost.m_psyRd)
>> - minCost[TEXT_LUMA][0] = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY);
>> - else
>> - minCost[TEXT_LUMA][0] = m_rdCost.calcRdCost(distY, nullBitsY);
>> + if (checkTransformSkipY)
>> + minCost[TEXT_LUMA][0] = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
>> + primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
>> }
>>
>> singleDistComp[TEXT_LUMA][0] = distY;
>> singlePsyEnergyComp[TEXT_LUMA][0] = psyEnergyY;
>> - if (!cbfFlag[TEXT_LUMA][0])
>> - primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
>> +
>
> was it these changes?
>
>> cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
>>
>> if (bCodeChroma)
>> @@ -2945,19 +2944,16 @@
>> singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section], nonZeroPsyEnergyC);
>> else
>> singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section]);
>> - m_entropyCoder.resetBits();
>> - m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepth);
>> - const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
>> - uint64_t nullCostC = 0;
>> - if (m_rdCost.m_psyRd)
>> - nullCostC = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
>> - else
>> - nullCostC = m_rdCost.calcRdCost(distC, nullBitsC);
>> +
>> + uint64_t nullCostC = deriveNullCost(distC, psyEnergyC, tuDepth, (TextType)chromaId);
>> +
>> if (nullCostC < singleCostC)
>> {
>> cbfFlag[chromaId][tuIterator.section] = 0;
>> #if CHECKED_BUILD || _DEBUG
>> + uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
>> memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
>> + primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
>> #endif
>> if (checkTransformSkipC)
>> minCost[chromaId][tuIterator.section] = nullCostC;
>> @@ -2971,23 +2967,16 @@
>> }
>> }
>> }
>> - else if (checkTransformSkipC)
>> + else
>> {
>> - m_entropyCoder.resetBits();
>> - m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepthC);
>> - const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
>> - if (m_rdCost.m_psyRd)
>> - minCost[chromaId][tuIterator.section] = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
>> - else
>> - minCost[chromaId][tuIterator.section] = m_rdCost.calcRdCost(distC, nullBitsC);
>> + if (checkTransformSkipC)
>> + minCost[chromaId][tuIterator.section] = deriveNullCost(distC, psyEnergyC, tuDepthC, (TextType)chromaId);
>> + primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
>> }
>>
>> singleDistComp[chromaId][tuIterator.section] = distC;
>> singlePsyEnergyComp[chromaId][tuIterator.section] = psyEnergyC;
>>
>> - if (!cbfFlag[chromaId][tuIterator.section])
>> - primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
>> -
>> cu.setCbfPartRange(cbfFlag[chromaId][tuIterator.section] << tuDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
>> }
>> while (tuIterator.isNextSection());
>> @@ -3042,6 +3031,7 @@
>> singlePsyEnergyComp[TEXT_LUMA][0] = nonZeroPsyEnergyY;
>> cbfFlag[TEXT_LUMA][0] = !!numSigTSkipY;
>> bestTransformMode[TEXT_LUMA][0] = 1;
>> + uint32_t numCoeffY = 1 << (log2TrSize << 1);
>> memcpy(coeffCurY, tsCoeffY, sizeof(coeff_t) * numCoeffY);
>> primitives.square_copy_ss[partSize](curResiY, strideResiY, tsResiY, trSize);
>> }
>> @@ -3112,6 +3102,7 @@
>> singlePsyEnergyComp[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
>> cbfFlag[chromaId][tuIterator.section] = !!numSigTSkipC;
>> bestTransformMode[chromaId][tuIterator.section] = 1;
>> + uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
>> memcpy(coeffCurC + subTUOffset, tsCoeffC, sizeof(coeff_t) * numCoeffC);
>> primitives.square_copy_ss[partSizeC](curResiC, strideResiC, tsResiC, trSizeC);
>> }
>> diff -r 2a8f3d5820a6 -r 18344f74ded0 source/encoder/search.h
>> --- a/source/encoder/search.h Tue Nov 04 09:46:14 2014 +0530
>> +++ b/source/encoder/search.h Wed Nov 05 16:23:42 2014 +0530
>> @@ -217,6 +217,7 @@
>> Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
>> };
>>
>> + uint64_t deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
>> void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, uint32_t depthRange[2]);
>>
>> // estimate bit cost of residual QT
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>
> --
> Steve Borho
--
Steve Borho
More information about the x265-devel
mailing list