[x265] [PATCH] search: made a function for null cost calculation in xEstimateResidualQT()
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Wed Nov 5 16:05:51 CET 2014
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1415184822 -19800
# Wed Nov 05 16:23:42 2014 +0530
# Node ID 18344f74ded0e192bc7177a217e9112c9de31983
# Parent 2a8f3d5820a6ebe0937ce73fa81154c263df2ae9
search: made a function for null cost calculation in xEstimateResidualQT()
diff -r 2a8f3d5820a6 -r 18344f74ded0 source/encoder/search.cpp
--- a/source/encoder/search.cpp Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/search.cpp Wed Nov 05 16:23:42 2014 +0530
@@ -2714,6 +2714,17 @@
}
}
+uint64_t Search::deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId)
+{
+ m_entropyCoder.resetBits();
+ m_entropyCoder.codeQtCbfZero(compId, tuDepth);
+ const uint32_t nullBits = m_entropyCoder.getNumberOfWrittenBits();
+ if (m_rdCost.m_psyRd)
+ return m_rdCost.calcPsyRdCost(dist, nullBits, psyEnergy);
+ else
+ return m_rdCost.calcRdCost(dist, nullBits);
+}
+
void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& outCosts, uint32_t depthRange[2])
{
CUData& cu = mode.cu;
@@ -2828,9 +2839,6 @@
}
}
- const uint32_t numCoeffY = 1 << (log2TrSize * 2);
- const uint32_t numCoeffC = 1 << (log2TrSizeC * 2);
-
X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
uint32_t distY = primitives.ssd_s[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size);
uint32_t psyEnergyY = 0;
@@ -2861,19 +2869,15 @@
singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0], nonZeroPsyEnergyY);
else
singleCostY = m_rdCost.calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]);
- m_entropyCoder.resetBits();
- m_entropyCoder.codeQtCbfZero(TEXT_LUMA, tuDepth);
- const uint32_t nullBitsY = m_entropyCoder.getNumberOfWrittenBits();
- uint64_t nullCostY = 0;
- if (m_rdCost.m_psyRd)
- nullCostY = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY);
- else
- nullCostY = m_rdCost.calcRdCost(distY, nullBitsY);
+
+ uint64_t nullCostY = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
if (nullCostY < singleCostY)
{
cbfFlag[TEXT_LUMA][0] = 0;
#if CHECKED_BUILD || _DEBUG
+ uint32_t numCoeffY = 1 << (log2TrSize << 1);
memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
+ primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
#endif
if (checkTransformSkipY)
minCost[TEXT_LUMA][0] = nullCostY;
@@ -2887,21 +2891,16 @@
}
}
}
- else if (checkTransformSkipY)
+ else
{
- m_entropyCoder.resetBits();
- m_entropyCoder.codeQtCbfZero(TEXT_LUMA, tuDepth);
- const uint32_t nullBitsY = m_entropyCoder.getNumberOfWrittenBits();
- if (m_rdCost.m_psyRd)
- minCost[TEXT_LUMA][0] = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY);
- else
- minCost[TEXT_LUMA][0] = m_rdCost.calcRdCost(distY, nullBitsY);
+ if (checkTransformSkipY)
+ minCost[TEXT_LUMA][0] = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
+ primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
}
singleDistComp[TEXT_LUMA][0] = distY;
singlePsyEnergyComp[TEXT_LUMA][0] = psyEnergyY;
- if (!cbfFlag[TEXT_LUMA][0])
- primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
+
cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
if (bCodeChroma)
@@ -2945,19 +2944,16 @@
singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section], nonZeroPsyEnergyC);
else
singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section]);
- m_entropyCoder.resetBits();
- m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepth);
- const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
- uint64_t nullCostC = 0;
- if (m_rdCost.m_psyRd)
- nullCostC = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
- else
- nullCostC = m_rdCost.calcRdCost(distC, nullBitsC);
+
+ uint64_t nullCostC = deriveNullCost(distC, psyEnergyC, tuDepth, (TextType)chromaId);
+
if (nullCostC < singleCostC)
{
cbfFlag[chromaId][tuIterator.section] = 0;
#if CHECKED_BUILD || _DEBUG
+ uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
+ primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
#endif
if (checkTransformSkipC)
minCost[chromaId][tuIterator.section] = nullCostC;
@@ -2971,23 +2967,16 @@
}
}
}
- else if (checkTransformSkipC)
+ else
{
- m_entropyCoder.resetBits();
- m_entropyCoder.codeQtCbfZero((TextType)chromaId, tuDepthC);
- const uint32_t nullBitsC = m_entropyCoder.getNumberOfWrittenBits();
- if (m_rdCost.m_psyRd)
- minCost[chromaId][tuIterator.section] = m_rdCost.calcPsyRdCost(distC, nullBitsC, psyEnergyC);
- else
- minCost[chromaId][tuIterator.section] = m_rdCost.calcRdCost(distC, nullBitsC);
+ if (checkTransformSkipC)
+ minCost[chromaId][tuIterator.section] = deriveNullCost(distC, psyEnergyC, tuDepthC, (TextType)chromaId);
+ primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
}
singleDistComp[chromaId][tuIterator.section] = distC;
singlePsyEnergyComp[chromaId][tuIterator.section] = psyEnergyC;
- if (!cbfFlag[chromaId][tuIterator.section])
- primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
-
cu.setCbfPartRange(cbfFlag[chromaId][tuIterator.section] << tuDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
}
while (tuIterator.isNextSection());
@@ -3042,6 +3031,7 @@
singlePsyEnergyComp[TEXT_LUMA][0] = nonZeroPsyEnergyY;
cbfFlag[TEXT_LUMA][0] = !!numSigTSkipY;
bestTransformMode[TEXT_LUMA][0] = 1;
+ uint32_t numCoeffY = 1 << (log2TrSize << 1);
memcpy(coeffCurY, tsCoeffY, sizeof(coeff_t) * numCoeffY);
primitives.square_copy_ss[partSize](curResiY, strideResiY, tsResiY, trSize);
}
@@ -3112,6 +3102,7 @@
singlePsyEnergyComp[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
cbfFlag[chromaId][tuIterator.section] = !!numSigTSkipC;
bestTransformMode[chromaId][tuIterator.section] = 1;
+ uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
memcpy(coeffCurC + subTUOffset, tsCoeffC, sizeof(coeff_t) * numCoeffC);
primitives.square_copy_ss[partSizeC](curResiC, strideResiC, tsResiC, trSizeC);
}
diff -r 2a8f3d5820a6 -r 18344f74ded0 source/encoder/search.h
--- a/source/encoder/search.h Tue Nov 04 09:46:14 2014 +0530
+++ b/source/encoder/search.h Wed Nov 05 16:23:42 2014 +0530
@@ -217,6 +217,7 @@
Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
};
+ uint64_t deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, uint32_t depthRange[2]);
// estimate bit cost of residual QT
More information about the x265-devel mailing list