[x265] [PATCH] [REVIEW PATCH/OUTPUT CHANGED]search: removed multiple encode Coefficients from estimateResidualQT()
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Wed Nov 5 16:06:49 CET 2014
# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1415199866 -19800
# Wed Nov 05 20:34:26 2014 +0530
# Node ID 3e415f2dc7377a4a8093128c017df9ff329d6eda
# Parent a1902139ce82796caa570405b63cbb78fc6fd442
[REVIEW PATCH/OUTPUT CHANGED]search: removed multiple encode Coefficients from estimateResidualQT()
This patch tries to remove the repeated coefficient encoding from the estimateResidualQT() function.
Coefficients are currently encoded in three stages: once for the calculation of distortion and twice for the split and unsplit
block cost calculations. I have added comments wherever I have changed the code.
diff -r a1902139ce82 -r 3e415f2dc737 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Wed Nov 05 20:34:26 2014 +0530
+++ b/source/encoder/entropy.cpp Wed Nov 05 20:34:26 2014 +0530
@@ -1501,6 +1501,13 @@
encodeBin(cbf, m_contextState[OFF_QT_ROOT_CBF_CTX]);
}
+uint32_t Entropy::estimateCbfBits(uint32_t cbf,TextType ttype, uint32_t trDepth)
+{
+ // Approximation: select the cbf context for (ttype, trDepth) and return encodeBinContext()'s bit estimate for coding cbf with it
+ uint32_t ctx = ctxCbf[ttype][trDepth];
+ return encodeBinContext(cbf, m_contextState[OFF_QT_CBF_CTX + ctx]);
+}
+
void Entropy::codeQtCbfZero(TextType ttype, uint32_t trDepth)
{
// this function is only used to estimate the bits when cbf is 0
@@ -2026,6 +2033,17 @@
writeOut();
}
+/** Estimate the bits for coding binValue with ctxModel; works on a local copy, so m_fracBits is not modified here. */
+uint32_t Entropy::encodeBinContext(uint32_t binValue, uint8_t &ctxModel)
+{
+ uint64_t fracBits = m_fracBits; // local copy of the running fractional-bit counter
+ fracBits &= 32767; // keep only the low 15 bits (the fractional part, given the >> 15 below)
+
+ fracBits += sbacGetEntropyBits(ctxModel, binValue); // add the cost reported for this bin under ctxModel
+
+ return (uint32_t)(fracBits >> 15); // whole bits only; the sub-bit remainder is dropped
+}
+
/** Encode equiprobable bin */
void Entropy::encodeBinEP(uint32_t binValue)
{
diff -r a1902139ce82 -r 3e415f2dc737 source/encoder/entropy.h
--- a/source/encoder/entropy.h Wed Nov 05 20:34:26 2014 +0530
+++ b/source/encoder/entropy.h Wed Nov 05 20:34:26 2014 +0530
@@ -176,6 +176,7 @@
void codeQtRootCbfZero();
void codeCoeff(const CUData& cu, uint32_t absPartIdx, uint32_t depth, bool& bCodeDQP, uint32_t depthRange[2]);
void codeCoeffNxN(const CUData& cu, const coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
+ uint32_t estimateCbfBits(uint32_t cbf, TextType ttype, uint32_t trDepth);
uint32_t bitsIntraModeNonMPM() const;
uint32_t bitsIntraModeMPM(const uint32_t preds[3], uint32_t dir) const;
@@ -199,6 +200,7 @@
void encodeBinEP(uint32_t binValue);
void encodeBinsEP(uint32_t binValues, int numBins);
void encodeBinTrm(uint32_t binValue);
+ uint32_t encodeBinContext(uint32_t binValue, uint8_t &ctxModel);
void encodeCU(const CUData& cu, const CUGeom &cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP);
void finishCU(const CUData& cu, uint32_t absPartIdx, uint32_t depth);
diff -r a1902139ce82 -r 3e415f2dc737 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Nov 05 20:34:26 2014 +0530
+++ b/source/encoder/search.cpp Wed Nov 05 20:34:26 2014 +0530
@@ -2714,11 +2714,10 @@
}
}
-uint64_t Search::deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId)
+uint64_t Search::estimateNullCbfCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId)
{
- m_entropyCoder.resetBits();
- m_entropyCoder.codeQtCbfZero(compId, tuDepth);
- const uint32_t nullBits = m_entropyCoder.getNumberOfWrittenBits();
+ uint32_t nullBits = m_entropyCoder.estimateCbfBits(0, compId, tuDepth);
+
if (m_rdCost.m_psyRd)
return m_rdCost.calcPsyRdCost(dist, nullBits, psyEnergy);
else
@@ -2732,6 +2731,7 @@
bool bCheckSplit = log2TrSize > depthRange[0];
bool bCheckFull = log2TrSize <= depthRange[1];
+ bool bSplitPresentFlag = bCheckSplit && bCheckFull;
if (cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && bCheckSplit)
bCheckFull = false;
@@ -2757,9 +2757,9 @@
uint8_t cbfFlag[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, {0, 0}, {0, 0} };
uint32_t numSig[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, {0, 0}, {0, 0} };
- uint32_t singleBitsComp[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
- uint32_t singleDistComp[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
- uint32_t singlePsyEnergyComp[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
+ uint32_t singleBits[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
+ uint32_t singleDist[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
+ uint32_t singlePsyEnergy[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
uint32_t bestTransformMode[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
uint64_t minCost[MAX_NUM_COMPONENT][2 /*0 = top (or whole TU for non-4:2:2) sub-TU, 1 = bottom sub-TU*/] = { { MAX_INT64, MAX_INT64 }, {MAX_INT64, MAX_INT64}, {MAX_INT64, MAX_INT64} };
@@ -2796,48 +2796,19 @@
cbfFlag[TEXT_LUMA][0] = !!numSig[TEXT_LUMA][0];
m_entropyCoder.resetBits();
- m_entropyCoder.codeQtCbf(cbfFlag[TEXT_LUMA][0], TEXT_LUMA, tuDepth);
+
+ if (bSplitPresentFlag && log2TrSize > depthRange[0])
+ m_entropyCoder.codeTransformSubdivFlag(0, 5 - log2TrSize);
+ fullCost.bits = m_entropyCoder.getNumberOfWrittenBits();
+
+ // Coding luma cbf flag has been removed from here. The context for cbf flag is different for each depth.
+ // So it is valid if we encode coefficients and then cbfs at least for analysis.
+// m_entropyCoder.codeQtCbf(cbfFlag[TEXT_LUMA][0], TEXT_LUMA, tuDepth);
if (cbfFlag[TEXT_LUMA][0])
m_entropyCoder.codeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
- singleBitsComp[TEXT_LUMA][0] = m_entropyCoder.getNumberOfWrittenBits();
-
- uint32_t singleBitsPrev = singleBitsComp[TEXT_LUMA][0];
-
- if (bCodeChroma)
- {
- uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
- {
- coeff_t* coeffCurC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
- TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx);
-
- do
- {
- uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
- uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
-
- cu.setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
-
- if (m_bEnableRDOQ && (chromaId != TEXT_CHROMA_V))
- m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSizeC, false);
-
- fenc = const_cast<pixel*>(fencYuv->getChromaAddr(chromaId, absPartIdxC));
- resi = resiYuv.getChromaAddr(chromaId, absPartIdxC);
- numSig[chromaId][tuIterator.section] = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, coeffCurC + subTUOffset, log2TrSizeC, (TextType)chromaId, absPartIdxC, false);
- cbfFlag[chromaId][tuIterator.section] = !!numSig[chromaId][tuIterator.section];
-
- m_entropyCoder.codeQtCbf(cbfFlag[chromaId][tuIterator.section], (TextType)chromaId, tuDepth);
- if (cbfFlag[chromaId][tuIterator.section])
- m_entropyCoder.codeCoeffNxN(cu, coeffCurC + subTUOffset, absPartIdxC, log2TrSizeC, (TextType)chromaId);
-
- uint32_t newBits = m_entropyCoder.getNumberOfWrittenBits();
- singleBitsComp[chromaId][tuIterator.section] = newBits - singleBitsPrev;
-
- singleBitsPrev = newBits;
- }
- while (tuIterator.isNextSection());
- }
- }
+
+ uint32_t singleBitsPrev = m_entropyCoder.getNumberOfWrittenBits();
+ singleBits[TEXT_LUMA][0] = singleBitsPrev - fullCost.bits;
X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
uint32_t distY = primitives.ssd_s[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size);
@@ -2852,28 +2823,35 @@
{
m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
+ // non-zero cost calculation for luma - This is an approximation
+ // finally we have to encode correct cbf after comparing with null cost
const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, curResiY, strideResiY);
- uint32_t nonZeroPsyEnergyY = 0;
+ uint32_t nzCbfBitsY = m_entropyCoder.estimateCbfBits(cbfFlag[TEXT_LUMA][0], TEXT_LUMA, tuDepth);
+ uint32_t nonZeroPsyEnergyY = 0; uint64_t singleCostY = 0;
if (m_rdCost.m_psyRd)
+ {
nonZeroPsyEnergyY = m_rdCost.psyCost(partSize, resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, curResiY, strideResiY);
+ singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY, nzCbfBitsY + singleBits[TEXT_LUMA][0], nonZeroPsyEnergyY);
+ }
+ else
+ singleCostY = m_rdCost.calcRdCost(nonZeroDistY, nzCbfBitsY + singleBits[TEXT_LUMA][0]);
if (cu.m_tqBypass[0])
{
- distY = nonZeroDistY;
- psyEnergyY = nonZeroPsyEnergyY;
+ singleDist[TEXT_LUMA][0] = nonZeroDistY;
+ singlePsyEnergy[TEXT_LUMA][0] = nonZeroPsyEnergyY;
}
else
{
- uint64_t singleCostY = 0;
- if (m_rdCost.m_psyRd)
- singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0], nonZeroPsyEnergyY);
- else
- singleCostY = m_rdCost.calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]);
-
- uint64_t nullCostY = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
+ // zero-cost calculation for luma. This is an approximation
+ // Initial cost calculation was also an approximation. First resetting the bit counter and then encoding zero cbf.
+ // Now encoding the zero cbf without writing into bitstream, keeping m_fracBits unchanged. The same is valid for chroma.
+ uint64_t nullCostY = estimateNullCbfCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
+
if (nullCostY < singleCostY)
{
cbfFlag[TEXT_LUMA][0] = 0;
+ singleBits[TEXT_LUMA][0] = 0;
#if CHECKED_BUILD || _DEBUG
uint32_t numCoeffY = 1 << (log2TrSize << 1);
memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
@@ -2881,32 +2859,33 @@
#endif
if (checkTransformSkipY)
minCost[TEXT_LUMA][0] = nullCostY;
+ singleDist[TEXT_LUMA][0] = distY;
+ singlePsyEnergy[TEXT_LUMA][0] = psyEnergyY;
}
else
{
- distY = nonZeroDistY;
- psyEnergyY = nonZeroPsyEnergyY;
if (checkTransformSkipY)
minCost[TEXT_LUMA][0] = singleCostY;
+ singleDist[TEXT_LUMA][0] = nonZeroDistY;
+ singlePsyEnergy[TEXT_LUMA][0] = nonZeroPsyEnergyY;
}
}
}
else
{
if (checkTransformSkipY)
- minCost[TEXT_LUMA][0] = deriveNullCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
+ minCost[TEXT_LUMA][0] = estimateNullCbfCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
+ singleDist[TEXT_LUMA][0] = distY;
+ singlePsyEnergy[TEXT_LUMA][0] = psyEnergyY;
}
- singleDistComp[TEXT_LUMA][0] = distY;
- singlePsyEnergyComp[TEXT_LUMA][0] = psyEnergyY;
-
cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
if (bCodeChroma)
{
- uint32_t strideResiC = m_rqt[qtLayer].resiQtYuv.m_csize;
uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
+ uint32_t strideResiC = m_rqt[qtLayer].resiQtYuv.m_csize;
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
uint32_t distC = 0, psyEnergyC = 0;
@@ -2918,38 +2897,60 @@
uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU;
uint32_t subTUOffset = tuIterator.section << (log2TrSizeC * 2);
+ cu.setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
+
+ if (m_bEnableRDOQ && (chromaId != TEXT_CHROMA_V))
+ m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSizeC, false);
+
+ fenc = const_cast<pixel*>(fencYuv->getChromaAddr(chromaId, absPartIdxC));
+ resi = resiYuv.getChromaAddr(chromaId, absPartIdxC);
+ numSig[chromaId][tuIterator.section] = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, coeffCurC + subTUOffset, log2TrSizeC, (TextType)chromaId, absPartIdxC, false);
+ cbfFlag[chromaId][tuIterator.section] = !!numSig[chromaId][tuIterator.section];
+
+ //Coding cbf flags has been removed from here
+// m_entropyCoder.codeQtCbf(cbfFlag[chromaId][tuIterator.section], (TextType)chromaId, tuDepth);
+ if (cbfFlag[chromaId][tuIterator.section])
+ m_entropyCoder.codeCoeffNxN(cu, coeffCurC + subTUOffset, absPartIdxC, log2TrSizeC, (TextType)chromaId);
+ uint32_t newBits = m_entropyCoder.getNumberOfWrittenBits();
+ singleBits[chromaId][tuIterator.section] = newBits - singleBitsPrev;
+ singleBitsPrev = newBits;
+
int16_t *curResiC = m_rqt[qtLayer].resiQtYuv.getChromaAddr(chromaId, absPartIdxC);
-
distC = m_rdCost.scaleChromaDistCb(primitives.ssd_s[log2TrSizeC - 2](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize));
if (cbfFlag[chromaId][tuIterator.section])
{
m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiC, strideResiC, coeffCurC + subTUOffset,
log2TrSizeC, (TextType)chromaId, false, false, numSig[chromaId][tuIterator.section]);
+
+ // non-zero cost calculation for chroma, same as luma - This is an approximation
+ // finally we have to encode correct cbf after comparing with null cost
uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, curResiC, strideResiC);
- const uint32_t nonZeroDistC = m_rdCost.scaleChromaDistCb(dist);
- uint32_t nonZeroPsyEnergyC = 0;
+ uint32_t nzCbfBitsC = m_entropyCoder.estimateCbfBits(cbfFlag[chromaId][tuIterator.section], (TextType)chromaId, tuDepth);
+ uint32_t nonZeroDistC = m_rdCost.scaleChromaDistCb(dist);
+ uint32_t nonZeroPsyEnergyC = 0; uint64_t singleCostC = 0;
if (m_rdCost.m_psyRd)
+ {
nonZeroPsyEnergyC = m_rdCost.psyCost(partSizeC, resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, curResiC, strideResiC);
+ singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, nzCbfBitsC + singleBits[chromaId][tuIterator.section], nonZeroPsyEnergyC);
+ }
+ else
+ singleCostC = m_rdCost.calcRdCost(nonZeroDistC, nzCbfBitsC + singleBits[chromaId][tuIterator.section]);
if (cu.m_tqBypass[0])
{
- distC = nonZeroDistC;
- psyEnergyC = nonZeroPsyEnergyC;
+ singleDist[chromaId][tuIterator.section] = nonZeroDistC;
+ singlePsyEnergy[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
}
else
{
- uint64_t singleCostC = 0;
- if (m_rdCost.m_psyRd)
- singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section], nonZeroPsyEnergyC);
- else
- singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section]);
-
- uint64_t nullCostC = deriveNullCost(distC, psyEnergyC, tuDepth, (TextType)chromaId);
+ //zero-cost calculation for chroma. This is an approximation
+ uint64_t nullCostC = estimateNullCbfCost(distC, psyEnergyC, tuDepth, (TextType)chromaId);
if (nullCostC < singleCostC)
{
cbfFlag[chromaId][tuIterator.section] = 0;
+ singleBits[chromaId][tuIterator.section] = 0;
#if CHECKED_BUILD || _DEBUG
uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
@@ -2957,26 +2958,27 @@
#endif
if (checkTransformSkipC)
minCost[chromaId][tuIterator.section] = nullCostC;
+ singleDist[chromaId][tuIterator.section] = distC;
+ singlePsyEnergy[chromaId][tuIterator.section] = psyEnergyC;
}
else
{
- distC = nonZeroDistC;
- psyEnergyC = nonZeroPsyEnergyC;
if (checkTransformSkipC)
minCost[chromaId][tuIterator.section] = singleCostC;
+ singleDist[chromaId][tuIterator.section] = nonZeroDistC;
+ singlePsyEnergy[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
}
}
}
else
{
if (checkTransformSkipC)
- minCost[chromaId][tuIterator.section] = deriveNullCost(distC, psyEnergyC, tuDepthC, (TextType)chromaId);
+ minCost[chromaId][tuIterator.section] = estimateNullCbfCost(distC, psyEnergyC, tuDepthC, (TextType)chromaId);
primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
+ singleDist[chromaId][tuIterator.section] = distC;
+ singlePsyEnergy[chromaId][tuIterator.section] = psyEnergyC;
}
- singleDistComp[chromaId][tuIterator.section] = distC;
- singlePsyEnergyComp[chromaId][tuIterator.section] = psyEnergyC;
-
cu.setCbfPartRange(cbfFlag[chromaId][tuIterator.section] << tuDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
}
while (tuIterator.isNextSection());
@@ -3027,8 +3029,8 @@
cu.setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
else
{
- singleDistComp[TEXT_LUMA][0] = nonZeroDistY;
- singlePsyEnergyComp[TEXT_LUMA][0] = nonZeroPsyEnergyY;
+ singleDist[TEXT_LUMA][0] = nonZeroDistY;
+ singlePsyEnergy[TEXT_LUMA][0] = nonZeroPsyEnergyY;
cbfFlag[TEXT_LUMA][0] = !!numSigTSkipY;
bestTransformMode[TEXT_LUMA][0] = 1;
uint32_t numCoeffY = 1 << (log2TrSize << 1);
@@ -3073,13 +3075,13 @@
uint32_t numSigTSkipC = m_quant.transformNxN(cu, fenc, fencYuv->m_csize, resi, resiYuv.m_csize, tsCoeffC, log2TrSizeC, (TextType)chromaId, absPartIdxC, true);
m_entropyCoder.resetBits();
- singleBitsComp[chromaId][tuIterator.section] = 0;
+ singleBits[chromaId][tuIterator.section] = 0;
if (numSigTSkipC)
{
m_entropyCoder.codeQtCbf(!!numSigTSkipC, (TextType)chromaId, tuDepth);
m_entropyCoder.codeCoeffNxN(cu, tsCoeffC, absPartIdxC, log2TrSizeC, (TextType)chromaId);
- singleBitsComp[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
+ singleBits[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiC, trSizeC, tsCoeffC,
log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
@@ -3088,18 +3090,18 @@
if (m_rdCost.m_psyRd)
{
nonZeroPsyEnergyC = m_rdCost.psyCost(partSizeC, resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
- singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section], nonZeroPsyEnergyC);
+ singleCostC = m_rdCost.calcPsyRdCost(nonZeroDistC, singleBits[chromaId][tuIterator.section], nonZeroPsyEnergyC);
}
else
- singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBitsComp[chromaId][tuIterator.section]);
+ singleCostC = m_rdCost.calcRdCost(nonZeroDistC, singleBits[chromaId][tuIterator.section]);
}
if (!numSigTSkipC || minCost[chromaId][tuIterator.section] < singleCostC)
cu.setTransformSkipPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
else
{
- singleDistComp[chromaId][tuIterator.section] = nonZeroDistC;
- singlePsyEnergyComp[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
+ singleDist[chromaId][tuIterator.section] = nonZeroDistC;
+ singlePsyEnergy[chromaId][tuIterator.section] = nonZeroPsyEnergyC;
cbfFlag[chromaId][tuIterator.section] = !!numSigTSkipC;
bestTransformMode[chromaId][tuIterator.section] = 1;
uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
@@ -3113,13 +3115,14 @@
}
}
+ // Here we were encoding cbfs and coefficients, after calculating distortion above.
+ // Now I am encoding only cbfs, since I have encoded coefficients above. I have just collected the
+ // bits required for coefficients and added them to the number of cbf bits. As far as I tested, the order does not
+ // make any difference. But I am a bit confused about whether I should load the original context as below.
m_entropyCoder.load(m_rqt[depth].rqtRoot);
-
m_entropyCoder.resetBits();
- if (log2TrSize > depthRange[0])
- m_entropyCoder.codeTransformSubdivFlag(0, 5 - log2TrSize);
-
+ //Encode cbf flags
if (bCodeChroma)
{
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
@@ -3136,43 +3139,31 @@
}
m_entropyCoder.codeQtCbf(cbfFlag[TEXT_LUMA][0], TEXT_LUMA, tuDepth);
- if (cbfFlag[TEXT_LUMA][0])
- m_entropyCoder.codeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
-
- if (bCodeChroma)
- {
- uint32_t subTUSize = 1 << (log2TrSizeC * 2);
- uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
- uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
-
- for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
- {
- coeff_t* coeffCurC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
- if (!splitIntoSubTUs)
- {
- if (cbfFlag[chromaId][0])
- m_entropyCoder.codeCoeffNxN(cu, coeffCurC, absPartIdx, log2TrSizeC, (TextType)chromaId);
- }
- else
- {
- for (uint32_t subTU = 0; subTU < 2; subTU++)
- {
- if (cbfFlag[chromaId][subTU])
- m_entropyCoder.codeCoeffNxN(cu, coeffCurC + subTU * subTUSize, absPartIdx + subTU * partIdxesPerSubTU, log2TrSizeC, (TextType)chromaId);
- }
- }
- }
- }
-
- fullCost.distortion += singleDistComp[TEXT_LUMA][0];
- fullCost.energy += singlePsyEnergyComp[TEXT_LUMA][0];// need to check we need to add chroma also
+
+ uint32_t cbfBits = m_entropyCoder.getNumberOfWrittenBits();
+
+ uint32_t coeffBits = 0;
+ coeffBits = singleBits[TEXT_LUMA][0];
for (uint32_t subTUIndex = 0; subTUIndex < 2; subTUIndex++)
{
- fullCost.distortion += singleDistComp[TEXT_CHROMA_U][subTUIndex];
- fullCost.distortion += singleDistComp[TEXT_CHROMA_V][subTUIndex];
+ coeffBits += singleBits[TEXT_CHROMA_U][subTUIndex];
+ coeffBits += singleBits[TEXT_CHROMA_V][subTUIndex];
}
- fullCost.bits = m_entropyCoder.getNumberOfWrittenBits();
+ // In split mode, we need only coeffBits. The reason is that encoding chroma cbfs is different from luma.
+ // In case of chroma, if any one of the split blocks' cbfs is 1, then we need to encode cbf 1, and then
+ // the individual cbf value of each of the four split blocks. This is not known before analysis of the four split blocks.
+ // For that reason, I am collecting individual coefficient bits only.
+ fullCost.bits = bSplitPresentFlag ? cbfBits + coeffBits : coeffBits;
+
+ fullCost.distortion += singleDist[TEXT_LUMA][0];
+ fullCost.energy += singlePsyEnergy[TEXT_LUMA][0];// need to check we need to add chroma also
+ for (uint32_t subTUIndex = 0; subTUIndex < 2; subTUIndex++)
+ {
+ fullCost.distortion += singleDist[TEXT_CHROMA_U][subTUIndex];
+ fullCost.distortion += singleDist[TEXT_CHROMA_V][subTUIndex];
+ }
+
if (m_rdCost.m_psyRd)
fullCost.rdcost = m_rdCost.calcPsyRdCost(fullCost.distortion, fullCost.bits, fullCost.energy);
else
@@ -3189,6 +3180,14 @@
}
Cost splitCost;
+ if (bSplitPresentFlag && (log2TrSize <= depthRange[1] && log2TrSize > depthRange[0]))
+ {
+ // Subdiv flag can be encoded at the start of analysis of the split blocks.
+ m_entropyCoder.resetBits();
+ m_entropyCoder.codeTransformSubdivFlag(1, 5 - log2TrSize);
+ splitCost.bits = m_entropyCoder.getNumberOfWrittenBits();
+ }
+
const uint32_t qPartNumSubdiv = NUM_CU_PARTITIONS >> ((depth + 1) << 1);
uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
for (uint32_t i = 0; i < 4; ++i)
@@ -3205,15 +3204,16 @@
cu.m_cbf[2][absPartIdx + i] |= vcbf << tuDepth;
}
+ // Here we were encoding cbfs and coefficients for the split blocks. Since I have collected coefficient bits
+ // for each individual block, only cbf values are encoded here. As I mentioned, encoding chroma cbfs is different from luma.
+ // But I have one doubt: what happens if coefficients are encoded in the context at depth 2 (for example) and cbfs are
+ // encoded in the context at depth 0 (for example)?
m_entropyCoder.load(m_rqt[depth].rqtRoot);
m_entropyCoder.resetBits();
codeInterSubdivCbfQT(cu, absPartIdx, depth, depthRange);
- encodeResidualQT(cu, absPartIdx, depth, TEXT_LUMA, depthRange);
- encodeResidualQT(cu, absPartIdx, depth, TEXT_CHROMA_U, depthRange);
- encodeResidualQT(cu, absPartIdx, depth, TEXT_CHROMA_V, depthRange);
-
- splitCost.bits = m_entropyCoder.getNumberOfWrittenBits();
+ uint32_t splitCbfBits = m_entropyCoder.getNumberOfWrittenBits();
+ splitCost.bits += splitCbfBits;
if (m_rdCost.m_psyRd)
splitCost.rdcost = m_rdCost.calcPsyRdCost(splitCost.distortion, splitCost.bits, splitCost.energy);
@@ -3293,9 +3293,6 @@
const bool bSubdiv = curTuDepth != tuDepth;
const uint32_t log2TrSize = g_maxLog2CUSize - depth;
- if (log2TrSize <= depthRange[1] && log2TrSize > depthRange[0])
- m_entropyCoder.codeTransformSubdivFlag(bSubdiv, 5 - log2TrSize);
-
const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
uint32_t trWidthC = 1 << log2TrSizeC;
diff -r a1902139ce82 -r 3e415f2dc737 source/encoder/search.h
--- a/source/encoder/search.h Wed Nov 05 20:34:26 2014 +0530
+++ b/source/encoder/search.h Wed Nov 05 20:34:26 2014 +0530
@@ -218,7 +218,7 @@
Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
};
- uint64_t deriveNullCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
+ uint64_t estimateNullCbfCost(uint32_t &dist, uint32_t &psyEnergy, uint32_t tuDepth, TextType compId);
void estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, uint32_t depthRange[2]);
// estimate bit cost of residual QT
More information about the x265-devel
mailing list