[x265] search: separate bSkipRes == true path
Satoshi Nakagawa
nakagawa424 at oki.com
Mon Jul 28 11:52:34 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1406540991 -32400
# Mon Jul 28 18:49:51 2014 +0900
# Node ID a4beebdb70524da737d4d5d11e6b55961b9ef988
# Parent 8bab5275baed85f8a6e183d7edfeba9a516a3669
search: separate bSkipRes == true path
diff -r 8bab5275baed -r a4beebdb7052 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 28 00:14:55 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 28 18:49:51 2014 +0900
@@ -2268,6 +2268,57 @@
mvmax.y = X265_MIN(mvmax.y, m_refLagPixels);
}
+void TEncSearch::encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* outReconYuv)
+{
+ X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
+
+ uint32_t log2CUSize = cu->getLog2CUSize(0);
+ uint32_t cuSize = 1 << log2CUSize;
+ uint8_t depth = cu->getDepth(0);
+
+ int hChromaShift = CHROMA_H_SHIFT(m_csp);
+ int vChromaShift = CHROMA_V_SHIFT(m_csp);
+
+ // No residual coding : SKIP mode
+
+ cu->setSkipFlagSubParts(true, 0, depth);
+ cu->setTrIdxSubParts(0, 0, depth);
+ cu->clearCbf(0, depth);
+
+ outReconYuv->copyFromYuv(predYuv);
+ // Luma
+ int part = partitionFromLog2Size(log2CUSize);
+ uint32_t distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
+ // Chroma
+ part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
+ distortion += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
+ distortion += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
+
+ m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+ m_entropyCoder->resetBits();
+ if (cu->m_slice->m_pps->bTransquantBypassEnabled)
+ m_entropyCoder->codeCUTransquantBypassFlag(cu, 0);
+ m_entropyCoder->codeSkipFlag(cu, 0);
+ m_entropyCoder->codeMergeIndex(cu, 0);
+
+ uint32_t bits = m_entropyCoder->getNumberOfWrittenBits();
+ cu->m_mvBits = bits;
+ cu->m_coeffBits = 0;
+ cu->m_totalBits = bits;
+ cu->m_totalDistortion = distortion;
+ if (m_rdCost.psyRdEnabled())
+ {
+ int size = log2CUSize - 2;
+ cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
+ outReconYuv->getLumaAddr(), outReconYuv->getStride());
+ cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, cu->m_psyEnergy);
+ }
+ else
+ cu->m_totalRDCost = m_rdCost.calcRdCost(cu->m_totalDistortion, cu->m_totalBits);
+
+ m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
+}
+
/** encode residual and calculate rate-distortion for a CU block
* \param cu
* \param fencYuv
@@ -2275,17 +2326,14 @@
* \param outResiYuv
* \param outBestResiYuv
* \param outReconYuv
- * \param bSkipRes
* \returns void
*/
void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,
- ShortYuv* outBestResiYuv, TComYuv* outReconYuv, bool bSkipRes, bool curUseRDOQ)
+ ShortYuv* outBestResiYuv, TComYuv* outReconYuv, bool curUseRDOQ)
{
- if (cu->isIntra(0))
- return;
-
- uint32_t bits = 0, bestBits = 0, bestCoeffBits = 0;
- uint32_t distortion = 0, bestDist = 0;
+ X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
+
+ uint32_t bestBits = 0, bestCoeffBits = 0;
uint32_t log2CUSize = cu->getLog2CUSize(0);
uint32_t cuSize = 1 << log2CUSize;
@@ -2294,77 +2342,33 @@
int hChromaShift = CHROMA_H_SHIFT(m_csp);
int vChromaShift = CHROMA_V_SHIFT(m_csp);
- // No residual coding : SKIP mode
- if (bSkipRes)
+ m_trQuant.setQPforQuant(cu);
+
+ outResiYuv->subtract(fencYuv, predYuv, log2CUSize);
+
+ // Residual coding.
+ bool bIsTQBypassEnable = cu->m_slice->m_pps->bTransquantBypassEnabled;
+ uint32_t tqBypassMode = 1;
+
+ if (bIsTQBypassEnable)
{
- cu->setSkipFlagSubParts(true, 0, depth);
-
- outReconYuv->copyFromYuv(predYuv);
- // Luma
- int part = partitionFromLog2Size(log2CUSize);
- distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
- // Chroma
- part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
- distortion += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
- distortion += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
-
- m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
- m_entropyCoder->resetBits();
- if (cu->m_slice->m_pps->bTransquantBypassEnabled)
- m_entropyCoder->codeCUTransquantBypassFlag(cu, 0);
- m_entropyCoder->codeSkipFlag(cu, 0);
- m_entropyCoder->codeMergeIndex(cu, 0);
-
- bits = m_entropyCoder->getNumberOfWrittenBits();
- cu->m_mvBits = bits;
- cu->m_coeffBits = 0;
- cu->m_totalBits = bits;
- cu->m_totalDistortion = distortion;
- if (m_rdCost.psyRdEnabled())
- {
- int size = log2CUSize - 2;
- cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
- outReconYuv->getLumaAddr(), outReconYuv->getStride());
- cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, cu->m_psyEnergy);
- }
- else
- cu->m_totalRDCost = m_rdCost.calcRdCost(cu->m_totalDistortion, cu->m_totalBits);
-
- m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
-
- cu->clearCbf(0, depth);
- cu->setTrIdxSubParts(0, 0, depth);
- return;
- }
-
- m_trQuant.setQPforQuant(cu);
-
- outResiYuv->subtract(fencYuv, predYuv, log2CUSize);
-
- // Residual coding.
- bool bIsTQBypassEnable = false, bIsLosslessMode = false;
- uint32_t tqBypassMode = 1;
-
- if ((cu->m_slice->m_pps->bTransquantBypassEnabled))
- {
- bIsTQBypassEnable = true; // mark that the first iteration is to cost TQB mode.
- tqBypassMode = 2;
- if (m_param->bLossless)
- tqBypassMode = 1;
+ // mark that the first iteration is to cost TQB mode.
+ if (!m_param->bLossless)
+ tqBypassMode = 2;
}
uint64_t bestCost = MAX_INT64;
for (uint32_t modeId = 0; modeId < tqBypassMode; modeId++)
{
- bIsLosslessMode = bIsTQBypassEnable && !modeId;
+ bool bIsLosslessMode = bIsTQBypassEnable && !modeId;
cu->setCUTransquantBypassSubParts(bIsLosslessMode, 0, depth);
uint64_t cost = 0;
uint32_t zeroDistortion = 0;
- bits = 0;
- distortion = 0;
+ uint32_t bits = 0;
+ uint32_t distortion = 0;
m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth, cost, bits, distortion, &zeroDistortion, curUseRDOQ);
@@ -2426,48 +2430,42 @@
bestCoeffBits = cu->m_coeffBits;
m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
}
-
- X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
-
- if (cu->getQtRootCbf(0))
- outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
- else
- outReconYuv->copyFromYuv(predYuv);
-
- // update with clipped distortion and cost (qp estimation loop uses unclipped values)
- int part = partitionFromLog2Size(log2CUSize);
- bestDist = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
- part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
- bestDist += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
- bestDist += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
- if (m_rdCost.psyRdEnabled())
- {
- int size = log2CUSize - 2;
- cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
- outReconYuv->getLumaAddr(), outReconYuv->getStride());
- cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(bestDist, bestBits, cu->m_psyEnergy);
- }
- else
- cu->m_totalRDCost = m_rdCost.calcRdCost(bestDist, bestBits);
- cu->m_totalBits = bestBits;
- cu->m_totalDistortion = bestDist;
- cu->m_coeffBits = bestCoeffBits;
- cu->m_mvBits = bestBits - bestCoeffBits;
-
- if (cu->isSkipped(0))
- cu->clearCbf(0, depth);
}
+
+ X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
+
+ if (cu->getQtRootCbf(0))
+ outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
+ else
+ outReconYuv->copyFromYuv(predYuv);
+
+ // update with clipped distortion and cost (qp estimation loop uses unclipped values)
+ int part = partitionFromLog2Size(log2CUSize);
+ uint32_t bestDist = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
+ part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
+ bestDist += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
+ bestDist += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
+ if (m_rdCost.psyRdEnabled())
+ {
+ int size = log2CUSize - 2;
+ cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
+ outReconYuv->getLumaAddr(), outReconYuv->getStride());
+ cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(bestDist, bestBits, cu->m_psyEnergy);
+ }
+ else
+ cu->m_totalRDCost = m_rdCost.calcRdCost(bestDist, bestBits);
+
+ cu->m_totalBits = bestBits;
+ cu->m_totalDistortion = bestDist;
+ cu->m_coeffBits = bestCoeffBits;
+ cu->m_mvBits = bestBits - bestCoeffBits;
+
+ if (cu->isSkipped(0))
+ cu->clearCbf(0, depth);
}
-void TEncSearch::generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, bool skipRes)
+void TEncSearch::generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv)
{
- if (skipRes && cu->getPredictionMode(0) == MODE_INTER && cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)
- {
- reconYuv->copyFromYuv(predYuv);
- cu->clearCbf(0, cu->getDepth(0));
- return;
- }
-
m_trQuant.setQPforQuant(cu);
if (cu->getPredictionMode(0) == MODE_INTER)
diff -r 8bab5275baed -r a4beebdb7052 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h Mon Jul 28 00:14:55 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Jul 28 18:49:51 2014 +0900
@@ -149,14 +149,15 @@
/// encode residual and compute rd-cost for inter mode
void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
- TComYuv* reconYuv, bool bSkipRes, bool curUseRDOQ);
+ TComYuv* reconYuv, bool curUseRDOQ);
+ void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv);
void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
TComYuv* predYuv, ShortYuv* resiYuv, uint32_t& distY, bool bCheckFirst,
uint64_t& dRDCost);
void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);
- void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, bool skipRes);
+ void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);
void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, uint32_t depth,
uint64_t &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
diff -r 8bab5275baed -r a4beebdb7052 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Mon Jul 28 00:14:55 2014 -0500
+++ b/source/encoder/analysis.cpp Mon Jul 28 18:49:51 2014 +0900
@@ -665,7 +665,7 @@
}
encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
- m_bestResiYuv[depth], m_bestRecoYuv[depth], false, true);
+ m_bestResiYuv[depth], m_bestRecoYuv[depth], true);
uint64_t bestMergeCost = m_rdCost.psyRdEnabled() ? m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost;
uint64_t bestCost = m_rdCost.psyRdEnabled() ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
if (bestMergeCost < bestCost)
@@ -738,7 +738,7 @@
}
encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
- m_bestResiYuv[depth], m_bestRecoYuv[depth], false, true);
+ m_bestResiYuv[depth], m_bestRecoYuv[depth], true);
m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
}
else if (outBestCU->getPredictionMode(0) == MODE_INTRA)
@@ -763,10 +763,10 @@
}
m_tmpResiYuv[depth]->subtract(m_origYuv[depth], m_bestPredYuv[depth], outBestCU->getLog2CUSize(0));
- generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], false);
+ generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);
}
else
- generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], false);
+ generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);
}
else if (m_param->rdLevel == 0)
{
@@ -1419,13 +1419,13 @@
else
{
//No-residue mode
- encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], true, true);
+ encodeResAndCalcRdSkipCU(outBestCU, m_origYuv[depth], bestPredYuv, m_tmpRecoYuv[depth]);
std::swap(yuvReconBest, m_tmpRecoYuv[depth]);
m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
}
//Encode with residue
- encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false, true);
+ encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], true);
uint64_t tempCost = m_rdCost.psyRdEnabled() ? outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;
uint64_t bestCost = m_rdCost.psyRdEnabled() ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
@@ -1487,14 +1487,20 @@
outTempCU->getPartIndexAndSize(0, m_partAddr, m_width, m_height);
motionCompensation(outTempCU, m_tmpPredYuv[depth], REF_PIC_LIST_X, true, true);
// estimate residual and encode everything
- encodeResAndCalcRdInterCU(outTempCU,
- m_origYuv[depth],
- m_tmpPredYuv[depth],
- m_tmpResiYuv[depth],
- m_bestResiYuv[depth],
- m_tmpRecoYuv[depth],
- !!noResidual,
- true);
+ if (noResidual)
+ encodeResAndCalcRdSkipCU(outTempCU,
+ m_origYuv[depth],
+ m_tmpPredYuv[depth],
+ m_tmpRecoYuv[depth]);
+ else
+ encodeResAndCalcRdInterCU(outTempCU,
+ m_origYuv[depth],
+ m_tmpPredYuv[depth],
+ m_tmpResiYuv[depth],
+ m_bestResiYuv[depth],
+ m_tmpRecoYuv[depth],
+ true);
+
/* Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low? */
if (!noResidual && !outTempCU->getQtRootCbf(0))
@@ -1577,7 +1583,7 @@
if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true))
{
- encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false, true);
+ encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], true);
checkDQP(outTempCU);
checkBestMode(outBestCU, outTempCU, depth);
}
@@ -1922,7 +1928,7 @@
else
{
m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
- generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], false);
+ generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
checkDQP(cu);
m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), lcu->getAddr(), absPartIdx);
cu->copyCodedToPic(depth);
More information about the x265-devel mailing list