<div dir="ltr"><div>Thanks, Min. This is a solution, but the extra TComDataCU* will affect performance. I have sent another patch that simply re-encodes the CU if lossless is chosen as the best mode, so normal analysis is not affected. Can you review that?<br>
<br></div>Deepthi<br></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Tue, Aug 26, 2014 at 3:47 AM, Steve Borho <span dir="ltr">&lt;<a href="mailto:steve@borho.org" target="_blank">steve@borho.org</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Min Chen &lt;<a href="mailto:chenm003@163.com">chenm003@163.com</a>&gt;<br>
# Date 1409002891 18000<br>
# Mon Aug 25 16:41:31 2014 -0500<br>
# Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264<br>
# Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b<br>
analysis: fix inter hash mistake with --cu-lossless<br>
<br>
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014 +0900<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500<br>
@@ -2293,7 +2293,7 @@<br>
* \returns void<br>
*/<br>
void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,<br>
- ShortYuv* outBestResiYuv, TComYuv* outReconYuv)<br>
+ ShortYuv* outBestResiYuv, TComYuv* outReconYuv, TComDataCU* tmpCu)<br>
{<br>
X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");<br>
<br>
@@ -2321,6 +2321,7 @@<br>
}<br>
<br>
uint64_t bestCost = MAX_INT64;<br>
+ bool bestTransquantBypassFlag = bIsTQBypassEnable;<br>
<br>
for (uint32_t modeId = 0; modeId < numModes; modeId++)<br>
{<br>
@@ -2388,15 +2389,29 @@<br>
if (cu->getQtRootCbf(0))<br>
xSetResidualQTData(cu, 0, outBestResiYuv, depth, true);<br>
<br>
+ bestTransquantBypassFlag = bIsLosslessMode;<br>
bestBits = bits;<br>
bestCost = cost;<br>
bestCoeffBits = cu->m_coeffBits;<br>
m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);<br>
}<br>
+<br>
+ // Save lossless mode coeff<br>
+ if (bIsLosslessMode)<br>
+ {<br>
+ tmpCu->copyPartFrom(cu, 0, depth, false);<br>
+ }<br>
}<br>
<br>
X265_CHECK(bestCost != MAX_INT64, "no best cost\n");<br>
<br>
+ if (bestTransquantBypassFlag && !m_param->bLossless)<br>
+ {<br>
+ assert(log2CUSize > 2);<br>
+ cu->setCUTransquantBypassSubParts(true, 0, depth);<br>
+ cu->copyPartFrom(tmpCu, 0, depth, false);<br>
+ }<br>
+<br>
if (cu->getQtRootCbf(0))<br>
outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);<br>
else<br>
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h<br>
--- a/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 17:53:12 2014 +0900<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 16:41:31 2014 -0500<br>
@@ -147,7 +147,7 @@<br>
<br>
/// encode residual and compute rd-cost for inter mode<br>
void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,<br>
- TComYuv* reconYuv);<br>
+ TComYuv* reconYuv, TComDataCU* tmpCu);<br>
void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv);<br>
<br>
void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,<br>
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp<br>
--- a/source/encoder/analysis.cpp Mon Aug 25 17:53:12 2014 +0900<br>
+++ b/source/encoder/analysis.cpp Mon Aug 25 16:41:31 2014 -0500<br>
@@ -82,7 +82,7 @@<br>
uint32_t sizeL = cuSize * cuSize;<br>
uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));<br>
<br>
- ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, tqBypass);<br>
+ ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, tqBypass);<br>
<br>
m_interCU_2Nx2N[i] = new TComDataCU;<br>
m_interCU_2Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 0, tqBypass);<br>
@@ -108,6 +108,9 @@<br>
m_tempCU[i] = new TComDataCU;<br>
m_tempCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 7, tqBypass);<br>
<br>
+ m_tempLosslessCU[i] = new TComDataCU;<br>
+ m_tempLosslessCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 8, tqBypass);<br>
+<br>
m_bestPredYuv[i] = new TComYuv;<br>
ok &= m_bestPredYuv[i]->create(cuSize, cuSize, csp);<br>
<br>
@@ -158,6 +161,7 @@<br>
delete m_bestMergeCU[i];<br>
delete m_bestCU[i];<br>
delete m_tempCU[i];<br>
+ delete m_tempLosslessCU[i];<br>
<br>
if (m_bestPredYuv && m_bestPredYuv[i])<br>
{<br>
@@ -240,6 +244,7 @@<br>
// initialize CU data<br>
m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());<br>
m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());<br>
+ m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr());<br>
<br>
// analysis of CU<br>
uint32_t numPartition = cu->getTotalNumPart();<br>
@@ -394,6 +399,7 @@<br>
uint32_t nextDepth = depth + 1;<br>
TComDataCU* subBestPartCU = m_bestCU[nextDepth];<br>
TComDataCU* subTempPartCU = m_tempCU[nextDepth];<br>
+ TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];<br>
for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)<br>
{<br>
int qp = outTempCU->getQP(0);<br>
@@ -404,6 +410,7 @@<br>
(subBestPartCU->getCUPelY() < slice->m_sps->picHeightInLumaSamples)))<br>
{<br>
subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+ subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
if (0 == partUnitIdx) //initialize RD with previous depth buffer<br>
{<br>
m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);<br>
@@ -663,7 +670,7 @@<br>
}<br>
<br>
encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],<br>
- m_bestResiYuv[depth], m_bestRecoYuv[depth]);<br>
+ m_bestResiYuv[depth], m_bestRecoYuv[depth], m_tempLosslessCU[depth]);<br>
uint64_t bestMergeCost = m_rdCost.m_psyRd ? m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost;<br>
uint64_t bestCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;<br>
if (bestMergeCost < bestCost)<br>
@@ -733,7 +740,7 @@<br>
}<br>
<br>
encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],<br>
- m_bestResiYuv[depth], m_bestRecoYuv[depth]);<br>
+ m_bestResiYuv[depth], m_bestRecoYuv[depth], m_tempLosslessCU[depth]);<br>
m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);<br>
}<br>
else if (outBestCU->getPredictionMode(0) == MODE_INTRA)<br>
@@ -880,10 +887,12 @@<br>
outTempCU->setQPSubParts(qp, 0, depth);<br>
uint32_t nextDepth = depth + 1;<br>
TComDataCU* subTempPartCU = m_tempCU[nextDepth];<br>
+ TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];<br>
for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)<br>
{<br>
TComDataCU* subBestPartCU = NULL;<br>
subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+ subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp);<br>
<br>
if (bInsidePicture ||<br>
((subTempPartCU->getCUPelX() < slice->m_sps->picWidthInLumaSamples) &&<br>
@@ -1258,10 +1267,12 @@<br>
uint32_t nextDepth = depth + 1;<br>
TComDataCU* subBestPartCU = m_bestCU[nextDepth];<br>
TComDataCU* subTempPartCU = m_tempCU[nextDepth];<br>
+ TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];<br>
for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)<br>
{<br>
int qp = outTempCU->getQP(0);<br>
subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.<br>
+ subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp);<br>
<br>
if (bInsidePicture ||<br>
((subBestPartCU->getCUPelX() < slice->m_sps->picWidthInLumaSamples) &&<br>
@@ -1433,7 +1444,7 @@<br>
}<br>
<br>
//Encode with residue<br>
- encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth]);<br>
+ encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], m_tempLosslessCU[depth]);<br>
<br>
uint64_t tempCost = m_rdCost.m_psyRd ? outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;<br>
uint64_t bestCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;<br>
@@ -1506,7 +1517,8 @@<br>
m_tmpPredYuv[depth],<br>
m_tmpResiYuv[depth],<br>
m_bestResiYuv[depth],<br>
- m_tmpRecoYuv[depth]);<br>
+ m_tmpRecoYuv[depth],<br>
+ m_tempLosslessCU[depth]);<br>
<br>
<br>
/* Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low? */<br>
@@ -1590,7 +1602,7 @@<br>
<br>
if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true))<br>
{<br>
- encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth]);<br>
+ encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], m_tempLosslessCU[depth]);<br>
checkDQP(outTempCU);<br>
checkBestMode(outBestCU, outTempCU, depth);<br>
}<br>
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.h<br>
--- a/source/encoder/analysis.h Mon Aug 25 17:53:12 2014 +0900<br>
+++ b/source/encoder/analysis.h Mon Aug 25 16:41:31 2014 -0500<br>
@@ -82,6 +82,7 @@<br>
TComDataCU* m_bestMergeCU[NUM_CU_DEPTH];<br>
TComDataCU* m_bestCU[NUM_CU_DEPTH]; // Best CUs at each depth<br>
TComDataCU* m_tempCU[NUM_CU_DEPTH]; // Temporary CUs at each depth<br>
+ TComDataCU* m_tempLosslessCU[NUM_CU_DEPTH]; // Temporary CUs for lossless at each depth<br>
<br>
TComYuv** m_bestPredYuv; // Best Prediction Yuv for each depth<br>
ShortYuv** m_bestResiYuv; // Best Residual Yuv for each depth<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>