[x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless
Deepthi Nandakumar
deepthi at multicorewareinc.com
Tue Aug 26 09:29:05 CEST 2014
Thanks, Min. This solves the problem, but the extra TComDataCU* will affect
performance. I have sent another patch that simply re-encodes the CU if
lossless is chosen as the best mode, so normal analysis is not affected.
Can you review that patch instead?
Deepthi
On Tue, Aug 26, 2014 at 3:47 AM, Steve Borho <steve at borho.org> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1409002891 18000
> # Mon Aug 25 16:41:31 2014 -0500
> # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264
> # Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b
> analysis: fix inter hash mistake with --cu-lossless
>
> diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014
> +0900
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014
> -0500
> @@ -2293,7 +2293,7 @@
> * \returns void
> */
> void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv*
> fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,
> - ShortYuv* outBestResiYuv,
> TComYuv* outReconYuv)
> + ShortYuv* outBestResiYuv,
> TComYuv* outReconYuv, TComDataCU* tmpCu)
> {
> X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
>
> @@ -2321,6 +2321,7 @@
> }
>
> uint64_t bestCost = MAX_INT64;
> + bool bestTransquantBypassFlag = bIsTQBypassEnable;
>
> for (uint32_t modeId = 0; modeId < numModes; modeId++)
> {
> @@ -2388,15 +2389,29 @@
> if (cu->getQtRootCbf(0))
> xSetResidualQTData(cu, 0, outBestResiYuv, depth, true);
>
> + bestTransquantBypassFlag = bIsLosslessMode;
> bestBits = bits;
> bestCost = cost;
> bestCoeffBits = cu->m_coeffBits;
> m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
> }
> +
> + // Save lossless mode coeff
> + if (bIsLosslessMode)
> + {
> + tmpCu->copyPartFrom(cu, 0, depth, false);
> + }
> }
>
> X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
>
> + if (bestTransquantBypassFlag && !m_param->bLossless)
> + {
> + assert(log2CUSize > 2);
> + cu->setCUTransquantBypassSubParts(true, 0, depth);
> + cu->copyPartFrom(tmpCu, 0, depth, false);
> + }
> +
> if (cu->getQtRootCbf(0))
> outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
> else
> diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 17:53:12 2014
> +0900
> +++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 16:41:31 2014
> -0500
> @@ -147,7 +147,7 @@
>
> /// encode residual and compute rd-cost for inter mode
> void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
> - TComYuv* reconYuv);
> + TComYuv* reconYuv, TComDataCU* tmpCu);
> void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, TComYuv* reconYuv);
>
> void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, TComYuv* fencYuv,
> diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Mon Aug 25 17:53:12 2014 +0900
> +++ b/source/encoder/analysis.cpp Mon Aug 25 16:41:31 2014 -0500
> @@ -82,7 +82,7 @@
> uint32_t sizeL = cuSize * cuSize;
> uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
> CHROMA_V_SHIFT(csp));
>
> - ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8,
> tqBypass);
> + ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9,
> tqBypass);
>
> m_interCU_2Nx2N[i] = new TComDataCU;
> m_interCU_2Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize,
> csp, 0, tqBypass);
> @@ -108,6 +108,9 @@
> m_tempCU[i] = new TComDataCU;
> m_tempCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 7,
> tqBypass);
>
> + m_tempLosslessCU[i] = new TComDataCU;
> + m_tempLosslessCU[i]->create(&m_memPool[i], numPartitions, cuSize,
> csp, 8, tqBypass);
> +
> m_bestPredYuv[i] = new TComYuv;
> ok &= m_bestPredYuv[i]->create(cuSize, cuSize, csp);
>
> @@ -158,6 +161,7 @@
> delete m_bestMergeCU[i];
> delete m_bestCU[i];
> delete m_tempCU[i];
> + delete m_tempLosslessCU[i];
>
> if (m_bestPredYuv && m_bestPredYuv[i])
> {
> @@ -240,6 +244,7 @@
> // initialize CU data
> m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
> m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());
> + m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr());
>
> // analysis of CU
> uint32_t numPartition = cu->getTotalNumPart();
> @@ -394,6 +399,7 @@
> uint32_t nextDepth = depth + 1;
> TComDataCU* subBestPartCU = m_bestCU[nextDepth];
> TComDataCU* subTempPartCU = m_tempCU[nextDepth];
> + TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
> for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
> {
> int qp = outTempCU->getQP(0);
> @@ -404,6 +410,7 @@
> (subBestPartCU->getCUPelY() <
> slice->m_sps->picHeightInLumaSamples)))
> {
> subTempPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp); // clear sub partition datas or init.
> + subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp); // clear sub partition datas or init.
> if (0 == partUnitIdx) //initialize RD with previous depth
> buffer
> {
>
> m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> @@ -663,7 +670,7 @@
> }
>
> encodeResAndCalcRdInterCU(outBestCU,
> m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
> - m_bestResiYuv[depth],
> m_bestRecoYuv[depth]);
> + m_bestResiYuv[depth],
> m_bestRecoYuv[depth], m_tempLosslessCU[depth]);
> uint64_t bestMergeCost = m_rdCost.m_psyRd ?
> m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost;
> uint64_t bestCost = m_rdCost.m_psyRd ?
> outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
> if (bestMergeCost < bestCost)
> @@ -733,7 +740,7 @@
> }
>
> encodeResAndCalcRdInterCU(outBestCU,
> m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
> - m_bestResiYuv[depth],
> m_bestRecoYuv[depth]);
> + m_bestResiYuv[depth],
> m_bestRecoYuv[depth], m_tempLosslessCU[depth]);
>
> m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
> }
> else if (outBestCU->getPredictionMode(0) ==
> MODE_INTRA)
> @@ -880,10 +887,12 @@
> outTempCU->setQPSubParts(qp, 0, depth);
> uint32_t nextDepth = depth + 1;
> TComDataCU* subTempPartCU = m_tempCU[nextDepth];
> + TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
> for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
> {
> TComDataCU* subBestPartCU = NULL;
> subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp); // clear sub partition datas or init.
> + subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp);
>
> if (bInsidePicture ||
> ((subTempPartCU->getCUPelX() <
> slice->m_sps->picWidthInLumaSamples) &&
> @@ -1258,10 +1267,12 @@
> uint32_t nextDepth = depth + 1;
> TComDataCU* subBestPartCU = m_bestCU[nextDepth];
> TComDataCU* subTempPartCU = m_tempCU[nextDepth];
> + TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
> for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
> {
> int qp = outTempCU->getQP(0);
> subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp); // clear sub partition datas or init.
> + subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp);
>
> if (bInsidePicture ||
> ((subBestPartCU->getCUPelX() <
> slice->m_sps->picWidthInLumaSamples) &&
> @@ -1433,7 +1444,7 @@
> }
>
> //Encode with residue
> - encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth]);
> + encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth], m_tempLosslessCU[depth]);
>
> uint64_t tempCost = m_rdCost.m_psyRd ?
> outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;
> uint64_t bestCost = m_rdCost.m_psyRd ?
> outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
> @@ -1506,7 +1517,8 @@
> m_tmpPredYuv[depth],
> m_tmpResiYuv[depth],
> m_bestResiYuv[depth],
> - m_tmpRecoYuv[depth]);
> + m_tmpRecoYuv[depth],
> +
> m_tempLosslessCU[depth]);
>
>
> /* Todo: Fix the satd cost estimates. Why is merge
> being chosen in high motion areas: estimated distortion is too low? */
> @@ -1590,7 +1602,7 @@
>
> if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true))
> {
> - encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth]);
> + encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth], m_tempLosslessCU[depth]);
> checkDQP(outTempCU);
> checkBestMode(outBestCU, outTempCU, depth);
> }
> diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.h
> --- a/source/encoder/analysis.h Mon Aug 25 17:53:12 2014 +0900
> +++ b/source/encoder/analysis.h Mon Aug 25 16:41:31 2014 -0500
> @@ -82,6 +82,7 @@
> TComDataCU* m_bestMergeCU[NUM_CU_DEPTH];
> TComDataCU* m_bestCU[NUM_CU_DEPTH]; // Best CUs at each depth
> TComDataCU* m_tempCU[NUM_CU_DEPTH]; // Temporary CUs at each depth
> + TComDataCU* m_tempLosslessCU[NUM_CU_DEPTH]; // Temporary CUs for
> lossless at each depth
>
> TComYuv** m_bestPredYuv; // Best Prediction Yuv for each
> depth
> ShortYuv** m_bestResiYuv; // Best Residual Yuv for each
> depth
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140826/d936fba3/attachment-0001.html>
More information about the x265-devel
mailing list