[x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless

Tue Aug 26 09:29:05 CEST 2014

Thanks, Min. This works as a solution, but it will affect performance
because it requires an extra TComDataCU*. I have sent another patch that
instead just re-encodes the CU if lossless is chosen as the best mode, so
normal analysis is not affected. Can you review that one?

Deepthi

On Tue, Aug 26, 2014 at 3:47 AM, Steve Borho <steve at borho.org> wrote:

> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1409002891 18000
> #      Mon Aug 25 16:41:31 2014 -0500
> # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264
> # Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
> analysis: fix inter hash mistake with --cu-lossless
>
> diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp     Mon Aug 25 17:53:12 2014
> +0900
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Mon Aug 25 16:41:31 2014
> -0500
> @@ -2293,7 +2293,7 @@
>   * \returns void
>   */
>  void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv*
> fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,
> -                                           ShortYuv* outBestResiYuv,
> TComYuv* outReconYuv)
> +                                           ShortYuv* outBestResiYuv,
> TComYuv* outReconYuv, TComDataCU* tmpCu)
>  {
>      X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
>
> @@ -2321,6 +2321,7 @@
>      }
>
>      uint64_t bestCost = MAX_INT64;
> +    bool bestTransquantBypassFlag = bIsTQBypassEnable;
>
>      for (uint32_t modeId = 0; modeId < numModes; modeId++)
>      {
> @@ -2388,15 +2389,29 @@
>              if (cu->getQtRootCbf(0))
>                  xSetResidualQTData(cu, 0, outBestResiYuv, depth, true);
>
> +            bestTransquantBypassFlag = bIsLosslessMode;
>              bestBits = bits;
>              bestCost = cost;
>              bestCoeffBits = cu->m_coeffBits;
>              m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
>          }
> +
> +        // Save lossless mode coeff
> +        if (bIsLosslessMode)
> +        {
> +            tmpCu->copyPartFrom(cu, 0, depth, false);
> +        }
>      }
>
>      X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
>
> +    if (bestTransquantBypassFlag && !m_param->bLossless)
> +    {
> +        assert(log2CUSize > 2);
> +        cu->setCUTransquantBypassSubParts(true, 0, depth);
> +        cu->copyPartFrom(tmpCu, 0, depth, false);
> +    }
> +
>      if (cu->getQtRootCbf(0))
>          outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
>      else
> diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h       Mon Aug 25 17:53:12 2014
> +0900
> +++ b/source/Lib/TLibEncoder/TEncSearch.h       Mon Aug 25 16:41:31 2014
> -0500
> @@ -147,7 +147,7 @@
>
>      /// encode residual and compute rd-cost for inter mode
>      void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
> -                                   TComYuv* reconYuv);
> +                                   TComYuv* reconYuv, TComDataCU* tmpCu);
>      void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, TComYuv* reconYuv);
>
>      void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, TComYuv* fencYuv,
> diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Mon Aug 25 17:53:12 2014 +0900
> +++ b/source/encoder/analysis.cpp       Mon Aug 25 16:41:31 2014 -0500
> @@ -82,7 +82,7 @@
>          uint32_t sizeL = cuSize * cuSize;
>          uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
> CHROMA_V_SHIFT(csp));
>
> -        ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8,
> tqBypass);
> +        ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9,
> tqBypass);
>
>          m_interCU_2Nx2N[i]  = new TComDataCU;
>          m_interCU_2Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize,
> csp, 0, tqBypass);
> @@ -108,6 +108,9 @@
>          m_tempCU[i]         = new TComDataCU;
>          m_tempCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 7,
> tqBypass);
>
> +        m_tempLosslessCU[i] = new TComDataCU;
> +        m_tempLosslessCU[i]->create(&m_memPool[i], numPartitions, cuSize,
> csp, 8, tqBypass);
> +
>          m_bestPredYuv[i] = new TComYuv;
>          ok &= m_bestPredYuv[i]->create(cuSize, cuSize, csp);
>
> @@ -158,6 +161,7 @@
>          delete m_bestMergeCU[i];
>          delete m_bestCU[i];
>          delete m_tempCU[i];
> +        delete m_tempLosslessCU[i];
>
>          if (m_bestPredYuv && m_bestPredYuv[i])
>          {
> @@ -240,6 +244,7 @@
>      // initialize CU data
>      m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
>      m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());
> +    m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr());
>
>      // analysis of CU
>      uint32_t numPartition = cu->getTotalNumPart();
> @@ -394,6 +399,7 @@
>          uint32_t    nextDepth     = depth + 1;
>          TComDataCU* subBestPartCU = m_bestCU[nextDepth];
>          TComDataCU* subTempPartCU = m_tempCU[nextDepth];
> +        TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
>          for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
>          {
>              int qp = outTempCU->getQP(0);
> @@ -404,6 +410,7 @@
>                   (subBestPartCU->getCUPelY() <
> slice->m_sps->picHeightInLumaSamples)))
>              {
>                  subTempPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp); // clear sub partition datas or init.
> +                subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp); // clear sub partition datas or init.
>                  if (0 == partUnitIdx) //initialize RD with previous depth
> buffer
>                  {
>
>  m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> @@ -663,7 +670,7 @@
>                      }
>
>                      encodeResAndCalcRdInterCU(outBestCU,
> m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
> -                                              m_bestResiYuv[depth],
> m_bestRecoYuv[depth]);
> +                                              m_bestResiYuv[depth],
> m_bestRecoYuv[depth], m_tempLosslessCU[depth]);
>                      uint64_t bestMergeCost = m_rdCost.m_psyRd ?
> m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost;
>                      uint64_t bestCost = m_rdCost.m_psyRd ?
> outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
>                      if (bestMergeCost < bestCost)
> @@ -733,7 +740,7 @@
>                          }
>
>                          encodeResAndCalcRdInterCU(outBestCU,
> m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
> -                                                  m_bestResiYuv[depth],
> m_bestRecoYuv[depth]);
> +                                                  m_bestResiYuv[depth],
> m_bestRecoYuv[depth], m_tempLosslessCU[depth]);
>
>  m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
>                      }
>                      else if (outBestCU->getPredictionMode(0) ==
> MODE_INTRA)
> @@ -880,10 +887,12 @@
>          outTempCU->setQPSubParts(qp, 0, depth);
>          uint32_t    nextDepth = depth + 1;
>          TComDataCU* subTempPartCU = m_tempCU[nextDepth];
> +        TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
>          for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
>          {
>              TComDataCU* subBestPartCU = NULL;
>              subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp); // clear sub partition datas or init.
> +            subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp);
>
>              if (bInsidePicture ||
>                  ((subTempPartCU->getCUPelX() <
> slice->m_sps->picWidthInLumaSamples) &&
> @@ -1258,10 +1267,12 @@
>          uint32_t    nextDepth     = depth + 1;
>          TComDataCU* subBestPartCU = m_bestCU[nextDepth];
>          TComDataCU* subTempPartCU = m_tempCU[nextDepth];
> +        TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
>          for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
>          {
>              int qp = outTempCU->getQP(0);
>              subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp); // clear sub partition datas or init.
> +            subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp);
>
>              if (bInsidePicture ||
>                  ((subBestPartCU->getCUPelX() <
> slice->m_sps->picWidthInLumaSamples) &&
> @@ -1433,7 +1444,7 @@
>              }
>
>              //Encode with residue
> -            encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth]);
> +            encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth], m_tempLosslessCU[depth]);
>
>              uint64_t tempCost = m_rdCost.m_psyRd ?
> outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;
>              uint64_t bestCost = m_rdCost.m_psyRd ?
> outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
> @@ -1506,7 +1517,8 @@
>                                                    m_tmpPredYuv[depth],
>                                                    m_tmpResiYuv[depth],
>                                                    m_bestResiYuv[depth],
> -                                                  m_tmpRecoYuv[depth]);
> +                                                  m_tmpRecoYuv[depth],
> +
> m_tempLosslessCU[depth]);
>
>
>                      /* Todo: Fix the satd cost estimates. Why is merge
> being chosen in high motion areas: estimated distortion is too low? */
> @@ -1590,7 +1602,7 @@
>
>      if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true))
>      {
> -        encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth]);
> +        encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth], m_tempLosslessCU[depth]);
>          checkDQP(outTempCU);
>          checkBestMode(outBestCU, outTempCU, depth);
>      }
> diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.h
> --- a/source/encoder/analysis.h Mon Aug 25 17:53:12 2014 +0900
> +++ b/source/encoder/analysis.h Mon Aug 25 16:41:31 2014 -0500
> @@ -82,6 +82,7 @@
>      TComDataCU*  m_bestMergeCU[NUM_CU_DEPTH];
>      TComDataCU*  m_bestCU[NUM_CU_DEPTH]; // Best CUs at each depth
>      TComDataCU*  m_tempCU[NUM_CU_DEPTH]; // Temporary CUs at each depth
> +    TComDataCU*  m_tempLosslessCU[NUM_CU_DEPTH]; // Temporary CUs for
> lossless at each depth
>
>      TComYuv**    m_bestPredYuv;          // Best Prediction Yuv for each
> depth
>      ShortYuv**   m_bestResiYuv;          // Best Residual Yuv for each
> depth
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140826/d936fba3/attachment-0001.html>