[x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless

Tue Aug 26 00:17:33 CEST 2014

# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1409002891 18000
#      Mon Aug 25 16:41:31 2014 -0500
# Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264
# Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
analysis: fix inter hash mistake with --cu-lossless

diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp

--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Aug 25 17:53:12 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Aug 25 16:41:31 2014 -0500
@@ -2293,7 +2293,7 @@
  * \returns void
  */
 void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,
-                                           ShortYuv* outBestResiYuv, TComYuv* outReconYuv)
+                                           ShortYuv* outBestResiYuv, TComYuv* outReconYuv, TComDataCU* tmpCu)
 {
     X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
 
@@ -2321,6 +2321,7 @@
     }
 
     uint64_t bestCost = MAX_INT64;
+    bool bestTransquantBypassFlag = bIsTQBypassEnable;
 
     for (uint32_t modeId = 0; modeId < numModes; modeId++)
     {
@@ -2388,15 +2389,29 @@
             if (cu->getQtRootCbf(0))
                 xSetResidualQTData(cu, 0, outBestResiYuv, depth, true);
 
+            bestTransquantBypassFlag = bIsLosslessMode;
             bestBits = bits;
             bestCost = cost;
             bestCoeffBits = cu->m_coeffBits;
             m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
         }
+
+        // Save the lossless-mode CU data (including coefficients) so it can be copied back after the mode loop
+        if (bIsLosslessMode)
+        {
+            tmpCu->copyPartFrom(cu, 0, depth, false);
+        }
     }
 
     X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
 
+    if (bestTransquantBypassFlag && !m_param->bLossless)
+    {
+        assert(log2CUSize > 2);
+        cu->setCUTransquantBypassSubParts(true, 0, depth);
+        cu->copyPartFrom(tmpCu, 0, depth, false);
+    }
+
     if (cu->getQtRootCbf(0))
         outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
     else
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Mon Aug 25 17:53:12 2014 +0900
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Mon Aug 25 16:41:31 2014 -0500
@@ -147,7 +147,7 @@
 
     /// encode residual and compute rd-cost for inter mode
     void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
-                                   TComYuv* reconYuv);
+                                   TComYuv* reconYuv, TComDataCU* tmpCu);
     void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv);
 
     void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Aug 25 17:53:12 2014 +0900
+++ b/source/encoder/analysis.cpp	Mon Aug 25 16:41:31 2014 -0500
@@ -82,7 +82,7 @@
         uint32_t sizeL = cuSize * cuSize;
         uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));
 
-        ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, tqBypass);
+        ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, tqBypass);
 
         m_interCU_2Nx2N[i]  = new TComDataCU;
         m_interCU_2Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 0, tqBypass);
@@ -108,6 +108,9 @@
         m_tempCU[i]         = new TComDataCU;
         m_tempCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 7, tqBypass);
 
+        m_tempLosslessCU[i] = new TComDataCU;
+        m_tempLosslessCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 8, tqBypass);
+
         m_bestPredYuv[i] = new TComYuv;
         ok &= m_bestPredYuv[i]->create(cuSize, cuSize, csp);
 
@@ -158,6 +161,7 @@
         delete m_bestMergeCU[i];
         delete m_bestCU[i];
         delete m_tempCU[i];
+        delete m_tempLosslessCU[i];
 
         if (m_bestPredYuv && m_bestPredYuv[i])
         {
@@ -240,6 +244,7 @@
     // initialize CU data
     m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
     m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());
+    m_tempLosslessCU[0]->initCU(cu->m_pic, cu->getAddr());
 
     // analysis of CU
     uint32_t numPartition = cu->getTotalNumPart();
@@ -394,6 +399,7 @@
         uint32_t    nextDepth     = depth + 1;
         TComDataCU* subBestPartCU = m_bestCU[nextDepth];
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
+        TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
         {
             int qp = outTempCU->getQP(0);
@@ -404,6 +410,7 @@
                  (subBestPartCU->getCUPelY() < slice->m_sps->picHeightInLumaSamples)))
             {
                 subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
+                subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear or initialize the sub-partition data
                 if (0 == partUnitIdx) //initialize RD with previous depth buffer
                 {
                     m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
@@ -663,7 +670,7 @@
                     }
 
                     encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
-                                              m_bestResiYuv[depth], m_bestRecoYuv[depth]);
+                                              m_bestResiYuv[depth], m_bestRecoYuv[depth], m_tempLosslessCU[depth]);
                     uint64_t bestMergeCost = m_rdCost.m_psyRd ? m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost;
                     uint64_t bestCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
                     if (bestMergeCost < bestCost)
@@ -733,7 +740,7 @@
                         }
 
                         encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
-                                                  m_bestResiYuv[depth], m_bestRecoYuv[depth]);
+                                                  m_bestResiYuv[depth], m_bestRecoYuv[depth], m_tempLosslessCU[depth]);
                         m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
                     }
                     else if (outBestCU->getPredictionMode(0) == MODE_INTRA)
@@ -880,10 +887,12 @@
         outTempCU->setQPSubParts(qp, 0, depth);
         uint32_t    nextDepth = depth + 1;
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
+        TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
         {
             TComDataCU* subBestPartCU = NULL;
             subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
+            subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp);
 
             if (bInsidePicture ||
                 ((subTempPartCU->getCUPelX() < slice->m_sps->picWidthInLumaSamples) &&
@@ -1258,10 +1267,12 @@
         uint32_t    nextDepth     = depth + 1;
         TComDataCU* subBestPartCU = m_bestCU[nextDepth];
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
+        TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth];
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
         {
             int qp = outTempCU->getQP(0);
             subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
+            subTempLosslessPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp);
 
             if (bInsidePicture ||
                 ((subBestPartCU->getCUPelX() < slice->m_sps->picWidthInLumaSamples) &&
@@ -1433,7 +1444,7 @@
             }
 
             //Encode with residue
-            encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth]);
+            encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], m_tempLosslessCU[depth]);
 
             uint64_t tempCost = m_rdCost.m_psyRd ? outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;
             uint64_t bestCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
@@ -1506,7 +1517,8 @@
                                                   m_tmpPredYuv[depth],
                                                   m_tmpResiYuv[depth],
                                                   m_bestResiYuv[depth],
-                                                  m_tmpRecoYuv[depth]);
+                                                  m_tmpRecoYuv[depth],
+                                                  m_tempLosslessCU[depth]);
 
 
                     /* Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low? */
@@ -1590,7 +1602,7 @@
 
     if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true))
     {
-        encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth]);
+        encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], m_tempLosslessCU[depth]);
         checkDQP(outTempCU);
         checkBestMode(outBestCU, outTempCU, depth);
     }
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.h
--- a/source/encoder/analysis.h	Mon Aug 25 17:53:12 2014 +0900
+++ b/source/encoder/analysis.h	Mon Aug 25 16:41:31 2014 -0500
@@ -82,6 +82,7 @@
     TComDataCU*  m_bestMergeCU[NUM_CU_DEPTH];
     TComDataCU*  m_bestCU[NUM_CU_DEPTH]; // Best CUs at each depth
     TComDataCU*  m_tempCU[NUM_CU_DEPTH]; // Temporary CUs at each depth
+    TComDataCU*  m_tempLosslessCU[NUM_CU_DEPTH]; // Temporary CUs for lossless at each depth
 
     TComYuv**    m_bestPredYuv;          // Best Prediction Yuv for each depth
     ShortYuv**   m_bestResiYuv;          // Best Residual Yuv for each depth