[x265] search: separate bSkipRes == true path

Satoshi Nakagawa nakagawa424 at oki.com
Mon Jul 28 11:52:34 CEST 2014


# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1406540991 -32400
#      Mon Jul 28 18:49:51 2014 +0900
# Node ID a4beebdb70524da737d4d5d11e6b55961b9ef988
# Parent  8bab5275baed85f8a6e183d7edfeba9a516a3669
search: separate bSkipRes == true path

diff -r 8bab5275baed -r a4beebdb7052 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Jul 28 00:14:55 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Jul 28 18:49:51 2014 +0900
@@ -2268,6 +2268,57 @@
     mvmax.y = X265_MIN(mvmax.y, m_refLagPixels);
 }
 
+void TEncSearch::encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* outReconYuv)
+{
+    X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
+
+    uint32_t log2CUSize = cu->getLog2CUSize(0);
+    uint32_t cuSize = 1 << log2CUSize;
+    uint8_t  depth  = cu->getDepth(0);
+
+    int hChromaShift = CHROMA_H_SHIFT(m_csp);
+    int vChromaShift = CHROMA_V_SHIFT(m_csp);
+
+    // No residual coding : SKIP mode
+
+    cu->setSkipFlagSubParts(true, 0, depth);
+    cu->setTrIdxSubParts(0, 0, depth);
+    cu->clearCbf(0, depth);
+
+    outReconYuv->copyFromYuv(predYuv);
+    // Luma
+    int part = partitionFromLog2Size(log2CUSize);
+    uint32_t distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
+    // Chroma
+    part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
+    distortion += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
+    distortion += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
+
+    m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
+    m_entropyCoder->resetBits();
+    if (cu->m_slice->m_pps->bTransquantBypassEnabled)
+        m_entropyCoder->codeCUTransquantBypassFlag(cu, 0);
+    m_entropyCoder->codeSkipFlag(cu, 0);
+    m_entropyCoder->codeMergeIndex(cu, 0);
+
+    uint32_t bits = m_entropyCoder->getNumberOfWrittenBits();
+    cu->m_mvBits = bits;
+    cu->m_coeffBits = 0;
+    cu->m_totalBits       = bits;
+    cu->m_totalDistortion = distortion;
+    if (m_rdCost.psyRdEnabled())
+    {
+        int size = log2CUSize - 2;
+        cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
+                                           outReconYuv->getLumaAddr(), outReconYuv->getStride());
+        cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, cu->m_psyEnergy);
+    }
+    else
+        cu->m_totalRDCost = m_rdCost.calcRdCost(cu->m_totalDistortion, cu->m_totalBits);
+
+    m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
+}
+
 /** encode residual and calculate rate-distortion for a CU block
  * \param cu
  * \param fencYuv
@@ -2275,17 +2326,14 @@
  * \param outResiYuv
  * \param outBestResiYuv
  * \param outReconYuv
- * \param bSkipRes
  * \returns void
  */
 void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv,
-                                           ShortYuv* outBestResiYuv, TComYuv* outReconYuv, bool bSkipRes, bool curUseRDOQ)
+                                           ShortYuv* outBestResiYuv, TComYuv* outReconYuv, bool curUseRDOQ)
 {
-    if (cu->isIntra(0))
-        return;
-
-    uint32_t bits = 0, bestBits = 0, bestCoeffBits = 0;
-    uint32_t distortion = 0, bestDist = 0;
+    X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
+
+    uint32_t bestBits = 0, bestCoeffBits = 0;
 
     uint32_t log2CUSize = cu->getLog2CUSize(0);
     uint32_t cuSize = 1 << log2CUSize;
@@ -2294,77 +2342,33 @@
     int hChromaShift = CHROMA_H_SHIFT(m_csp);
     int vChromaShift = CHROMA_V_SHIFT(m_csp);
 
-    // No residual coding : SKIP mode
-    if (bSkipRes)
+    m_trQuant.setQPforQuant(cu);
+
+    outResiYuv->subtract(fencYuv, predYuv, log2CUSize);
+
+    // Residual coding.
+    bool bIsTQBypassEnable = cu->m_slice->m_pps->bTransquantBypassEnabled;
+    uint32_t tqBypassMode  = 1;
+
+    if (bIsTQBypassEnable)
     {
-        cu->setSkipFlagSubParts(true, 0, depth);
-
-        outReconYuv->copyFromYuv(predYuv);
-        // Luma
-        int part = partitionFromLog2Size(log2CUSize);
-        distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
-        // Chroma
-        part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
-        distortion += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
-        distortion += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
-
-        m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
-        m_entropyCoder->resetBits();
-        if (cu->m_slice->m_pps->bTransquantBypassEnabled)
-            m_entropyCoder->codeCUTransquantBypassFlag(cu, 0);
-        m_entropyCoder->codeSkipFlag(cu, 0);
-        m_entropyCoder->codeMergeIndex(cu, 0);
-
-        bits = m_entropyCoder->getNumberOfWrittenBits();
-        cu->m_mvBits = bits;
-        cu->m_coeffBits = 0;
-        cu->m_totalBits       = bits;
-        cu->m_totalDistortion = distortion;
-        if (m_rdCost.psyRdEnabled())
-        {
-            int size = log2CUSize - 2;
-            cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
-                                               outReconYuv->getLumaAddr(), outReconYuv->getStride());
-            cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(cu->m_totalDistortion, cu->m_totalBits, cu->m_psyEnergy);
-        }
-        else
-            cu->m_totalRDCost = m_rdCost.calcRdCost(cu->m_totalDistortion, cu->m_totalBits);
-
-        m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
-
-        cu->clearCbf(0, depth);
-        cu->setTrIdxSubParts(0, 0, depth);
-        return;
-    }
-
-    m_trQuant.setQPforQuant(cu);
-
-    outResiYuv->subtract(fencYuv, predYuv, log2CUSize);
-
-    // Residual coding.
-    bool bIsTQBypassEnable = false, bIsLosslessMode = false;
-    uint32_t tqBypassMode  = 1;
-
-    if ((cu->m_slice->m_pps->bTransquantBypassEnabled))
-    {
-        bIsTQBypassEnable = true; // mark that the first iteration is to cost TQB mode.
-        tqBypassMode = 2;
-        if (m_param->bLossless)
-            tqBypassMode = 1;
+        // unless lossless, cost two passes: pass 0 is TQ-bypass (TQB) mode, pass 1 is regular coding.
+        if (!m_param->bLossless)
+            tqBypassMode = 2;
     }
 
     uint64_t bestCost = MAX_INT64;
 
     for (uint32_t modeId = 0; modeId < tqBypassMode; modeId++)
     {
-        bIsLosslessMode = bIsTQBypassEnable && !modeId;
+        bool bIsLosslessMode = bIsTQBypassEnable && !modeId;
 
         cu->setCUTransquantBypassSubParts(bIsLosslessMode, 0, depth);
 
         uint64_t cost = 0;
         uint32_t zeroDistortion = 0;
-        bits = 0;
-        distortion = 0;
+        uint32_t bits = 0;
+        uint32_t distortion = 0;
 
         m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
         xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth, cost, bits, distortion, &zeroDistortion, curUseRDOQ);
@@ -2426,48 +2430,42 @@
             bestCoeffBits = cu->m_coeffBits;
             m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
         }
-
-        X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
-
-        if (cu->getQtRootCbf(0))
-            outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
-        else
-            outReconYuv->copyFromYuv(predYuv);
-
-        // update with clipped distortion and cost (qp estimation loop uses unclipped values)
-        int part = partitionFromLog2Size(log2CUSize);
-        bestDist = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
-        part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
-        bestDist += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
-        bestDist += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
-        if (m_rdCost.psyRdEnabled())
-        {
-            int size = log2CUSize - 2;
-            cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
-                                               outReconYuv->getLumaAddr(), outReconYuv->getStride());
-            cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(bestDist, bestBits, cu->m_psyEnergy);
-        }
-        else
-            cu->m_totalRDCost = m_rdCost.calcRdCost(bestDist, bestBits);
-        cu->m_totalBits       = bestBits;
-        cu->m_totalDistortion = bestDist;
-        cu->m_coeffBits = bestCoeffBits;
-        cu->m_mvBits = bestBits - bestCoeffBits;
-
-        if (cu->isSkipped(0))
-            cu->clearCbf(0, depth);
     }
+
+    X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
+
+    if (cu->getQtRootCbf(0))
+        outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
+    else
+        outReconYuv->copyFromYuv(predYuv);
+
+    // update with clipped distortion and cost (qp estimation loop uses unclipped values)
+    int part = partitionFromLog2Size(log2CUSize);
+    uint32_t bestDist = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
+    part = partitionFromSizes(cuSize >> hChromaShift, cuSize >> vChromaShift);
+    bestDist += m_rdCost.scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));
+    bestDist += m_rdCost.scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));
+    if (m_rdCost.psyRdEnabled())
+    {
+        int size = log2CUSize - 2;
+        cu->m_psyEnergy = m_rdCost.psyCost(size, fencYuv->getLumaAddr(), fencYuv->getStride(),
+                                           outReconYuv->getLumaAddr(), outReconYuv->getStride());
+        cu->m_totalPsyCost = m_rdCost.calcPsyRdCost(bestDist, bestBits, cu->m_psyEnergy);
+    }
+    else
+        cu->m_totalRDCost = m_rdCost.calcRdCost(bestDist, bestBits);
+
+    cu->m_totalBits       = bestBits;
+    cu->m_totalDistortion = bestDist;
+    cu->m_coeffBits = bestCoeffBits;
+    cu->m_mvBits = bestBits - bestCoeffBits;
+
+    if (cu->isSkipped(0))
+        cu->clearCbf(0, depth);
 }
 
-void TEncSearch::generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, bool skipRes)
+void TEncSearch::generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv)
 {
-    if (skipRes && cu->getPredictionMode(0) == MODE_INTER && cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)
-    {
-        reconYuv->copyFromYuv(predYuv);
-        cu->clearCbf(0, cu->getDepth(0));
-        return;
-    }
-
     m_trQuant.setQPforQuant(cu);
 
     if (cu->getPredictionMode(0) == MODE_INTER)
diff -r 8bab5275baed -r a4beebdb7052 source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Mon Jul 28 00:14:55 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Mon Jul 28 18:49:51 2014 +0900
@@ -149,14 +149,15 @@
 
     /// encode residual and compute rd-cost for inter mode
     void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv,
-                                   TComYuv* reconYuv, bool bSkipRes, bool curUseRDOQ);
+                                   TComYuv* reconYuv, bool curUseRDOQ);
+    void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv);
 
     void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
                              TComYuv* predYuv, ShortYuv* resiYuv, uint32_t& distY, bool bCheckFirst,
                              uint64_t& dRDCost);
     void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);
 
-    void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, bool skipRes);
+    void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);
 
     void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, uint32_t depth,
                              uint64_t &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
diff -r 8bab5275baed -r a4beebdb7052 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Jul 28 00:14:55 2014 -0500
+++ b/source/encoder/analysis.cpp	Mon Jul 28 18:49:51 2014 +0900
@@ -665,7 +665,7 @@
                     }
 
                     encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
-                                              m_bestResiYuv[depth], m_bestRecoYuv[depth], false, true);
+                                              m_bestResiYuv[depth], m_bestRecoYuv[depth], true);
                     uint64_t bestMergeCost = m_rdCost.psyRdEnabled() ? m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost;
                     uint64_t bestCost = m_rdCost.psyRdEnabled() ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
                     if (bestMergeCost < bestCost)
@@ -738,7 +738,7 @@
                         }
 
                         encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
-                                                  m_bestResiYuv[depth], m_bestRecoYuv[depth], false, true);
+                                                  m_bestResiYuv[depth], m_bestRecoYuv[depth], true);
                         m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
                     }
                     else if (outBestCU->getPredictionMode(0) == MODE_INTRA)
@@ -763,10 +763,10 @@
                         }
 
                         m_tmpResiYuv[depth]->subtract(m_origYuv[depth], m_bestPredYuv[depth], outBestCU->getLog2CUSize(0));
-                        generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], false);
+                        generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);
                     }
                     else
-                        generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], false);
+                        generateCoeffRecon(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);
                 }
                 else if (m_param->rdLevel == 0)
                 {
@@ -1419,13 +1419,13 @@
             else
             {
                 //No-residue mode
-                encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], true, true);
+                encodeResAndCalcRdSkipCU(outBestCU, m_origYuv[depth], bestPredYuv, m_tmpRecoYuv[depth]);
                 std::swap(yuvReconBest, m_tmpRecoYuv[depth]);
                 m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
             }
 
             //Encode with residue
-            encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false, true);
+            encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], true);
 
             uint64_t tempCost = m_rdCost.psyRdEnabled() ? outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;
             uint64_t bestCost = m_rdCost.psyRdEnabled() ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
@@ -1487,14 +1487,20 @@
                     outTempCU->getPartIndexAndSize(0, m_partAddr, m_width, m_height);
                     motionCompensation(outTempCU, m_tmpPredYuv[depth], REF_PIC_LIST_X, true, true);
                     // estimate residual and encode everything
-                    encodeResAndCalcRdInterCU(outTempCU,
-                                              m_origYuv[depth],
-                                              m_tmpPredYuv[depth],
-                                              m_tmpResiYuv[depth],
-                                              m_bestResiYuv[depth],
-                                              m_tmpRecoYuv[depth],
-                                              !!noResidual,
-                                              true);
+                    if (noResidual)
+                        encodeResAndCalcRdSkipCU(outTempCU,
+                                                 m_origYuv[depth],
+                                                 m_tmpPredYuv[depth],
+                                                 m_tmpRecoYuv[depth]);
+                    else
+                        encodeResAndCalcRdInterCU(outTempCU,
+                                                  m_origYuv[depth],
+                                                  m_tmpPredYuv[depth],
+                                                  m_tmpResiYuv[depth],
+                                                  m_bestResiYuv[depth],
+                                                  m_tmpRecoYuv[depth],
+                                                  true);
+
 
                     /* Todo: Fix the satd cost estimates. Why is merge being chosen in high motion areas: estimated distortion is too low? */
                     if (!noResidual && !outTempCU->getQtRootCbf(0))
@@ -1577,7 +1583,7 @@
 
     if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true))
     {
-        encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false, true);
+        encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], true);
         checkDQP(outTempCU);
         checkBestMode(outBestCU, outTempCU, depth);
     }
@@ -1922,7 +1928,7 @@
     else
     {
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
-        generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth],  m_tmpRecoYuv[depth], false);
+        generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
         checkDQP(cu);
         m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), lcu->getAddr(), absPartIdx);
         cu->copyCodedToPic(depth);


More information about the x265-devel mailing list