[x265] [PATCH 2 of 2] no-rdo: Move residual encoding at depth 0

deepthidevaki at multicorewareinc.com deepthidevaki at multicorewareinc.com
Fri Nov 22 12:13:52 CET 2013


# HG changeset patch
# User Deepthi Devaki <deepthidevaki at multicorewareinc.com>
# Date 1385118778 -19800
# Node ID aaa803dfbe6f334db45e943dbfb40a7d4ac38142
# Parent  883d9279fde14aad2f2042c5aaaa2c98700a9b8b
no-rdo: Move residual encoding at depth 0.

During mode decision, residual encoding is done without RDOQ. At depth 0, residual encoding is then done with RDOQ on the final best modes.
Intra residual encoding is not done at depth 0. Hence the changes are disabled with the macro NORESENC.

diff -r 883d9279fde1 -r aaa803dfbe6f source/Lib/TLibEncoder/TEncCu.h
--- a/source/Lib/TLibEncoder/TEncCu.h	Fri Nov 22 16:41:56 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncCu.h	Fri Nov 22 16:42:58 2013 +0530
@@ -191,6 +191,8 @@
                            bool &bTestMergeAMP_Hor, bool &bTestMergeAMP_Ver);
 
     void xFillPCMBuffer(TComDataCU* outCU, TComYuv* origYuv);
+
+    void xEncodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, UChar depth, uint32_t partIndex = 0);
 };
 }
 //! \}
diff -r 883d9279fde1 -r aaa803dfbe6f source/Lib/TLibEncoder/TEncSearch.h
--- a/source/Lib/TLibEncoder/TEncSearch.h	Fri Nov 22 16:41:56 2013 +0530
+++ b/source/Lib/TLibEncoder/TEncSearch.h	Fri Nov 22 16:42:58 2013 +0530
@@ -186,6 +186,11 @@
 
     uint32_t xSymbolBitsInter(TComDataCU* cu);
 
+    void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth,
+                             uint64_t &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
+
+    void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth, bool bSpatial);
+
 protected:
 
     // --------------------------------------------------------------------------------------------
@@ -249,9 +254,6 @@
     // -------------------------------------------------------------------------------------------------------------------
 
     void xEncodeResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bSubdivAndCbf, TextType ttype);
-    void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth,
-                             uint64_t &rdCost, uint32_t &outBits, uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
-    void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth, bool bSpatial);
 
     void setWpScalingDistParam(TComDataCU* cu, int refIdx, int picList);
 };
diff -r 883d9279fde1 -r aaa803dfbe6f source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Fri Nov 22 16:41:56 2013 +0530
+++ b/source/encoder/compress.cpp	Fri Nov 22 16:42:58 2013 +0530
@@ -27,6 +27,7 @@
 /* Lambda Partition Select adjusts the threshold value for Early Exit in No-RDO flow */
 #define LAMBDA_PARTITION_SELECT     0.9
 #define EARLY_EXIT                  1
+#define NORESENC 0
 
 using namespace x265;
 
@@ -137,7 +138,7 @@
         // Filtered and Unfiltered refAbove and refLeft pointing to above and left.
         above         = aboveScale;
         left          = leftScale;
-        aboveFiltered = aboveScale; 
+        aboveFiltered = aboveScale;
         leftFiltered  = leftScale;
     }
 
@@ -303,7 +304,11 @@
     m_tmpRecoYuv[depth] = yuv;
 
     //Encode with residue
+#if NORESENC
+    m_search->estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false, false);
+#else
     m_search->estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false);
+#endif
 
     if (outTempCU->m_totalCost < outBestCU->m_totalCost)    //Choose best from no-residue mode and residue mode
     {
@@ -458,9 +463,13 @@
                 m_search->motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, partIdx, false, true);
             }
 
+#if NORESENC
+            m_search->estimateRDInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
+                                        m_bestResiYuv[depth], m_bestRecoYuv[depth], false, false);
+#else
             m_search->estimateRDInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
                                         m_bestResiYuv[depth], m_bestRecoYuv[depth], false);
-
+#endif
 
             if (m_bestMergeCU[depth]->m_totalCost < outBestCU->m_totalCost)
             {
@@ -474,7 +483,8 @@
                 m_bestMergeRecoYuv[depth] = tempYuv;
             }
 
-            /* Check for Intra in inter frames only if its a P-slice*/
+#if !NORESENC
+            /*Check for Intra in inter frames only if its a P-slice*/
             if (outBestCU->getSlice()->getSliceType() == P_SLICE)
             {
                 /*compute intra cost */
@@ -498,6 +508,7 @@
                     }
                 }
             }
+#endif // if !NORESENC
         }
         else
         {
@@ -537,7 +548,7 @@
     if (bSubBranch && bTrySplitDQP && depth < g_maxCUDepth - g_addCUDepth)
     {
 #if EARLY_EXIT // turn ON this to enable early exit
-        // early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour 
+        // early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour
         // CU's(above, aboveleft, aboveright, left, colocated) and avg cost of that CU at depth "n"  with weightage for each quantity
         if (outBestCU != 0)
         {
@@ -628,7 +639,12 @@
 #endif // if EARLY_EXIT
                 /* Adding costs from best SUbCUs */
                 outTempCU->copyPartFrom(subBestPartCU, nextDepth_partIndex, nextDepth, true); // Keep best part data to current temporary data.
+#if !NORESENC
                 xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * nextDepth_partIndex, nextDepth);
+#endif
+#if NORESENC
+                m_bestPredYuv[nextDepth]->copyToPartYuv(m_tmpPredYuv[depth], subBestPartCU->getTotalNumPart() * nextDepth_partIndex);
+#endif
             }
             else if (bInSlice)
             {
@@ -727,17 +743,29 @@
             if (outTempCU->m_totalCost < outBestCU->m_totalCost)
             {
                 outBestCU = outTempCU;
+#if !NORESENC
                 tempYuv = m_tmpRecoYuv[depth];
                 m_tmpRecoYuv[depth] = m_bestRecoYuv[depth];
                 m_bestRecoYuv[depth] = tempYuv;
+#else
+                tempYuv = m_tmpPredYuv[depth];
+                m_tmpPredYuv[depth] = m_bestPredYuv[depth];
+                m_bestPredYuv[depth] = tempYuv;
+#endif
             }
         }
         else
         {
             outBestCU = outTempCU;
+#if !NORESENC
             tempYuv = m_tmpRecoYuv[depth];
             m_tmpRecoYuv[depth] = m_bestRecoYuv[depth];
             m_bestRecoYuv[depth] = tempYuv;
+#else
+            tempYuv = m_tmpPredYuv[depth];
+            m_tmpPredYuv[depth] = m_bestPredYuv[depth];
+            m_bestPredYuv[depth] = tempYuv;
+#endif
         }
     }
 
@@ -782,6 +810,12 @@
     /* Copy Best data to Picture for next partition prediction. */
     outBestCU->copyToPic((UChar)depth);
 
+#if NORESENC
+    if (depth == 0)
+    {
+        xEncodeResidue(outBestCU, outBestCU, 0, 0);
+    }
+#endif
     /* Copy Yuv data to picture Yuv */
     xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpely);
 
@@ -797,3 +831,130 @@
     assert(outBestCU->getPredictionMode(0) != MODE_NONE);
     assert(outBestCU->m_totalCost != MAX_DOUBLE);
 }
+
+void TEncCu::xEncodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, UChar depth, uint32_t partIndex) // Encodes the residual of each leaf CU under lcu (recursing by depth) and builds the reconstruction in m_bestRecoYuv[0]
+{ // lcu: CU whose depth/cbf/merge data is read at absPartIdx; cu: working CU for this depth (read at index 0); partIndex: quadrant index of cu within its parent (0 for the first)
+    UChar nextDepth = (UChar)(depth + 1); // depth of the four sub-CUs
+    TComDataCU* subTempPartCU = m_tempCU[nextDepth]; // scratch CU that holds each sub-CU while recursing
+    TComPic* pic = cu->getPic();
+    TComSlice* slice = cu->getPic()->getSlice();
+
+    if (depth != 0) // below the root: seed this depth's CI_CURR_BEST coder state before doing anything else
+    {
+        if (0 == partIndex)         // first quadrant: load state from the parent depth's CI_CURR_BEST
+        {
+            m_rdSbacCoders[depth][CI_CURR_BEST]->load(m_rdSbacCoders[depth - 1][CI_CURR_BEST]);
+        }
+        else // later quadrants: load this depth's CI_NEXT_BEST state instead
+        {
+            m_rdSbacCoders[depth][CI_CURR_BEST]->load(m_rdSbacCoders[depth][CI_NEXT_BEST]);
+        }
+    }
+
+    if (((depth < lcu->getDepth(absPartIdx)) && (depth < (g_maxCUDepth - g_addCUDepth)))) // lcu records a deeper CU at absPartIdx: recurse into the four quadrants
+    {
+        uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2; // partition count of one quadrant: a quarter of the count at this depth
+        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
+        {
+            uint32_t lpelx = lcu->getCUPelX() + g_rasterToPelX[g_zscanToRaster[absPartIdx]]; // x of this quadrant, compared against the picture width below
+            uint32_t tpely = lcu->getCUPelY() + g_rasterToPelY[g_zscanToRaster[absPartIdx]]; // y of this quadrant, compared against the picture height below
+            bool bInSlice = lcu->getSCUAddr() + absPartIdx < slice->getSliceCurEndCUAddr();
+            if (bInSlice && (lpelx < slice->getSPS()->getPicWidthInLumaSamples()) && (tpely < slice->getSPS()->getPicHeightInLumaSamples())) // skip quadrants outside the slice or the picture
+            {
+                subTempPartCU->copyToSubCU(cu, partUnitIdx, depth + 1); // NOTE(review): presumably sets up subTempPartCU as quadrant partUnitIdx of cu - confirm
+                xEncodeResidue(lcu, subTempPartCU, absPartIdx, depth + 1, partUnitIdx);
+            }
+        }
+
+        return; // a split CU does no encoding of its own
+    }
+
+    if (lcu->getQtRootCbf(absPartIdx)) // leaf CU whose root cbf is set in lcu: encode its residual
+    {
+        uint64_t cost = 0;
+        uint32_t bits = 0;
+        uint32_t distortion = 0;
+
+        m_search->m_rdGoOnSbacCoder->load(m_search->m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]); // start the search's coder from CI_CURR_BEST at the CU's depth
+
+        // Calculate residue from the original (src1) and prediction (src2); depth-0 buffers are addressed by absPartIdx
+        Pel* src2 = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
+        Pel* src1 = m_origYuv[0]->getLumaAddr(absPartIdx);
+        int16_t* dst = m_tmpResiYuv[depth]->getLumaAddr(0); // residue goes to offset 0 of this depth's temporary buffer
+        uint32_t src2stride = m_bestPredYuv[0]->getStride();
+        uint32_t src1stride = m_origYuv[0]->getStride();
+        uint32_t dststride = m_tmpResiYuv[depth]->m_width;
+        int part = partitionFromSizes(cu->getWidth(0), cu->getWidth(0)); // width is passed for both dimensions (assumes a square CU - confirm)
+        primitives.luma_sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
+
+        src2 = m_bestPredYuv[0]->getCbAddr(absPartIdx); // Cb plane
+        src1 = m_origYuv[0]->getCbAddr(absPartIdx);
+        dst = m_tmpResiYuv[depth]->getCbAddr(0);
+        src2stride = m_bestPredYuv[0]->getCStride();
+        src1stride = m_origYuv[0]->getCStride();
+        dststride = m_tmpResiYuv[depth]->m_cwidth;
+        primitives.chroma[m_cfg->param.internalCsp].sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
+
+        src2 = m_bestPredYuv[0]->getCrAddr(absPartIdx); // Cr plane: reuses the chroma source strides set for Cb
+        src1 = m_origYuv[0]->getCrAddr(absPartIdx);
+        dst = m_tmpResiYuv[depth]->getCrAddr(0);
+        dststride = m_tmpResiYuv[depth]->m_cwidth;
+        primitives.chroma[m_cfg->param.internalCsp].sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
+
+        // Residual encoding; the trailing 'true' is the curUseRDOQ argument
+        uint32_t zeroDistortion = 0;
+        m_search->xEstimateResidualQT(cu, 0, 0, m_tmpResiYuv[depth], cu->getDepth(0), cost, bits, distortion, &zeroDistortion, true);
+        m_search->xSetResidualQTData(cu, 0, 0, NULL, cu->getDepth(0), false); // bSpatial = false, no residual buffer; NOTE(review): presumably stores the chosen transform data in cu - confirm
+
+        if (lcu->getMergeFlag(absPartIdx) && cu->getPartitionSize(0) == SIZE_2Nx2N && !cu->getQtRootCbf(0)) // merge 2Nx2N CU left with no coded residual: mark it as skip
+        {
+            cu->setSkipFlagSubParts(true, 0, depth);
+            cu->copyCodedToPic(depth); // no return here: falls through to the prediction-copy reconstruction at the end
+        }
+        else
+        {
+            cu->copyCodedToPic(depth);
+            m_search->xSetResidualQTData(cu, 0, 0, m_tmpResiYuv[depth], cu->getDepth(0), true); // bSpatial = true; NOTE(review): presumably writes the decoded residual into m_tmpResiYuv[depth] - confirm
+
+            // Generate recon from prediction and residual, written into m_bestRecoYuv[0] at absPartIdx
+            Pel* pred = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
+            int16_t* res = m_tmpResiYuv[depth]->getLumaAddr(0);
+            Pel* reco = m_bestRecoYuv[0]->getLumaAddr(absPartIdx);
+            dststride = m_bestRecoYuv[0]->getStride();
+            src1stride = m_bestPredYuv[0]->getStride();
+            src2stride = m_tmpResiYuv[depth]->m_width;
+            primitives.luma_add_ps[part](reco, dststride, pred, res, src1stride, src2stride);
+
+            pred = m_bestPredYuv[0]->getCbAddr(absPartIdx); // Cb plane
+            res = m_tmpResiYuv[depth]->getCbAddr(0);
+            reco = m_bestRecoYuv[0]->getCbAddr(absPartIdx);
+            dststride = m_bestRecoYuv[0]->getCStride();
+            src1stride = m_bestPredYuv[0]->getCStride();
+            src2stride = m_tmpResiYuv[depth]->m_cwidth;
+            primitives.chroma[m_cfg->param.internalCsp].add_ps[part](reco, dststride, pred, res, src1stride, src2stride);
+
+            pred = m_bestPredYuv[0]->getCrAddr(absPartIdx); // Cr plane: reuses the chroma strides set for Cb
+            res = m_tmpResiYuv[depth]->getCrAddr(0);
+            reco = m_bestRecoYuv[0]->getCrAddr(absPartIdx);
+            primitives.chroma[m_cfg->param.internalCsp].add_ps[part](reco, dststride, pred, res, src1stride, src2stride);
+            return; // reconstruction is complete for the coded-residual case
+        }
+    }
+    // Generate recon when there is no residual to add: copy the prediction into m_bestRecoYuv[0]
+    int part = partitionFromSizes(cu->getWidth(0), cu->getWidth(0)); // width is passed for both dimensions (assumes a square CU - confirm)
+    Pel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
+    Pel* dst = m_bestRecoYuv[0]->getLumaAddr(absPartIdx);
+    uint32_t srcstride = m_bestPredYuv[0]->getStride();
+    uint32_t dststride = m_bestRecoYuv[0]->getStride();
+    primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
+
+    src = m_bestPredYuv[0]->getCbAddr(absPartIdx); // Cb plane
+    dst = m_bestRecoYuv[0]->getCbAddr(absPartIdx);
+    srcstride = m_bestPredYuv[0]->getCStride();
+    dststride = m_bestRecoYuv[0]->getCStride();
+    primitives.chroma[m_cfg->param.internalCsp].copy_pp[part](dst, dststride, src, srcstride);
+
+    src = m_bestPredYuv[0]->getCrAddr(absPartIdx); // Cr plane: reuses the chroma strides set for Cb
+    dst = m_bestRecoYuv[0]->getCrAddr(absPartIdx);
+    primitives.chroma[m_cfg->param.internalCsp].copy_pp[part](dst, dststride, src, srcstride);
+}


More information about the x265-devel mailing list