[x265] [PATCH] changes to Early Exit for No- RDO - performance gain of 14%

aarthi at multicorewareinc.com aarthi at multicorewareinc.com
Fri Jul 26 19:10:02 CEST 2013


# HG changeset patch
# User Aarthi<aarthi at multicorewareinc.com>
# Date 1374857788 -19800
#      Fri Jul 26 22:26:28 2013 +0530
# Node ID 68023f02bf3d02e9a254ddd8e679c734b5eb2342
# Parent  f2f70fa9b4f3f075629d02c35684d16bea67fee0
changes to Early Exit for No- RDO - performance gain of 14%

diff -r f2f70fa9b4f3 -r 68023f02bf3d source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Fri Jul 26 02:19:06 2013 -0500
+++ b/source/encoder/compress.cpp	Fri Jul 26 22:26:28 2013 +0530
@@ -105,6 +105,8 @@
     //===== determine set of modes to be tested (using prediction signal only) =====
     UInt numModesAvailable = 35; //total number of Intra modes
     Pel* fenc   = m_origYuv[depth]->getLumaAddr(0, width);
+    //  Pel* pred   = m_modePredYuv[5][depth]->getLumaAddr(0, width);
+    // UInt stride = m_modePredYuv[5][depth]->getStride();
     Pel* pred   = m_modePredYuv[5][depth]->getLumaAddr(0, width);
     UInt stride = m_modePredYuv[5][depth]->getStride();
     UInt rdModeList[FAST_UDI_MAX_RDMODE_NUM];
@@ -266,19 +268,19 @@
     m_search->predInterSearch(outTempCU, m_origYuv[depth], outPredYuv, bUseMRG);
     int part = PartitionFromSizes(outTempCU->getWidth(0), outTempCU->getHeight(0));
     outTempCU->m_totalCost = primitives.sse_pp[part](m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
-                                                        outPredYuv->getLumaAddr(), outPredYuv->getStride());
+                                                     outPredYuv->getLumaAddr(), outPredYuv->getStride());
 }
 
 /*Temporary macro for development only. Will be removed once the early exit is fully tested and profiled*/
-#define EARLY_EXIT_NO_RDO 0
-
+#define EARLY_EXIT_NO_RDO 1
+int cnt = 0;
 Void TEncCu::xCompressInterCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU*& cu, UInt depth, UInt PartitionIndex)
 {
 #if CU_STAT_LOGFILE
     cntTotalCu[depth]++;
 #endif
     m_abortFlag = false;
-
+    cnt++;
     TComPic* pic = outTempCU->getPic();
 
     // get Original YUV data from picture
@@ -289,12 +291,12 @@
     Bool bSubBranch = true;
     Bool bTrySplitDQP = true;
     Bool bBoundary = false;
-
+    UInt64 _NxNCost = 0, Inter2Nx2NCost = 0;
     UInt lpelx = outTempCU->getCUPelX();
     UInt rpelx = lpelx + outTempCU->getWidth(0)  - 1;
     UInt tpely = outTempCU->getCUPelY();
     UInt bpely = tpely + outTempCU->getHeight(0) - 1;
-
+    TComDataCU* subTempPartCU, * subBestPartCU;
     Int qp = m_cfg->getUseRateCtrl() ? m_rateControl->getRCQP() : outTempCU->getQP(0);
 
     // If slice start or slice end is within this cu...
@@ -335,15 +337,24 @@
 
         if (!earlyDetectionSkip)
         {
-            
             /*Compute 2Nx2N mode costs*/
-            xComputeCostInter(m_interCU_2Nx2N[depth], m_modePredYuv[0][depth], SIZE_2Nx2N);
-            /*Choose best mode; initialise outBestCU to 2Nx2N*/
-            outBestCU = m_interCU_2Nx2N[depth];
-            tempYuv = m_modePredYuv[0][depth];
-            m_modePredYuv[0][depth] = m_bestPredYuv[depth];
-            m_bestPredYuv[depth] = tempYuv;
-            
+            if (depth == 0)
+            {
+                xComputeCostInter(m_interCU_2Nx2N[depth], m_modePredYuv[0][depth], SIZE_2Nx2N);
+                /*Choose best mode; initialise outBestCU to 2Nx2N*/
+                outBestCU = m_interCU_2Nx2N[depth];
+                tempYuv = m_modePredYuv[0][depth];
+                m_modePredYuv[0][depth] = m_bestPredYuv[depth];
+                m_bestPredYuv[depth] = tempYuv;
+            }
+            else
+            {
+                outBestCU = m_interCU_NxN[PartitionIndex][depth];
+                tempYuv = m_bestPredYuvNxN[PartitionIndex][depth];
+                m_bestPredYuvNxN[PartitionIndex][depth] = m_bestPredYuv[depth];
+                m_bestPredYuv[depth] = tempYuv;
+            }
+            Inter2Nx2NCost = outBestCU->m_totalCost;
             bTrySplitDQP = bTrySplit;
 
             if ((Int)depth <= m_addSADDepth)
@@ -351,30 +362,66 @@
                 m_LCUPredictionSAD += m_temporalSAD;
                 m_addSADDepth = depth;
             }
+#if EARLY_EXIT_NO_RDO
 
-            /*Compute Rect costs*/
-            if (m_cfg->getUseRectInter())
+            if (depth < g_maxCUDepth - g_addCUDepth)
             {
-                xComputeCostInter(m_interCU_Nx2N[depth], m_modePredYuv[1][depth], SIZE_Nx2N);
-                xComputeCostInter(m_interCU_2NxN[depth], m_modePredYuv[2][depth], SIZE_2NxN);
+                outTempCU->initEstData(depth, qp);
+                UChar nextDepth = (UChar)(depth + 1);
+                /*Best CU initialised to NULL; */
+                subBestPartCU = NULL;
+                /*The temp structure is used for boundary analysis, and to copy Best SubCU mode data on return*/
+                subTempPartCU;
+                _NxNCost = 0;
+                for (UInt partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
+                {
+                    subTempPartCU = m_interCU_NxN[partUnitIdx][nextDepth];
+                    subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
+                    TComPic* subPic = subTempPartCU->getPic();
+                    m_origYuv[nextDepth]->copyFromPicYuv(subPic->getPicYuvOrg(), subTempPartCU->getAddr(), subTempPartCU->getZorderIdxInCU());
+
+                    Bool bInSlice = subTempPartCU->getSCUAddr() < slice->getSliceCurEndCUAddr();
+                    if (bInSlice && (subTempPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
+                        (subTempPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples()))
+                    {
+                        xComputeCostInter(subTempPartCU, m_bestPredYuvNxN[partUnitIdx][nextDepth], SIZE_2Nx2N, 0);
+                        _NxNCost += subTempPartCU->m_totalCost;
+                    }
+                    else if (bInSlice)
+                    {
+                        subTempPartCU->copyToPic((UChar)nextDepth);
+                        outTempCU->copyPartFrom(subTempPartCU, partUnitIdx, nextDepth, false);
+                    }
+                }
             }
 
-            
-            if (m_interCU_Nx2N[depth]->m_totalCost < outBestCU->m_totalCost)
+#endif // if EARLY_EXIT_NO_RDO
+
+            if (outBestCU->m_totalCost >  _NxNCost)
             {
-                outBestCU = m_interCU_Nx2N[depth];
+                /*Compute Rect costs*/
+                if (m_cfg->getUseRectInter())
+                {
+                    xComputeCostInter(m_interCU_Nx2N[depth], m_modePredYuv[1][depth], SIZE_Nx2N);
+                    xComputeCostInter(m_interCU_2NxN[depth], m_modePredYuv[2][depth], SIZE_2NxN);
+                }
 
-                tempYuv = m_modePredYuv[1][depth];
-                m_modePredYuv[1][depth] = m_bestPredYuv[depth];
-                m_bestPredYuv[depth] = tempYuv;
-            }
-            if (m_interCU_2NxN[depth]->m_totalCost < outBestCU->m_totalCost)
-            {
-                outBestCU = m_interCU_2NxN[depth];
+                if (m_interCU_Nx2N[depth]->m_totalCost < outBestCU->m_totalCost)
+                {
+                    outBestCU = m_interCU_Nx2N[depth];
 
-                tempYuv = m_modePredYuv[2][depth];
-                m_modePredYuv[2][depth] = m_bestPredYuv[depth];
-                m_bestPredYuv[depth] = tempYuv;
+                    tempYuv = m_modePredYuv[1][depth];
+                    m_modePredYuv[1][depth] = m_bestPredYuv[depth];
+                    m_bestPredYuv[depth] = tempYuv;
+                }
+                if (m_interCU_2NxN[depth]->m_totalCost < outBestCU->m_totalCost)
+                {
+                    outBestCU = m_interCU_2NxN[depth];
+
+                    tempYuv = m_modePredYuv[2][depth];
+                    m_modePredYuv[2][depth] = m_bestPredYuv[depth];
+                    m_bestPredYuv[depth] = tempYuv;
+                }
             }
 
             m_search->encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_bestRecoYuv[depth], false);
@@ -414,7 +461,27 @@
                     m_tmpRecoYuv[depth] = tmpPic;
                 }
             }
+
+            // further split
+#if EARLY_EXIT_NO_RDO
+
+            if (Inter2Nx2NCost < _NxNCost && depth < g_maxCUDepth - g_addCUDepth)
+            {
+                m_entropyCoder->resetBits();
+                m_entropyCoder->encodeSplitFlag(outBestCU, 0, depth, true);
+                outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits();        // split bits
+                outBestCU->m_totalBins += ((TEncBinCABAC*)((TEncSbac*)m_entropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
+                outBestCU->m_totalCost  = m_rdCost->calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
+                /* Copy Best data to Picture for next partition prediction. */
+                outBestCU->copyToPic((UChar)depth);
+
+                /* Copy Yuv data to picture Yuv */
+                xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpely);
+                return;
+            }
+#endif // if EARLY_EXIT_NO_RDO
         }
+
         else
         {
             outBestCU = m_bestMergeCU[depth];
@@ -465,34 +532,6 @@
     {
         outTempCU->initEstData(depth, qp);
         UChar nextDepth = (UChar)(depth + 1);
-        /*Best CU initialised to NULL; */
-        TComDataCU* subBestPartCU = NULL;
-        /*The temp structure is used for boundary analysis, and to copy Best SubCU mode data on return*/
-        TComDataCU* subTempPartCU;
-
-#if EARLY_EXIT_NO_RDO
-        UInt64 _NxNCost = 0;
-        for (UInt nextDepth_partIndex = 0; nextDepth_partIndex < 4; nextDepth_partIndex++)
-        {
-            subTempPartCU = m_interCU_NxN[nextDepth_partIndex][nextDepth];
-            subTempPartCU->initSubCU(outTempCU, nextDepth_partIndex, nextDepth, qp); // clear sub partition datas or init.
-            TComPic* subPic = subTempPartCU->getPic();
-            m_origYuv[nextDepth]->copyFromPicYuv(subPic->getPicYuvOrg(), subTempPartCU->getAddr(), subTempPartCU->getZorderIdxInCU());
-            
-            Bool bInSlice = subTempPartCU->getSCUAddr() < slice->getSliceCurEndCUAddr();
-            if (bInSlice && (subTempPartCU->getCUPelX() < slice->getSPS()->getPicWidthInLumaSamples()) &&
-                (subTempPartCU->getCUPelY() < slice->getSPS()->getPicHeightInLumaSamples()))
-            {
-                xComputeCostInter(subTempPartCU, m_bestPredYuvNxN[nextDepth_partIndex][depth], SIZE_2Nx2N, 0);
-                _NxNCost += subTempPartCU->m_totalCost;
-            }
-            else if (bInSlice)
-            {
-                subTempPartCU->copyToPic((UChar)nextDepth);
-                outTempCU->copyPartFrom(subTempPartCU, nextDepth_partIndex, nextDepth, false);
-            }
-        }
-#endif
         subTempPartCU = m_tempCU[nextDepth];
         for (UInt nextDepth_partIndex = 0; nextDepth_partIndex < 4; nextDepth_partIndex++)
         {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265_fork.patch
Type: text/x-patch
Size: 11182 bytes
Desc: not available
URL: <http://mailman.videolan.org/private/x265-devel/attachments/20130726/932e33e6/attachment-0001.bin>


More information about the x265-devel mailing list