[x265] [PATCH] no_rdo: implementation of new early exit

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Tue Oct 29 11:03:21 CET 2013


# HG changeset patch
# User Sumalatha Polureddy
# Date 1383040993 -19800
# Node ID 717127253b0a3cebe263bd2e7ddb728368e5d140
# Parent  bf9686cc9e0f3ffd33f722ab941ba423513a6111
no_rdo: implementation of new early exit

Early exit is taken when the RD cost of the best mode at depth "n" is less than
the average RD cost, at depth "n", of the previously coded CUs (CUAbove,
CUAboveRight, CUAboveLeft, CULeft, CUColocated).
For HD videos:
  - performance improves by 20 to 27%
  - bitrate increases by 0.75 to 0.02%

diff -r bf9686cc9e0f -r 717127253b0a source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Tue Oct 29 15:29:02 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Tue Oct 29 15:33:13 2013 +0530
@@ -247,6 +247,12 @@
     m_totalBits        = 0;
     m_numPartitions    = pic->getNumPartInCU();
 
+    for (int i = 0; i < 4; i++)
+    {
+        m_avgCost[i] = 0;
+        m_count[i] = 0;
+    }
+
     // CHECK_ME: why partStartIdx always negative
     int partStartIdx = 0 - (cuAddr) * pic->getNumPartInCU();
 
@@ -470,6 +476,12 @@
     m_totalBits        = 0;
     m_numPartitions    = cu->getTotalNumPart() >> 2;
 
+    for (int i = 0; i < 4; i++)
+    {
+        m_avgCost[i] = cu->m_avgCost[i];
+        m_count[i] = cu->m_count[i];
+    }
+
     int iSizeInUchar = sizeof(UChar) * m_numPartitions;
     int iSizeInBool  = sizeof(bool) * m_numPartitions;
 
diff -r bf9686cc9e0f -r 717127253b0a source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Tue Oct 29 15:29:02 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h	Tue Oct 29 15:33:13 2013 +0530
@@ -178,6 +178,8 @@
     UInt64        m_totalCost;       ///< sum of partition RD costs
     uint32_t      m_totalDistortion; ///< sum of partition distortion
     uint32_t      m_totalBits;       ///< sum of partition signal bits
+    UInt64        m_avgCost[4];      // stores the avg cost of CU's in frame for each depth
+    uint32_t      m_count[4];
 
     // -------------------------------------------------------------------------------------------------------------------
     // create / destroy / initialize / copy
diff -r bf9686cc9e0f -r 717127253b0a source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Tue Oct 29 15:29:02 2013 +0530
+++ b/source/encoder/compress.cpp	Tue Oct 29 15:33:13 2013 +0530
@@ -26,6 +26,7 @@
 
 /* Lambda Partition Select adjusts the threshold value for Early Exit in No-RDO flow */
 #define LAMBDA_PARTITION_SELECT     0.9
+#define EARLY_EXIT                  1
 
 using namespace x265;
 
@@ -531,7 +532,78 @@
     // further split
     if (bSubBranch && bTrySplitDQP && depth < g_maxCUDepth - g_addCUDepth)
     {
+#if EARLY_EXIT // turn ON this to enable early exit
+        // early exit when the RD cost of the best mode at depth n is less than the average RD cost, at depth n,
+        // of the previously coded CUs (above, aboveLeft, aboveRight, left, colocated)
+        if (outBestCU != 0)
+        {
+            UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost= 0;
+            UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0;
+            UInt64 totalCount = 0;
+            TComDataCU* above = outTempCU->getCUAbove();
+            TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
+            TComDataCU* aboveRight = outTempCU->getCUAboveRight();
+            TComDataCU* left = outTempCU->getCULeft();
+            TComDataCU* colocated0 = outTempCU->getCUColocated(REF_PIC_LIST_0);
+            TComDataCU* colocated1 = outTempCU->getCUColocated(REF_PIC_LIST_1);
+
+            costCU = outTempCU->m_avgCost[depth] * outTempCU->m_count[depth];
+            countCU = outTempCU->m_count[depth];
+            if (above)
+            {
+                costCUAbove = above->m_avgCost[depth] * above->m_count[depth];
+                countCUAbove = above->m_count[depth];
+            }
+            if (aboveLeft)
+            {
+                costCUAboveLeft = aboveLeft->m_avgCost[depth] * aboveLeft->m_count[depth];
+                countCUAboveLeft = aboveLeft->m_count[depth];
+            }
+            if (aboveRight)
+            {
+                costCUAboveRight = aboveRight->m_avgCost[depth] * aboveRight->m_count[depth];
+                countCUAboveRight = aboveRight->m_count[depth];
+            }
+            if (left)
+            {
+                costCULeft = left->m_avgCost[depth] * left->m_count[depth];
+                countCULeft = left->m_count[depth];
+            }
+            if (colocated0)
+            {
+                costCUColocated0 = colocated0->m_avgCost[depth] * colocated0->m_count[depth];
+                countCUColocated0 = colocated0->m_count[depth];
+            }
+            if (colocated1)
+            {
+                costCUColocated1 = colocated1->m_avgCost[depth] * colocated1->m_count[depth];
+                countCUColocated1 = colocated1->m_count[depth];
+            }
+
+            totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
+            totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
+            if (totalCount != 0)
+                avgCost = totalCost / totalCount;
+
+            float lambda = 1.0f;
+
+            if (outBestCU->m_totalCost < lambda * avgCost && avgCost != 0 && depth != 0)
+            {
+                m_entropyCoder->resetBits();
+                m_entropyCoder->encodeSplitFlag(outBestCU, 0, depth, true);
+                outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits();        // split bits
+                outBestCU->m_totalCost  = m_rdCost->calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
+                /* Copy Best data to Picture for next partition prediction. */
+                outBestCU->copyToPic((UChar)depth);
+
+                /* Copy Yuv data to picture Yuv */
+                xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpely);
+                return;
+            }
+        }
+#endif
 #if 0 // turn ON this to enable early exit
+        //early exit when RD cost of best mode is less than the cumulative RD cost of 4 subpartition
         UInt64 nxnCost = 0;
         if (outBestCU != 0 && depth > 0)
         {
@@ -612,7 +684,22 @@
                     m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
                 }
                 xCompressInterCU(subBestPartCU, subTempPartCU, outTempCU, nextDepth, nextDepth_partIndex);
-
+#if EARLY_EXIT
+                for (int k = 0; k < 4; k++)
+                {
+                    outTempCU->m_avgCost[k] = subTempPartCU->m_avgCost[k];
+                    outTempCU->m_count[k] = subTempPartCU->m_count[k];
+                }
+                if (subBestPartCU->getPredictionMode(0) != MODE_INTRA)
+                {
+                    UInt64 tempavgCost = subBestPartCU->m_totalCost;
+                    UInt64 temp = outTempCU->m_avgCost[depth + 1] * outTempCU->m_count[depth + 1];
+                    outTempCU->m_count[depth + 1] += 1;
+                    outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_count[depth + 1] += 1;
+                    outTempCU->m_avgCost[depth + 1] = (temp + tempavgCost) / outTempCU->m_count[depth + 1];
+                    outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_avgCost[depth + 1] = outTempCU->m_avgCost[depth + 1];
+                }
+#endif
                 /* Adding costs from best SUbCUs */
                 outTempCU->copyPartFrom(subBestPartCU, nextDepth_partIndex, nextDepth, true); // Keep best part data to current temporary data.
                 xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * nextDepth_partIndex, nextDepth);
@@ -708,6 +795,16 @@
          * Copy recon data from Temp structure to Best structure */
         if (outBestCU)
         {
+            if (depth == 0)
+            {
+                UInt64 tempavgCost = outBestCU->m_totalCost;
+                UInt64 temp = outTempCU->m_avgCost[depth] * outTempCU->m_count[depth];
+                outTempCU->m_count[depth] += 1;
+                outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_count[depth] += 1;
+
+                outTempCU->m_avgCost[depth] = (temp + tempavgCost) / outTempCU->m_count[depth];
+                outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_avgCost[depth] = outTempCU->m_avgCost[depth];
+            }
             if (outTempCU->m_totalCost < outBestCU->m_totalCost)
             {
                 outBestCU = outTempCU;


More information about the x265-devel mailing list