[x265] [PATCH] no-rdo early exit: giving weightage to the cost of that CU and neighbour CU's for early exit

sumalatha at multicorewareinc.com sumalatha at multicorewareinc.com
Thu Nov 14 12:07:06 CET 2013


# HG changeset patch
# User Sumalatha Polureddy
# Date 1384427113 -19800
# Node ID eb8391256d6a68919df73d450d2b9b3cf658cf9f
# Parent  c4ca80d19105ccf1ba2ec14dd65915f2820a660d
no-rdo early exit: giving weightage to the cost of that CU and neighbour CU's for early exit

Early exit is done when the CU cost at depth "n" is less than the sum of 60% of the
average cost of that CU at the same depth and 40% of the average cost of the neighbour CUs at the same depth.

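The performance, bitrate increase and PSNR comparisons are given below. Each line lists three values in the order: early exit OFF / already existing early exit / new early exit.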
CLI: x265.exe input.y4m -o abc.hevc -r recon.y4m --rd 1 --ref 1

BasketballDrive_1920x1080_50
Time taken to encode: 704/585/564s
bitrate: 3650/3696/3696
PSNR: 36.7/36.67/36.67
perf improvement: 16.9% (already existing early exit, compared to early exit OFF)
perf improvement: 19.8% (new early exit, compared to early exit OFF)

Cactus_1920x1080_50
Time taken to encode: 526/443/436s
bitrate: 2787/2831/2833
PSNR: 35.527/35.48/35.48
perf improvement: 15.7% (already existing early exit, compared to early exit OFF)
perf improvement: 17.1% (new early exit, compared to early exit OFF)

Kimono1_1920x1080_24
Time taken to encode: 279/235/238s
bitrate: 1243/1252/1252
PSNR: 38.16/38.158/38.159
perf improvement: 15.7% (already existing early exit, compared to early exit OFF)
perf improvement: 14.6% (new early exit, compared to early exit OFF)

FourPeople_1280x720_60
Time taken to encode: 169/157/157s
bitrate: 486/489/489
PSNR: 39.09/39.052/39.042
perf improvement: 7.1% (already existing early exit, compared to early exit OFF)
perf improvement: 7.1% (new early exit, compared to early exit OFF)

big_buck_bunny_360p24
Time taken to encode: 1739/1511/1505s
bitrate: 174.9/175.38/175.5
PSNR: 37.798/37.746/37.752
perf improvement: 13.1% (already existing early exit, compared to early exit OFF)
perf improvement: 13.4% (new early exit, compared to early exit OFF)

PartyScene_832x480_50
Time taken to encode: 123/120/120s
bitrate: 208/208/208
PSNR: 40.344/40.33/40.332
perf improvement: 2.4% (already existing early exit, compared to early exit OFF)
perf improvement: 2.4% (new early exit, compared to early exit OFF)

diff -r c4ca80d19105 -r eb8391256d6a source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Tue Nov 12 19:10:23 2013 +0530
+++ b/source/encoder/compress.cpp	Thu Nov 14 16:35:13 2013 +0530
@@ -560,57 +560,45 @@
     if (bSubBranch && bTrySplitDQP && depth < g_maxCUDepth - g_addCUDepth)
     {
 #if EARLY_EXIT // turn ON this to enable early exit
-        // early exit when the RD cost of best mode at depth n is less than the avgerage of RD cost of the
-        // CU's(above, aboveleft, aboveright, left, colocated) at depth "n" of previosuly coded CU's
+        // early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour 
+        // CU's(above, aboveleft, aboveright, left, colocated) and avg cost of that CU at depth "n"  with weightage for each quantity
         if (outBestCU != 0)
         {
-            UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost = 0;
-            UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0;
-            UInt64 totalCount = 0;
+            uint64_t totalCostNeigh = 0, totalCostCU = 0, totalCountCU = 0;
+            double avgCost = 0;
+            uint64_t totalCountNeigh = 0;
             TComDataCU* above = outTempCU->getCUAbove();
             TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
             TComDataCU* aboveRight = outTempCU->getCUAboveRight();
             TComDataCU* left = outTempCU->getCULeft();
-            TComDataCU* colocated0 = outTempCU->getCUColocated(REF_PIC_LIST_0);
-            TComDataCU* colocated1 = outTempCU->getCUColocated(REF_PIC_LIST_1);
+            TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
 
-            costCU = outTempCU->m_avgCost[depth] * outTempCU->m_count[depth];
-            countCU = outTempCU->m_count[depth];
+            totalCostCU += rootCU->m_avgCost[depth] * rootCU->m_count[depth];
+            totalCountCU += rootCU->m_count[depth];
             if (above)
             {
-                costCUAbove = above->m_avgCost[depth] * above->m_count[depth];
-                countCUAbove = above->m_count[depth];
+                totalCostNeigh += above->m_avgCost[depth] * above->m_count[depth];
+                totalCountNeigh += above->m_count[depth];
             }
             if (aboveLeft)
             {
-                costCUAboveLeft = aboveLeft->m_avgCost[depth] * aboveLeft->m_count[depth];
-                countCUAboveLeft = aboveLeft->m_count[depth];
+                totalCostNeigh += aboveLeft->m_avgCost[depth] * aboveLeft->m_count[depth];
+                totalCountNeigh += aboveLeft->m_count[depth];
             }
             if (aboveRight)
             {
-                costCUAboveRight = aboveRight->m_avgCost[depth] * aboveRight->m_count[depth];
-                countCUAboveRight = aboveRight->m_count[depth];
+                totalCostNeigh += aboveRight->m_avgCost[depth] * aboveRight->m_count[depth];
+                totalCountNeigh += aboveRight->m_count[depth];
             }
             if (left)
             {
-                costCULeft = left->m_avgCost[depth] * left->m_count[depth];
-                countCULeft = left->m_count[depth];
-            }
-            if (colocated0)
-            {
-                costCUColocated0 = colocated0->m_avgCost[depth] * colocated0->m_count[depth];
-                countCUColocated0 = colocated0->m_count[depth];
-            }
-            if (colocated1)
-            {
-                costCUColocated1 = colocated1->m_avgCost[depth] * colocated1->m_count[depth];
-                countCUColocated1 = colocated1->m_count[depth];
+                totalCostNeigh += left->m_avgCost[depth] * left->m_count[depth];
+                totalCountNeigh += left->m_count[depth];
             }
 
-            totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
-            totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
-            if (totalCount != 0)
-                avgCost = totalCost / totalCount;
+            //giving 60% weight to all CU's and 40% weight to neighbour CU's
+            if (totalCountNeigh + totalCountCU)
+                avgCost = ((0.6 * totalCostCU) + (0.4 * totalCostNeigh)) / ((0.6 * totalCountCU) + (0.4 * totalCountNeigh));
 
             float lambda = 1.0f;
 
@@ -651,20 +639,13 @@
                 }
                 xCompressInterCU(subBestPartCU, subTempPartCU, outTempCU, nextDepth, nextDepth_partIndex);
 #if EARLY_EXIT
-                for (int k = 0; k < 4; k++)
-                {
-                    outTempCU->m_avgCost[k] = subTempPartCU->m_avgCost[k];
-                    outTempCU->m_count[k] = subTempPartCU->m_count[k];
-                }
-
                 if (subBestPartCU->getPredictionMode(0) != MODE_INTRA)
                 {
-                    UInt64 tempavgCost = subBestPartCU->m_totalCost;
-                    UInt64 temp = outTempCU->m_avgCost[depth + 1] * outTempCU->m_count[depth + 1];
-                    outTempCU->m_count[depth + 1] += 1;
-                    outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_count[depth + 1] += 1;
-                    outTempCU->m_avgCost[depth + 1] = (temp + tempavgCost) / outTempCU->m_count[depth + 1];
-                    outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_avgCost[depth + 1] = outTempCU->m_avgCost[depth + 1];
+                    uint64_t tempavgCost = subBestPartCU->m_totalCost;
+                    TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
+                    uint64_t temp = rootCU->m_avgCost[depth + 1] * rootCU->m_count[depth + 1];
+                    rootCU->m_count[depth + 1] += 1;
+                    rootCU->m_avgCost[depth + 1] = (temp + tempavgCost) / rootCU->m_count[depth + 1];
                 }
 #endif // if EARLY_EXIT
                 /* Adding costs from best SUbCUs */
@@ -762,16 +743,16 @@
          * Copy recon data from Temp structure to Best structure */
         if (outBestCU)
         {
+#if EARLY_EXIT
             if (depth == 0)
             {
-                UInt64 tempavgCost = outBestCU->m_totalCost;
-                UInt64 temp = outTempCU->m_avgCost[depth] * outTempCU->m_count[depth];
-                outTempCU->m_count[depth] += 1;
-                outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_count[depth] += 1;
-
-                outTempCU->m_avgCost[depth] = (temp + tempavgCost) / outTempCU->m_count[depth];
-                outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_avgCost[depth] = outTempCU->m_avgCost[depth];
+                uint64_t tempavgCost = outBestCU->m_totalCost;
+                TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
+                uint64_t temp = rootCU->m_avgCost[depth] * rootCU->m_count[depth];
+                rootCU->m_count[depth] += 1;
+                rootCU->m_avgCost[depth] = (temp + tempavgCost) / rootCU->m_count[depth];
             }
+#endif
             if (outTempCU->m_totalCost < outBestCU->m_totalCost)
             {
                 outBestCU = outTempCU;


More information about the x265-devel mailing list