[x265] [PATCH] no-rdo early exit: giving weightage to the cost of that CU and neighbour CU's for early exit
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Thu Nov 14 12:07:06 CET 2013
# HG changeset patch
# User Sumalatha Polureddy
# Date 1384427113 -19800
# Node ID eb8391256d6a68919df73d450d2b9b3cf658cf9f
# Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d
no-rdo early exit: giving weightage to the cost of that CU and neighbour CU's for early exit
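Early exit is done when the CU cost at depth "n" is less than the sum of 60% of the average cost of
that CU at the same depth and 40% of the average cost of the neighbour CUs at the same depth.
The performance, bitrate increase and PSNR comparison are given below. Each triple of values is listed
in the order: early exit OFF / already existing early exit / new early exit.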
CLI: x265.exe input.y4m -o abc.hevc -r recon.y4m --rd 1 --ref 1
BasketballDrive_1920x1080_50
Time taken to encode: 704/585/564s
bitrate: 3650/3696/3696
PSNR: 36.7/36.67/36.67
perf improvement: 16.9% (already existing early exit compared to early exit OFF)
perf improvement: 19.8% (new early exit compared to early exit OFF)
Cactus_1920x1080_50
Time taken to encode: 526/443/436s
bitrate: 2787/2831/2833
PSNR: 35.527/35.48/35.48
perf improvement: 15.7% (already existing early exit compared to early exit OFF)
perf improvement: 17.1% (new early exit compared to early exit OFF)
Kimono1_1920x1080_24
Time taken to encode: 279/235/238s
bitrate: 1243/1252/1252
PSNR: 38.16/38.158/38.159
perf improvement: 15.7% (already existing early exit compared to early exit OFF)
perf improvement: 14.6% (new early exit compared to early exit OFF)
FourPeople_1280x720_60
Time taken to encode: 169/157/157s 16.9%/19.8%
bitrate: 486/489/489
PSNR: 39.09/39.052/39.042
perf improvement: 7.1% (already existing early exit compared to early exit OFF)
perf improvement: 7.1% (new early exit compared to early exit OFF)
big_buck_bunny_360p24
Time taken to encode: 1739/1511/1505s 16.9%/19.8%
bitrate: 174.9/175.38/175.5
PSNR: 37.798/37.746/37.752
perf improvement: 13.1% (already existing early exit compared to early exit OFF)
perf improvement: 13.4% (new early exit compared to early exit OFF)
PartyScene_832x480_50
Time taken to encode: 123/120/120s 16.9%/19.8%
bitrate: 208/208/208
PSNR: 40.344/40.33/40.332
perf improvement: 2.4% (already existing early exit compared to early exit OFF)
perf improvement: 2.4% (new early exit compared to early exit OFF)
diff -r c4ca80d19105 -r eb8391256d6a source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Tue Nov 12 19:10:23 2013 +0530
+++ b/source/encoder/compress.cpp Thu Nov 14 16:35:13 2013 +0530
@@ -560,57 +560,45 @@
if (bSubBranch && bTrySplitDQP && depth < g_maxCUDepth - g_addCUDepth)
{
#if EARLY_EXIT // turn ON this to enable early exit
- // early exit when the RD cost of best mode at depth n is less than the avgerage of RD cost of the
- // CU's(above, aboveleft, aboveright, left, colocated) at depth "n" of previosuly coded CU's
+ // early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour
+ // CU's(above, aboveleft, aboveright, left, colocated) and avg cost of that CU at depth "n" with weightage for each quantity
if (outBestCU != 0)
{
- UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost = 0;
- UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0;
- UInt64 totalCount = 0;
+ uint64_t totalCostNeigh = 0, totalCostCU = 0, totalCountCU = 0;
+ double avgCost = 0;
+ uint64_t totalCountNeigh = 0;
TComDataCU* above = outTempCU->getCUAbove();
TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
TComDataCU* aboveRight = outTempCU->getCUAboveRight();
TComDataCU* left = outTempCU->getCULeft();
- TComDataCU* colocated0 = outTempCU->getCUColocated(REF_PIC_LIST_0);
- TComDataCU* colocated1 = outTempCU->getCUColocated(REF_PIC_LIST_1);
+ TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
- costCU = outTempCU->m_avgCost[depth] * outTempCU->m_count[depth];
- countCU = outTempCU->m_count[depth];
+ totalCostCU += rootCU->m_avgCost[depth] * rootCU->m_count[depth];
+ totalCountCU += rootCU->m_count[depth];
if (above)
{
- costCUAbove = above->m_avgCost[depth] * above->m_count[depth];
- countCUAbove = above->m_count[depth];
+ totalCostNeigh += above->m_avgCost[depth] * above->m_count[depth];
+ totalCountNeigh += above->m_count[depth];
}
if (aboveLeft)
{
- costCUAboveLeft = aboveLeft->m_avgCost[depth] * aboveLeft->m_count[depth];
- countCUAboveLeft = aboveLeft->m_count[depth];
+ totalCostNeigh += aboveLeft->m_avgCost[depth] * aboveLeft->m_count[depth];
+ totalCountNeigh += aboveLeft->m_count[depth];
}
if (aboveRight)
{
- costCUAboveRight = aboveRight->m_avgCost[depth] * aboveRight->m_count[depth];
- countCUAboveRight = aboveRight->m_count[depth];
+ totalCostNeigh += aboveRight->m_avgCost[depth] * aboveRight->m_count[depth];
+ totalCountNeigh += aboveRight->m_count[depth];
}
if (left)
{
- costCULeft = left->m_avgCost[depth] * left->m_count[depth];
- countCULeft = left->m_count[depth];
- }
- if (colocated0)
- {
- costCUColocated0 = colocated0->m_avgCost[depth] * colocated0->m_count[depth];
- countCUColocated0 = colocated0->m_count[depth];
- }
- if (colocated1)
- {
- costCUColocated1 = colocated1->m_avgCost[depth] * colocated1->m_count[depth];
- countCUColocated1 = colocated1->m_count[depth];
+ totalCostNeigh += left->m_avgCost[depth] * left->m_count[depth];
+ totalCountNeigh += left->m_count[depth];
}
- totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
- totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
- if (totalCount != 0)
- avgCost = totalCost / totalCount;
+ //giving 60% weight to all CU's and 40% weight to neighbour CU's
+ if (totalCountNeigh + totalCountCU)
+ avgCost = ((0.6 * totalCostCU) + (0.4 * totalCostNeigh)) / ((0.6 * totalCountCU) + (0.4 * totalCountNeigh));
float lambda = 1.0f;
@@ -651,20 +639,13 @@
}
xCompressInterCU(subBestPartCU, subTempPartCU, outTempCU, nextDepth, nextDepth_partIndex);
#if EARLY_EXIT
- for (int k = 0; k < 4; k++)
- {
- outTempCU->m_avgCost[k] = subTempPartCU->m_avgCost[k];
- outTempCU->m_count[k] = subTempPartCU->m_count[k];
- }
-
if (subBestPartCU->getPredictionMode(0) != MODE_INTRA)
{
- UInt64 tempavgCost = subBestPartCU->m_totalCost;
- UInt64 temp = outTempCU->m_avgCost[depth + 1] * outTempCU->m_count[depth + 1];
- outTempCU->m_count[depth + 1] += 1;
- outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_count[depth + 1] += 1;
- outTempCU->m_avgCost[depth + 1] = (temp + tempavgCost) / outTempCU->m_count[depth + 1];
- outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_avgCost[depth + 1] = outTempCU->m_avgCost[depth + 1];
+ uint64_t tempavgCost = subBestPartCU->m_totalCost;
+ TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
+ uint64_t temp = rootCU->m_avgCost[depth + 1] * rootCU->m_count[depth + 1];
+ rootCU->m_count[depth + 1] += 1;
+ rootCU->m_avgCost[depth + 1] = (temp + tempavgCost) / rootCU->m_count[depth + 1];
}
#endif // if EARLY_EXIT
/* Adding costs from best SUbCUs */
@@ -762,16 +743,16 @@
* Copy recon data from Temp structure to Best structure */
if (outBestCU)
{
+#if EARLY_EXIT
if (depth == 0)
{
- UInt64 tempavgCost = outBestCU->m_totalCost;
- UInt64 temp = outTempCU->m_avgCost[depth] * outTempCU->m_count[depth];
- outTempCU->m_count[depth] += 1;
- outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_count[depth] += 1;
-
- outTempCU->m_avgCost[depth] = (temp + tempavgCost) / outTempCU->m_count[depth];
- outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_avgCost[depth] = outTempCU->m_avgCost[depth];
+ uint64_t tempavgCost = outBestCU->m_totalCost;
+ TComDataCU* rootCU = outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr());
+ uint64_t temp = rootCU->m_avgCost[depth] * rootCU->m_count[depth];
+ rootCU->m_count[depth] += 1;
+ rootCU->m_avgCost[depth] = (temp + tempavgCost) / rootCU->m_count[depth];
}
+#endif
if (outTempCU->m_totalCost < outBestCU->m_totalCost)
{
outBestCU = outTempCU;
More information about the x265-devel
mailing list