[x265] [PATCH] no_rdo: implementation of new early exit
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Tue Oct 29 11:03:21 CET 2013
# HG changeset patch
# User Sumalatha Polureddy
# Date 1383040993 -19800
# Node ID 717127253b0a3cebe263bd2e7ddb728368e5d140
# Parent bf9686cc9e0f3ffd33f722ab941ba423513a6111
no_rdo: implementation of new early exit
Early exit is done when the RD cost of the best mode at depth "n" is less than
the average RD cost of the CUs at depth "n" of previously coded CUs (CUAbove,
CUAboveRight, CUAboveLeft, CULeft, CUColocated).
For HD videos
performance improvement of 20 to 27%
bitrate increases by 0.75 to 0.02%
diff -r bf9686cc9e0f -r 717127253b0a source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp Tue Oct 29 15:29:02 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.cpp Tue Oct 29 15:33:13 2013 +0530
@@ -247,6 +247,12 @@
m_totalBits = 0;
m_numPartitions = pic->getNumPartInCU();
+ for (int i = 0; i < 4; i++)
+ {
+ m_avgCost[i] = 0;
+ m_count[i] = 0;
+ }
+
// CHECK_ME: why partStartIdx always negative
int partStartIdx = 0 - (cuAddr) * pic->getNumPartInCU();
@@ -470,6 +476,12 @@
m_totalBits = 0;
m_numPartitions = cu->getTotalNumPart() >> 2;
+ for (int i = 0; i < 4; i++)
+ {
+ m_avgCost[i] = cu->m_avgCost[i];
+ m_count[i] = cu->m_count[i];
+ }
+
int iSizeInUchar = sizeof(UChar) * m_numPartitions;
int iSizeInBool = sizeof(bool) * m_numPartitions;
diff -r bf9686cc9e0f -r 717127253b0a source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h Tue Oct 29 15:29:02 2013 +0530
+++ b/source/Lib/TLibCommon/TComDataCU.h Tue Oct 29 15:33:13 2013 +0530
@@ -178,6 +178,8 @@
UInt64 m_totalCost; ///< sum of partition RD costs
uint32_t m_totalDistortion; ///< sum of partition distortion
uint32_t m_totalBits; ///< sum of partition signal bits
+ UInt64 m_avgCost[4]; // stores the avg cost of CU's in frame for each depth
+ uint32_t m_count[4];
// -------------------------------------------------------------------------------------------------------------------
// create / destroy / initialize / copy
diff -r bf9686cc9e0f -r 717127253b0a source/encoder/compress.cpp
--- a/source/encoder/compress.cpp Tue Oct 29 15:29:02 2013 +0530
+++ b/source/encoder/compress.cpp Tue Oct 29 15:33:13 2013 +0530
@@ -26,6 +26,7 @@
/* Lambda Partition Select adjusts the threshold value for Early Exit in No-RDO flow */
#define LAMBDA_PARTITION_SELECT 0.9
+#define EARLY_EXIT 1
using namespace x265;
@@ -531,7 +532,78 @@
// further split
if (bSubBranch && bTrySplitDQP && depth < g_maxCUDepth - g_addCUDepth)
{
+#if EARLY_EXIT // turn ON this to enable early exit
+ // early exit when the RD cost of best mode at depth n is less than the average of RD cost of the
+ // CUs (above, aboveleft, aboveright, left, colocated) at depth "n" of previously coded CUs
+ if (outBestCU != 0)
+ {
+ UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost= 0;
+ UInt64 countCU = 0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0;
+ UInt64 totalCount = 0;
+ TComDataCU* above = outTempCU->getCUAbove();
+ TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
+ TComDataCU* aboveRight = outTempCU->getCUAboveRight();
+ TComDataCU* left = outTempCU->getCULeft();
+ TComDataCU* colocated0 = outTempCU->getCUColocated(REF_PIC_LIST_0);
+ TComDataCU* colocated1 = outTempCU->getCUColocated(REF_PIC_LIST_1);
+
+ costCU = outTempCU->m_avgCost[depth] * outTempCU->m_count[depth];
+ countCU = outTempCU->m_count[depth];
+ if (above)
+ {
+ costCUAbove = above->m_avgCost[depth] * above->m_count[depth];
+ countCUAbove = above->m_count[depth];
+ }
+ if (aboveLeft)
+ {
+ costCUAboveLeft = aboveLeft->m_avgCost[depth] * aboveLeft->m_count[depth];
+ countCUAboveLeft = aboveLeft->m_count[depth];
+ }
+ if (aboveRight)
+ {
+ costCUAboveRight = aboveRight->m_avgCost[depth] * aboveRight->m_count[depth];
+ countCUAboveRight = aboveRight->m_count[depth];
+ }
+ if (left)
+ {
+ costCULeft = left->m_avgCost[depth] * left->m_count[depth];
+ countCULeft = left->m_count[depth];
+ }
+ if (colocated0)
+ {
+ costCUColocated0 = colocated0->m_avgCost[depth] * colocated0->m_count[depth];
+ countCUColocated0 = colocated0->m_count[depth];
+ }
+ if (colocated1)
+ {
+ costCUColocated1 = colocated1->m_avgCost[depth] * colocated1->m_count[depth];
+ countCUColocated1 = colocated1->m_count[depth];
+ }
+
+ totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1;
+ totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1;
+ if (totalCount != 0)
+ avgCost = totalCost / totalCount;
+
+ float lambda = 1.0f;
+
+ if (outBestCU->m_totalCost < lambda * avgCost && avgCost != 0 && depth != 0)
+ {
+ m_entropyCoder->resetBits();
+ m_entropyCoder->encodeSplitFlag(outBestCU, 0, depth, true);
+ outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
+ outBestCU->m_totalCost = m_rdCost->calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
+ /* Copy Best data to Picture for next partition prediction. */
+ outBestCU->copyToPic((UChar)depth);
+
+ /* Copy Yuv data to picture Yuv */
+ xCopyYuv2Pic(outBestCU->getPic(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth, depth, outBestCU, lpelx, tpely);
+ return;
+ }
+ }
+#endif
#if 0 // turn ON this to enable early exit
+ //early exit when RD cost of best mode is less than the cumulative RD cost of 4 subpartition
UInt64 nxnCost = 0;
if (outBestCU != 0 && depth > 0)
{
@@ -612,7 +684,22 @@
m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
}
xCompressInterCU(subBestPartCU, subTempPartCU, outTempCU, nextDepth, nextDepth_partIndex);
-
+#if EARLY_EXIT
+ for (int k = 0; k < 4; k++)
+ {
+ outTempCU->m_avgCost[k] = subTempPartCU->m_avgCost[k];
+ outTempCU->m_count[k] = subTempPartCU->m_count[k];
+ }
+ if (subBestPartCU->getPredictionMode(0) != MODE_INTRA)
+ {
+ UInt64 tempavgCost = subBestPartCU->m_totalCost;
+ UInt64 temp = outTempCU->m_avgCost[depth + 1] * outTempCU->m_count[depth + 1];
+ outTempCU->m_count[depth + 1] += 1;
+ outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_count[depth + 1] += 1;
+ outTempCU->m_avgCost[depth + 1] = (temp + tempavgCost) / outTempCU->m_count[depth + 1];
+ outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_avgCost[depth + 1] = outTempCU->m_avgCost[depth + 1];
+ }
+#endif
/* Adding costs from best SUbCUs */
outTempCU->copyPartFrom(subBestPartCU, nextDepth_partIndex, nextDepth, true); // Keep best part data to current temporary data.
xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * nextDepth_partIndex, nextDepth);
@@ -708,6 +795,16 @@
* Copy recon data from Temp structure to Best structure */
if (outBestCU)
{
+ if (depth == 0)
+ {
+ UInt64 tempavgCost = outBestCU->m_totalCost;
+ UInt64 temp = outTempCU->m_avgCost[depth] * outTempCU->m_count[depth];
+ outTempCU->m_count[depth] += 1;
+ outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_count[depth] += 1;
+
+ outTempCU->m_avgCost[depth] = (temp + tempavgCost) / outTempCU->m_count[depth];
+ outTempCU->getPic()->getPicSym()->getCU(outTempCU->getAddr())->m_avgCost[depth] = outTempCU->m_avgCost[depth];
+ }
if (outTempCU->m_totalCost < outBestCU->m_totalCost)
{
outBestCU = outTempCU;
More information about the x265-devel
mailing list