[x265] [PATCH 1 of 3] optimize: rewrite TComTrQuant::xGetCodedLevel
Min Chen
chenm003 at 163.com
Wed Mar 19 01:34:02 CET 2014
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1395189192 25200
# Node ID e9f08d038ff0dc17152002d2b2b70138d08465d7
# Parent dc700298419d382e58c49d4ea62a3d7398b4beaf
optimize: rewrite TComTrQuant::xGetCodedLevel
diff -r dc700298419d -r e9f08d038ff0 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Tue Mar 18 08:46:15 2014 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Tue Mar 18 17:33:12 2014 -0700
@@ -630,19 +630,34 @@
uint32_t level;
uint32_t oneCtx = 4 * ctxSet + c1;
uint32_t absCtx = ctxSet + c2;
+ double curCostSig = 0;
+ costCoeff[scanPos] = MAX_DOUBLE;
if (scanPos == lastScanPos)
{
- level = xGetCodedLevel(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos],
- levelDouble, maxAbsLevel, baseLevel, 0, oneCtx, absCtx, goRiceParam,
+ level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos],
+ levelDouble, maxAbsLevel, baseLevel, oneCtx, absCtx, goRiceParam,
c1c2Idx, qbits, scaleFactor, 1);
}
else
{
- uint16_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, codingParameters);
- level = xGetCodedLevel(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos],
- levelDouble, maxAbsLevel, baseLevel, ctxSig, oneCtx, absCtx, goRiceParam,
- c1c2Idx, qbits, scaleFactor, 0);
+ const uint32_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, codingParameters);
+ if (maxAbsLevel < 3)
+ {
+ costSig[scanPos] = xGetRateSigCoef(0, ctxSig);
+ costCoeff[scanPos] = costCoeff0[scanPos] + costSig[scanPos];
+ }
+ if (maxAbsLevel != 0)
+ {
+ curCostSig = xGetRateSigCoef(1, ctxSig);
+ level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos],
+ levelDouble, maxAbsLevel, baseLevel, oneCtx, absCtx, goRiceParam,
+ c1c2Idx, qbits, scaleFactor, 0);
+ }
+ else
+ {
+ level = 0;
+ }
sigRateDelta[blkPos] = m_estBitsSbac->significantBits[ctxSig][1] - m_estBitsSbac->significantBits[ctxSig][0];
}
deltaU[blkPos] = (levelDouble - ((int)level << qbits)) >> (qbits - 8);
@@ -1125,12 +1140,11 @@
* This method calculates the best quantized transform level for a given scan position.
*/
inline uint32_t TComTrQuant::xGetCodedLevel(double& codedCost,
- double& codedCost0,
+ const double curCostSig,
double& codedCostSig,
int levelDouble,
uint32_t maxAbsLevel,
uint32_t baseLevel,
- uint32_t ctxNumSig,
uint32_t ctxNumOne,
uint32_t ctxNumAbs,
uint32_t absGoRice,
@@ -1139,43 +1153,45 @@
double scaleFactor,
bool last) const
{
- double curCostSig = 0;
uint32_t bestAbsLevel = 0;
- if (!last && maxAbsLevel < 3)
+ if (!last && maxAbsLevel == 0)
{
- codedCostSig = xGetRateSigCoef(0, ctxNumSig);
- codedCost = codedCost0 + codedCostSig;
- if (maxAbsLevel == 0)
- {
- return bestAbsLevel;
- }
- }
- else
- {
- codedCost = MAX_DOUBLE;
+ assert(0);
}
- if (!last)
- {
- curCostSig = xGetRateSigCoef(1, ctxNumSig);
- }
+ int32_t minAbsLevel = maxAbsLevel - 1;
+ if (minAbsLevel < 1)
+ minAbsLevel = 1;
- uint32_t minAbsLevel = (maxAbsLevel > 1 ? maxAbsLevel - 1 : 1);
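+ // NOTE: (A + B)^2 = A^2 + 2 * A * B + B^2 ('^' is exponentiation here, not XOR); A = error at maxAbsLevel, B = 1 << qbits, so A + B = error at maxAbsLevel - 1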
+ assert(abs((double)levelDouble - (maxAbsLevel << qbits)) < INT_MAX);
+ const int32_t err1 = levelDouble - (maxAbsLevel << qbits); // A
+ double err2 = (double)((int64_t)err1 * err1); // A^ 2
+ const int64_t err3 = (int64_t)2 * err1 * ((int64_t)1 << qbits); // 2 * A * B
+ const int64_t err4 = ((int64_t)1 << qbits) * ((int64_t)1 << qbits); // B ^ 2
+ const double errInc = (err3 + err4) * scaleFactor;
+
+ err2 *= scaleFactor;
+
+ double bestCodedCost = codedCost;
+ double bestCodedCostSig = codedCostSig;
for (int absLevel = maxAbsLevel; absLevel >= minAbsLevel; absLevel--)
{
- double err = double(levelDouble - (absLevel << qbits));
- double curCost = err * err * scaleFactor + xGetICRateCost(absLevel, absLevel - baseLevel, ctxNumOne, ctxNumAbs, absGoRice, c1c2Idx);
+ assert(fabs((double)err2 - double(levelDouble - (absLevel << qbits)) * double(levelDouble - (absLevel << qbits)) * scaleFactor) < 1e-5);
+ double curCost = err2 + xGetICRateCost(absLevel, absLevel - baseLevel, ctxNumOne, ctxNumAbs, absGoRice, c1c2Idx);
curCost += curCostSig;
- if (curCost < codedCost)
+ if (curCost < bestCodedCost)
{
bestAbsLevel = absLevel;
- codedCost = curCost;
- codedCostSig = curCostSig;
+ bestCodedCost = curCost;
+ bestCodedCostSig = curCostSig;
}
+ err2 += errInc;
}
-
+ codedCost = bestCodedCost;
+ codedCostSig = bestCodedCostSig;
return bestAbsLevel;
}
@@ -1186,7 +1202,7 @@
* \param absGoRice Rice parameter for coeff_abs_level_minus3
* \returns cost of given absolute transform level
*/
-inline double TComTrQuant::xGetICRateCost(uint32_t absLevel,
+FORCEINLINE double TComTrQuant::xGetICRateCost(uint32_t absLevel,
int32_t diffLevel,
uint32_t ctxNumOne,
uint32_t ctxNumAbs,
diff -r dc700298419d -r e9f08d038ff0 source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h Tue Mar 18 08:46:15 2014 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.h Tue Mar 18 17:33:12 2014 -0700
@@ -62,7 +62,7 @@
typedef struct
{
int significantCoeffGroupBits[NUM_SIG_CG_FLAG_CTX][2];
- int significantBits[NUM_SIG_FLAG_CTX][2];
+ uint32_t significantBits[NUM_SIG_FLAG_CTX][2];
int lastXBits[10];
int lastYBits[10];
int greaterOneBits[NUM_ONE_FLAG_CTX][2];
@@ -193,8 +193,8 @@
// RDOQ functions
uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, TCoeff* dstCoeff, uint32_t trSize, TextType ttype, uint32_t absPartIdx, int32_t *lastPos);
- inline uint32_t xGetCodedLevel(double& codedCost, double& codedCost0, double& codedCostSig, int levelDouble,
- uint32_t maxAbsLevel, uint32_t baseLevel, uint32_t ctxNumSig, uint32_t ctxNumOne, uint32_t ctxNumAbs, uint32_t absGoRice,
+ inline uint32_t xGetCodedLevel(double& codedCost, const double curCostSig, double& codedCostSig, int levelDouble,
+ uint32_t maxAbsLevel, uint32_t baseLevel, uint32_t ctxNumOne, uint32_t ctxNumAbs, uint32_t absGoRice,
uint32_t c1c2Idx, int qbits, double scale, bool bLast) const;
inline double xGetICRateCost(uint32_t absLevel, int32_t diffLevel, uint32_t ctxNumOne, uint32_t ctxNumAbs, uint32_t absGoRice, uint32_t c1c2Idx) const;
@@ -205,7 +205,7 @@
inline double xGetRateSigCoeffGroup(uint16_t sigCoeffGroup, uint16_t ctxNumSig) const { return m_lambda * m_estBitsSbac->significantCoeffGroupBits[ctxNumSig][sigCoeffGroup]; }
- inline double xGetRateSigCoef(uint16_t sig, uint16_t ctxNumSig) const { return m_lambda * m_estBitsSbac->significantBits[ctxNumSig][sig]; }
+ inline double xGetRateSigCoef(uint32_t sig, uint32_t ctxNumSig) const { return m_lambda * m_estBitsSbac->significantBits[ctxNumSig][sig]; }
inline double xGetICost(double rage) const { return m_lambda * rage; } ///< Get the cost for a specific rate
More information about the x265-devel
mailing list