[x265-commits] [x265] optimize: rewrite TComTrQuant::xGetCodedLevel
Min Chen
chenm003 at 163.com
Fri Mar 21 21:03:37 CET 2014
details: http://hg.videolan.org/x265/rev/190f1b500219
branches:
changeset: 6568:190f1b500219
user: Min Chen <chenm003 at 163.com>
date: Tue Mar 18 17:33:12 2014 -0700
description:
optimize: rewrite TComTrQuant::xGetCodedLevel
Subject: [x265] replace parameters (oneCtx, absCtx) by pointer m_estBitsSbac->..
details: http://hg.videolan.org/x265/rev/da5f379974c0
branches:
changeset: 6569:da5f379974c0
user: Min Chen <chenm003 at 163.com>
date: Tue Mar 18 17:33:36 2014 -0700
description:
replace parameters (oneCtx, absCtx) by pointer m_estBitsSbac->..
Subject: [x265] use mask operator to avoid branch
details: http://hg.videolan.org/x265/rev/79b76dcaacd8
branches:
changeset: 6570:79b76dcaacd8
user: Min Chen <chenm003 at 163.com>
date: Tue Mar 18 17:33:53 2014 -0700
description:
use mask operator to avoid branch
Subject: [x265] vbv: fix race condition in processRowEncoder, store row qp directly in m_pic->m_rowDiagQp.
details: http://hg.videolan.org/x265/rev/21eb4a43e02f
branches: stable
changeset: 6571:21eb4a43e02f
user: Aarthi Thirumalai
date: Fri Mar 21 15:00:11 2014 +0530
description:
vbv: fix race condition in processRowEncoder, store row qp directly in m_pic->m_rowDiagQp.
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/07670cfdc215
branches:
changeset: 6572:07670cfdc215
user: Steve Borho <steve at borho.org>
date: Fri Mar 21 13:41:27 2014 -0500
description:
Merge with stable
Subject: [x265] TComTrQuant: nits (no change)
details: http://hg.videolan.org/x265/rev/fdd7c6168cf4
branches:
changeset: 6573:fdd7c6168cf4
user: Steve Borho <steve at borho.org>
date: Fri Mar 21 14:44:35 2014 -0500
description:
TComTrQuant: nits (no change)
diffstat:
source/Lib/TLibCommon/TComRom.cpp | 2 +-
source/Lib/TLibCommon/TComRom.h | 2 +-
source/Lib/TLibCommon/TComTrQuant.cpp | 172 +++++++++++++++++----------------
source/Lib/TLibCommon/TComTrQuant.h | 14 +-
4 files changed, 96 insertions(+), 94 deletions(-)
diffs (truncated from 418 to 300 lines):
diff -r fe3fcd9838c0 -r fdd7c6168cf4 source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp Thu Mar 20 19:06:54 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.cpp Fri Mar 21 14:44:35 2014 -0500
@@ -434,7 +434,7 @@ uint64_t g_nSymbolCounter = 0;
// ====================================================================================================================
const uint32_t g_minInGroup[10] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24 };
-const uint32_t g_groupIdx[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9 };
+const uint8_t g_groupIdx[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9 };
// Rice parameters for absolute transform levels
const uint8_t g_goRiceRange[5] = { 7, 14, 26, 46, 78 };
diff -r fe3fcd9838c0 -r fdd7c6168cf4 source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h Thu Mar 20 19:06:54 2014 -0500
+++ b/source/Lib/TLibCommon/TComRom.h Fri Mar 21 14:44:35 2014 -0500
@@ -128,7 +128,7 @@ extern const int16_t g_chromaFilter[8][N
// Scanning order & context mapping table
// ====================================================================================================================
-extern const uint32_t g_groupIdx[32];
+extern const uint8_t g_groupIdx[32];
extern const uint32_t g_minInGroup[10];
extern const uint8_t g_goRiceRange[5]; //!< maximum value coded with Rice codes
diff -r fe3fcd9838c0 -r fdd7c6168cf4 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Thu Mar 20 19:06:54 2014 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Fri Mar 21 14:44:35 2014 -0500
@@ -586,6 +586,7 @@ uint32_t TComTrQuant::xRateDistOptQuant(
const uint32_t cgPosX = cgBlkPos - (cgPosY << codingParameters.log2TrSizeCG);
const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
memset(&rdStats, 0, sizeof(coeffGroupRDStats));
+ assert((trSize >> 2) == (1 << codingParameters.log2TrSizeCG));
const int patternSigCtx = TComTrQuant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, codingParameters.log2TrSizeCG);
for (int scanPosinCG = cgSize - 1; scanPosinCG >= 0; scanPosinCG--)
{
@@ -600,8 +601,7 @@ uint32_t TComTrQuant::xRateDistOptQuant(
uint32_t maxAbsLevel = (levelDouble + (1 << (qbits - 1))) >> qbits;
- double err = double(levelDouble);
- costCoeff0[scanPos] = err * err * scaleFactor;
+ costCoeff0[scanPos] = ((uint64_t)levelDouble * levelDouble) * scaleFactor;
blockUncodedCost += costCoeff0[scanPos];
dstCoeff[blkPos] = maxAbsLevel;
@@ -628,33 +628,50 @@ uint32_t TComTrQuant::xRateDistOptQuant(
//===== coefficient level estimation =====
uint32_t level;
- uint32_t oneCtx = 4 * ctxSet + c1;
- uint32_t absCtx = ctxSet + c2;
+ const uint32_t oneCtx = 4 * ctxSet + c1;
+ const uint32_t absCtx = ctxSet + c2;
+ const int *greaterOneBits = m_estBitsSbac->greaterOneBits[oneCtx];
+ const int *levelAbsBits = m_estBitsSbac->levelAbsBits[absCtx];
+ double curCostSig = 0;
+ costCoeff[scanPos] = MAX_DOUBLE;
if (scanPos == lastScanPos)
{
- level = xGetCodedLevel(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos],
- levelDouble, maxAbsLevel, baseLevel, 0, oneCtx, absCtx, goRiceParam,
+ level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos],
+ levelDouble, maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam,
c1c2Idx, qbits, scaleFactor, 1);
}
else
{
- uint16_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, codingParameters);
- level = xGetCodedLevel(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos],
- levelDouble, maxAbsLevel, baseLevel, ctxSig, oneCtx, absCtx, goRiceParam,
- c1c2Idx, qbits, scaleFactor, 0);
+ const uint32_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, codingParameters);
+ if (maxAbsLevel < 3)
+ {
+ costSig[scanPos] = xGetRateSigCoef(0, ctxSig);
+ costCoeff[scanPos] = costCoeff0[scanPos] + costSig[scanPos];
+ }
+ if (maxAbsLevel != 0)
+ {
+ curCostSig = xGetRateSigCoef(1, ctxSig);
+ level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos],
+ levelDouble, maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam,
+ c1c2Idx, qbits, scaleFactor, 0);
+ }
+ else
+ {
+ level = 0;
+ }
sigRateDelta[blkPos] = m_estBitsSbac->significantBits[ctxSig][1] - m_estBitsSbac->significantBits[ctxSig][0];
}
deltaU[blkPos] = (levelDouble - ((int)level << qbits)) >> (qbits - 8);
if (level > 0)
{
- int rateNow = xGetICRate(level, level - baseLevel, oneCtx, absCtx, goRiceParam, c1c2Idx);
- rateIncUp[blkPos] = xGetICRate(level + 1, level + 1 - baseLevel, oneCtx, absCtx, goRiceParam, c1c2Idx) - rateNow;
- rateIncDown[blkPos] = xGetICRate(level - 1, level - 1 - baseLevel, oneCtx, absCtx, goRiceParam, c1c2Idx) - rateNow;
+ int rateNow = xGetICRate(level, level - baseLevel, greaterOneBits, levelAbsBits, goRiceParam, c1c2Idx);
+ rateIncUp[blkPos] = xGetICRate(level + 1, level + 1 - baseLevel, greaterOneBits, levelAbsBits, goRiceParam, c1c2Idx) - rateNow;
+ rateIncDown[blkPos] = xGetICRate(level - 1, level - 1 - baseLevel, greaterOneBits, levelAbsBits, goRiceParam, c1c2Idx) - rateNow;
}
else // level == 0
{
- rateIncUp[blkPos] = m_estBitsSbac->greaterOneBits[oneCtx][0];
+ rateIncUp[blkPos] = greaterOneBits[0];
}
dstCoeff[blkPos] = level;
baseCost += costCoeff[scanPos];
@@ -666,16 +683,13 @@ uint32_t TComTrQuant::xRateDistOptQuant(
goRiceParam++;
}
}
- if (level >= 1)
- {
- c1Idx++;
- }
+ c1Idx -= (-(int32_t)level) >> 31;
//===== update bin model =====
if (level > 1)
{
c1 = 0;
- c2 += (c2 < 2);
+ c2 += (uint32_t)(c2 - 2) >> 31;
c2Idx++;
}
else if ((c1 < 3) && (c1 > 0) && level)
@@ -692,10 +706,8 @@ uint32_t TComTrQuant::xRateDistOptQuant(
c1Idx = 0;
c2Idx = 0;
ctxSet = (scanPos == SCAN_SET_SIZE || ttype != TEXT_LUMA) ? 0 : 2;
- if (c1 == 0)
- {
- ctxSet++;
- }
+ assert(c1 >= 0);
+ ctxSet -= ((int32_t)(c1 - 1) >> 31);
c1 = 1;
}
}
@@ -774,10 +786,10 @@ uint32_t TComTrQuant::xRateDistOptQuant(
uint32_t blkPos = codingParameters.scan[scanPos];
if (dstCoeff[blkPos])
{
- dstCoeff[blkPos] = 0;
costCoeff[scanPos] = costCoeff0[scanPos];
costSig[scanPos] = 0;
}
+ dstCoeff[blkPos] = 0;
}
} // end if ( d64CostAllZeros < baseCost )
}
@@ -1015,17 +1027,17 @@ uint32_t TComTrQuant::xRateDistOptQuant(
* \param height height of the block
* \returns pattern for current coefficient group
*/
-uint32_t TComTrQuant::calcPatternSigCtx(const uint64_t sigCoeffGroupFlag64, uint32_t cgPosX, uint32_t cgPosY, uint32_t log2TrSizeCG)
+uint32_t TComTrQuant::calcPatternSigCtx(const uint64_t sigCoeffGroupFlag64, const uint32_t cgPosX, const uint32_t cgPosY, const uint32_t log2TrSizeCG)
{
if (log2TrSizeCG == 0) return 0;
const uint32_t trSizeCG = 1 << log2TrSizeCG;
assert(trSizeCG <= 32);
const uint32_t sigPos = sigCoeffGroupFlag64 >> (1 + (cgPosY << log2TrSizeCG) + cgPosX);
- uint32_t sigRight = (cgPosX < (trSizeCG - 1)) && ((sigPos & 1) != 0);
- uint32_t sigLower = (cgPosY < (trSizeCG - 1)) && ((sigPos & (1 << (trSizeCG - 1))) != 0);
+ const uint32_t sigRight = ((int32_t)(cgPosX - (trSizeCG - 1)) >> 31) & (sigPos & 1);
+ const uint32_t sigLower = ((int32_t)(cgPosY - (trSizeCG - 1)) >> 31) & (sigPos >> (trSizeCG - 2)) & 2;
- return sigRight + (sigLower << 1);
+ return sigRight + sigLower;
}
/** Context derivation process of coeff_abs_significant_flag
@@ -1125,57 +1137,60 @@ uint32_t TComTrQuant::getSigCtxInc(const
* This method calculates the best quantized transform level for a given scan position.
*/
inline uint32_t TComTrQuant::xGetCodedLevel(double& codedCost,
- double& codedCost0,
+ const double curCostSig,
double& codedCostSig,
int levelDouble,
uint32_t maxAbsLevel,
uint32_t baseLevel,
- uint32_t ctxNumSig,
- uint32_t ctxNumOne,
- uint32_t ctxNumAbs,
+ const int *greaterOneBits,
+ const int *levelAbsBits,
uint32_t absGoRice,
uint32_t c1c2Idx,
int qbits,
double scaleFactor,
bool last) const
{
- double curCostSig = 0;
uint32_t bestAbsLevel = 0;
- if (!last && maxAbsLevel < 3)
+ if (!last && maxAbsLevel == 0)
{
- codedCostSig = xGetRateSigCoef(0, ctxNumSig);
- codedCost = codedCost0 + codedCostSig;
- if (maxAbsLevel == 0)
- {
- return bestAbsLevel;
- }
- }
- else
- {
- codedCost = MAX_DOUBLE;
+ assert(0);
}
- if (!last)
- {
- curCostSig = xGetRateSigCoef(1, ctxNumSig);
- }
+ int32_t minAbsLevel = maxAbsLevel - 1;
+ if (minAbsLevel < 1)
+ minAbsLevel = 1;
- uint32_t minAbsLevel = (maxAbsLevel > 1 ? maxAbsLevel - 1 : 1);
+ // NOTE: (A + B) ^ 2 = (A ^ 2) + 2 * A * B + (B ^ 2)
+ assert(abs((double)levelDouble - (maxAbsLevel << qbits)) < INT_MAX);
+ const int32_t err1 = levelDouble - (maxAbsLevel << qbits); // A
+ double err2 = (double)((int64_t)err1 * err1); // A^ 2
+ const int64_t err3 = (int64_t)2 * err1 * ((int64_t)1 << qbits); // 2 * A * B
+ const int64_t err4 = ((int64_t)1 << qbits) * ((int64_t)1 << qbits); // B ^ 2
+ const double errInc = (err3 + err4) * scaleFactor;
+
+ err2 *= scaleFactor;
+
+ double bestCodedCost = codedCost;
+ double bestCodedCostSig = codedCostSig;
+ int diffLevel = maxAbsLevel - baseLevel;
for (int absLevel = maxAbsLevel; absLevel >= minAbsLevel; absLevel--)
{
- double err = double(levelDouble - (absLevel << qbits));
- double curCost = err * err * scaleFactor + xGetICRateCost(absLevel, absLevel - baseLevel, ctxNumOne, ctxNumAbs, absGoRice, c1c2Idx);
+ assert(fabs((double)err2 - double(levelDouble - (absLevel << qbits)) * double(levelDouble - (absLevel << qbits)) * scaleFactor) < 1e-5);
+ double curCost = err2 + xGetICRateCost(absLevel, diffLevel, greaterOneBits, levelAbsBits, absGoRice, c1c2Idx);
curCost += curCostSig;
- if (curCost < codedCost)
+ if (curCost < bestCodedCost)
{
bestAbsLevel = absLevel;
- codedCost = curCost;
- codedCostSig = curCostSig;
+ bestCodedCost = curCost;
+ bestCodedCostSig = curCostSig;
}
+ err2 += errInc;
+ diffLevel--;
}
-
+ codedCost = bestCodedCost;
+ codedCostSig = bestCodedCostSig;
return bestAbsLevel;
}
@@ -1188,15 +1203,13 @@ inline uint32_t TComTrQuant::xGetCodedLe
*/
inline double TComTrQuant::xGetICRateCost(uint32_t absLevel,
int32_t diffLevel,
- uint32_t ctxNumOne,
- uint32_t ctxNumAbs,
+ const int *greaterOneBits,
+ const int *levelAbsBits,
uint32_t absGoRice,
uint32_t c1c2Idx) const
{
assert(absLevel > 0);
uint32_t rate = xGetIEPRate();
- const int *greaterOneBits = m_estBitsSbac->greaterOneBits[ctxNumOne];
- const int *levelAbsBits = m_estBitsSbac->levelAbsBits[ctxNumAbs];
if (diffLevel < 0)
{
@@ -1245,9 +1258,9 @@ inline double TComTrQuant::xGetICRateCos
}
inline int TComTrQuant::xGetICRate(uint32_t absLevel,
- int32_t diffLevel,
- uint32_t ctxNumOne,
- uint32_t ctxNumAbs,
+ int32_t diffLevel,
+ const int *greaterOneBits,
+ const int *levelAbsBits,
uint32_t absGoRice,
uint32_t c1c2Idx) const
{
@@ -1259,8 +1272,6 @@ inline int TComTrQuant::xGetICRate(uint3
return 0;
}
int rate = 0;
- const int *greaterOneBits = m_estBitsSbac->greaterOneBits[ctxNumOne];
- const int *levelAbsBits = m_estBitsSbac->levelAbsBits[ctxNumAbs];
More information about the x265-commits mailing list