[x265] [PATCH] TComTrQuant: switch from Double to UInt64 cost scaling *BUGGY*
Steve Borho
steve at borho.org
Wed Jun 19 07:56:21 CEST 2013
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1371618448 18000
# Wed Jun 19 00:07:28 2013 -0500
# Node ID 661b0c1b961147e25b8fdd71059188c636390510
# Parent f9e9b981602e811b47701ba14216baba1c919735
TComTrQuant: switch from Double to UInt64 cost scaling *BUGGY*
This loses too much PSNR and I-frame compression efficiency. There must be a bug
lurking somewhere.
diff -r f9e9b981602e -r 661b0c1b9611 source/Lib/TLibCommon/TComTrQuant.cpp
--- a/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jun 18 21:40:42 2013 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp Wed Jun 19 00:07:28 2013 -0500
@@ -46,10 +46,10 @@
typedef struct
{
Int iNNZbeforePos0;
- Double d64CodedLevelandDist; // distortion and level cost only
- Double d64UncodedDist; // all zero coded block distortion
- Double d64SigCost;
- Double d64SigCost_0;
+ UInt64 ui64CodedLevelandDist; // distortion and level cost only
+ UInt64 ui64UncodedDist; // all zero coded block distortion
+ UInt64 ui64SigCost; // running sum of the uiCostSig[] entries added by xRateDistOptQuant
+ UInt64 ui64SigCost_0; // uiCostSig[] entry recorded when iScanPosinCG == 0
} coeffGroupRDStats;
//! \ingroup TLibCommon
@@ -1196,7 +1196,7 @@
UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
UInt uiGoRiceParam = 0;
- Double d64BlockUncodedCost = 0;
+ UInt64 ui64BlockUncodedCost = 0;
const UInt uiLog2BlkSize = g_aucConvertToBit[uiWidth] + 2;
const UInt uiMaxNumCoeff = uiWidth * uiHeight;
Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
@@ -1204,17 +1204,16 @@
assert(scalingListType < 6);
Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
- Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType, uiLog2TrSize - 2, m_cQP.m_iRem);
+ UInt64 *puiErrScale = getErrScaleCoeff(scalingListType, uiLog2TrSize - 2, m_cQP.m_iRem);
Int *piQCoefOrg = getQuantCoeff(scalingListType, m_cQP.m_iRem, uiLog2TrSize - 2);
Int *piQCoef = piQCoefOrg;
- Double *pdErrScale = pdErrScaleOrg;
Int iQBitsC = iQBits - ARL_C_PRECISION;
Int iAddC = 1 << (iQBitsC - 1);
UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType == TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
- Double pdCostCoeff[32 * 32];
- Double pdCostSig[32 * 32];
- Double pdCostCoeff0[32 * 32];
+ UInt64 uiCostCoeff[32 * 32];
+ UInt64 uiCostSig[32 * 32];
+ UInt64 uiCostCoeff0[32 * 32];
Int rateIncUp[32 * 32];
Int rateIncDown[32 * 32];
@@ -1222,19 +1221,17 @@
Int deltaU[32 * 32];
const UInt * scanCG;
+ scanCG = g_auiSigLastScan[uiScanIdx][uiLog2BlkSize > 3 ? uiLog2BlkSize - 2 - 1 : 0];
+ if (uiLog2BlkSize == 3)
{
- scanCG = g_auiSigLastScan[uiScanIdx][uiLog2BlkSize > 3 ? uiLog2BlkSize - 2 - 1 : 0];
- if (uiLog2BlkSize == 3)
- {
- scanCG = g_sigLastScan8x8[uiScanIdx];
- }
- else if (uiLog2BlkSize == 5)
- {
- scanCG = g_sigLastScanCG32x32;
- }
+ scanCG = g_sigLastScan8x8[uiScanIdx];
+ }
+ else if (uiLog2BlkSize == 5)
+ {
+ scanCG = g_sigLastScanCG32x32;
}
const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16
- Double pdCostCoeffGroupSig[MLS_GRP_NUM];
+ UInt64 uiCostCoeffGroupSig[MLS_GRP_NUM];
UInt uiSigCoeffGroupFlag[MLS_GRP_NUM];
UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
Int iCGLastScanPos = -1;
@@ -1242,7 +1239,7 @@
UInt uiCtxSet = 0;
Int c1 = 1;
Int c2 = 0;
- Double d64BaseCost = 0;
+ UInt64 ui64BaseCost = 0;
Int iLastScanPos = -1;
UInt c1Idx = 0;
@@ -1251,7 +1248,7 @@
const UInt *scan = g_auiSigLastScan[uiScanIdx][uiLog2BlkSize - 1];
- ::memset(uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM);
+ ::memset(uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM);
UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
Int iScanPos;
@@ -1269,29 +1266,28 @@
{
iScanPos = iCGScanPos * uiCGSize + iScanPosinCG;
//===== quantization =====
- UInt uiBlkPos = scan[iScanPos];
+ UInt uiBlkPos = scan[iScanPos];
// set coeff
Int uiQ = piQCoef[uiBlkPos];
- Double dTemp = pdErrScale[uiBlkPos];
- Int lLevelDouble = plSrcCoeff[uiBlkPos];
- lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ, MAX_INT - (1 << (iQBits - 1)));
+ UInt64 uiErrScale = puiErrScale[uiBlkPos];
+ Int lLevelDouble = plSrcCoeff[uiBlkPos];
+ lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ, MAX_INT - (1 << (iQBits - 1)));
if (m_bUseAdaptQpSelect)
{
- piArlDstCoeff[uiBlkPos] = (Int)((lLevelDouble + iAddC) >> iQBitsC);
+ piArlDstCoeff[uiBlkPos] = (Int)((lLevelDouble + iAddC) >> iQBitsC);
}
- UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
+ UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
- Double dErr = Double(lLevelDouble);
- pdCostCoeff0[iScanPos] = dErr * dErr * dTemp;
- d64BlockUncodedCost += pdCostCoeff0[iScanPos];
- piDstCoeff[uiBlkPos] = uiMaxAbsLevel;
+ uiCostCoeff0[iScanPos] = xApplyScale(lLevelDouble * lLevelDouble, uiErrScale);
+ ui64BlockUncodedCost += uiCostCoeff0[iScanPos];
+ piDstCoeff[uiBlkPos] = uiMaxAbsLevel;
if (uiMaxAbsLevel > 0 && iLastScanPos < 0)
{
- iLastScanPos = iScanPos;
- uiCtxSet = (iScanPos < SCAN_SET_SIZE || eTType != TEXT_LUMA) ? 0 : 2;
- iCGLastScanPos = iCGScanPos;
+ iLastScanPos = iScanPos;
+ uiCtxSet = (iScanPos < SCAN_SET_SIZE || eTType != TEXT_LUMA) ? 0 : 2;
+ iCGLastScanPos = iCGScanPos;
}
if (iLastScanPos >= 0)
@@ -1300,25 +1296,26 @@
rateIncDown[uiBlkPos] = 0;
deltaU[uiBlkPos] = 0;
sigRateDelta[uiBlkPos] = 0;
+
//===== coefficient level estimation =====
UInt uiLevel;
- UInt uiOneCtx = 4 * uiCtxSet + c1;
- UInt uiAbsCtx = uiCtxSet + c2;
+ UInt uiOneCtx = 4 * uiCtxSet + c1;
+ UInt uiAbsCtx = uiCtxSet + c2;
if (iScanPos == iLastScanPos)
{
- uiLevel = xGetCodedLevel(pdCostCoeff[iScanPos], pdCostCoeff0[iScanPos], pdCostSig[iScanPos],
- lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
- c1Idx, c2Idx, iQBits, dTemp, 1);
+ uiLevel = xGetCodedLevel(uiCostCoeff[iScanPos], uiCostCoeff0[iScanPos], uiCostSig[iScanPos],
+ lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
+ c1Idx, c2Idx, iQBits, uiErrScale, 1);
}
else
{
- UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
- UInt uiPosX = uiBlkPos - (uiPosY << uiLog2BlkSize);
- UShort uiCtxSig = getSigCtxInc(patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType);
- uiLevel = xGetCodedLevel(pdCostCoeff[iScanPos], pdCostCoeff0[iScanPos], pdCostSig[iScanPos],
- lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
- c1Idx, c2Idx, iQBits, dTemp, 0);
+ UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
+ UInt uiPosX = uiBlkPos - (uiPosY << uiLog2BlkSize);
+ UShort uiCtxSig = getSigCtxInc(patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType);
+ uiLevel = xGetCodedLevel(uiCostCoeff[iScanPos], uiCostCoeff0[iScanPos], uiCostSig[iScanPos],
+ lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
+ c1Idx, c2Idx, iQBits, uiErrScale, 0);
sigRateDelta[uiBlkPos] = m_pcEstBitsSbac->significantBits[uiCtxSig][1] - m_pcEstBitsSbac->significantBits[uiCtxSig][0];
}
deltaU[uiBlkPos] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits - 8);
@@ -1333,7 +1330,7 @@
rateIncUp[uiBlkPos] = m_pcEstBitsSbac->m_greaterOneBits[uiOneCtx][0];
}
piDstCoeff[uiBlkPos] = uiLevel;
- d64BaseCost += pdCostCoeff[iScanPos];
+ ui64BaseCost += uiCostCoeff[iScanPos];
baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
if (uiLevel >= baseLevel)
@@ -1378,19 +1375,19 @@
}
else
{
- pdCostCoeff[iScanPos] = 0;
- d64BaseCost += pdCostCoeff0[iScanPos];
+ uiCostCoeff[iScanPos] = 0;
+ ui64BaseCost += uiCostCoeff0[iScanPos];
}
- rdStats.d64SigCost += pdCostSig[iScanPos];
+ rdStats.ui64SigCost += uiCostSig[iScanPos];
if (iScanPosinCG == 0)
{
- rdStats.d64SigCost_0 = pdCostSig[iScanPos];
+ rdStats.ui64SigCost_0 = uiCostSig[iScanPos];
}
if (piDstCoeff[uiBlkPos])
{
uiSigCoeffGroupFlag[uiCGBlkPos] = 1;
- rdStats.d64CodedLevelandDist += pdCostCoeff[iScanPos] - pdCostSig[iScanPos];
- rdStats.d64UncodedDist += pdCostCoeff0[iScanPos];
+ rdStats.ui64CodedLevelandDist += uiCostCoeff[iScanPos] - uiCostSig[iScanPos];
+ rdStats.ui64UncodedDist += uiCostCoeff0[iScanPos];
if (iScanPosinCG != 0)
{
rdStats.iNNZbeforePos0++;
@@ -1400,14 +1397,14 @@
if (iCGLastScanPos >= 0)
{
- pdCostCoeffGroupSig[iCGScanPos] = 0;
+ uiCostCoeffGroupSig[iCGScanPos] = 0;
if (iCGScanPos)
{
if (uiSigCoeffGroupFlag[uiCGBlkPos] == 0)
{
- UInt uiCtxSig = getSigCoeffGroupCtxInc(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
- d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;
- pdCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(0, uiCtxSig);
+ UInt uiCtxSig = getSigCoeffGroupCtxInc(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
+ ui64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.ui64SigCost;
+ uiCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(0, uiCtxSig);
}
else
{
@@ -1415,34 +1412,34 @@
{
if (rdStats.iNNZbeforePos0 == 0)
{
- d64BaseCost -= rdStats.d64SigCost_0;
- rdStats.d64SigCost -= rdStats.d64SigCost_0;
+ ui64BaseCost -= rdStats.ui64SigCost_0;
+ rdStats.ui64SigCost -= rdStats.ui64SigCost_0;
}
// rd-cost if SigCoeffGroupFlag = 0, initialization
- Double d64CostZeroCG = d64BaseCost;
+ UInt64 ui64CostZeroCG = ui64BaseCost;
// add SigCoeffGroupFlag cost to total cost
- UInt uiCtxSig = getSigCoeffGroupCtxInc(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
+ UInt uiCtxSig = getSigCoeffGroupCtxInc(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
if (iCGScanPos < iCGLastScanPos)
{
- d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
- d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
- pdCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(1, uiCtxSig);
+ ui64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
+ ui64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
+ uiCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(1, uiCtxSig);
}
// try to convert the current coeff group from non-zero to all-zero
- d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
- d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
- d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
+ ui64CostZeroCG += rdStats.ui64UncodedDist; // distortion for resetting non-zero levels to zero levels
+ ui64CostZeroCG -= rdStats.ui64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
+ ui64CostZeroCG -= rdStats.ui64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
// if we can save cost, change this block to all-zero block
- if (d64CostZeroCG < d64BaseCost)
+ if (ui64CostZeroCG < ui64BaseCost)
{
uiSigCoeffGroupFlag[uiCGBlkPos] = 0;
- d64BaseCost = d64CostZeroCG;
+ ui64BaseCost = ui64CostZeroCG;
if (iCGScanPos < iCGLastScanPos)
{
- pdCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(0, uiCtxSig);
+ uiCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(0, uiCtxSig);
}
// reset coeffs to 0 in this block
for (Int iScanPosinCG = uiCGSize - 1; iScanPosinCG >= 0; iScanPosinCG--)
@@ -1453,8 +1450,8 @@
if (piDstCoeff[uiBlkPos])
{
piDstCoeff[uiBlkPos] = 0;
- pdCostCoeff[iScanPos] = pdCostCoeff0[iScanPos];
- pdCostSig[iScanPos] = 0;
+ uiCostCoeff[iScanPos] = uiCostCoeff0[iScanPos];
+ uiCostSig[iScanPos] = 0;
}
}
} // end if ( d64CostAllZeros < d64BaseCost )
@@ -1474,21 +1471,21 @@
return;
}
- Double d64BestCost = 0;
+ UInt64 ui64BestCost = 0;
Int ui16CtxCbf = 0;
Int iBestLastIdxP1 = 0;
if (!pcCU->isIntra(uiAbsPartIdx) && eTType == TEXT_LUMA && pcCU->getTransformIdx(uiAbsPartIdx) == 0)
{
- ui16CtxCbf = 0;
- d64BestCost = d64BlockUncodedCost + xGetICost(m_pcEstBitsSbac->blockRootCbpBits[ui16CtxCbf][0]);
- d64BaseCost += xGetICost(m_pcEstBitsSbac->blockRootCbpBits[ui16CtxCbf][1]);
+ ui16CtxCbf = 0;
+ ui64BestCost = ui64BlockUncodedCost + xGetICost(m_pcEstBitsSbac->blockRootCbpBits[ui16CtxCbf][0]);
+ ui64BaseCost += xGetICost(m_pcEstBitsSbac->blockRootCbpBits[ui16CtxCbf][1]);
}
else
{
- ui16CtxCbf = pcCU->getCtxQtCbf(eTType, pcCU->getTransformIdx(uiAbsPartIdx));
- ui16CtxCbf = (eTType ? TEXT_CHROMA : eTType) * NUM_QT_CBF_CTX + ui16CtxCbf;
- d64BestCost = d64BlockUncodedCost + xGetICost(m_pcEstBitsSbac->blockCbpBits[ui16CtxCbf][0]);
- d64BaseCost += xGetICost(m_pcEstBitsSbac->blockCbpBits[ui16CtxCbf][1]);
+ ui16CtxCbf = pcCU->getCtxQtCbf(eTType, pcCU->getTransformIdx(uiAbsPartIdx));
+ ui16CtxCbf = (eTType ? TEXT_CHROMA : eTType) * NUM_QT_CBF_CTX + ui16CtxCbf;
+ ui64BestCost = ui64BlockUncodedCost + xGetICost(m_pcEstBitsSbac->blockCbpBits[ui16CtxCbf][0]);
+ ui64BaseCost += xGetICost(m_pcEstBitsSbac->blockCbpBits[ui16CtxCbf][1]);
}
Bool bFoundLast = false;
@@ -1496,7 +1493,7 @@
{
UInt uiCGBlkPos = scanCG[iCGScanPos];
- d64BaseCost -= pdCostCoeffGroupSig[iCGScanPos];
+ ui64BaseCost -= uiCostCoeffGroupSig[iCGScanPos];
if (uiSigCoeffGroupFlag[uiCGBlkPos])
{
for (Int iScanPosinCG = uiCGSize - 1; iScanPosinCG >= 0; iScanPosinCG--)
@@ -1510,25 +1507,25 @@
UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
UInt uiPosX = uiBlkPos - (uiPosY << uiLog2BlkSize);
- Double d64CostLast = uiScanIdx == SCAN_VER ? xGetRateLast(uiPosY, uiPosX) : xGetRateLast(uiPosX, uiPosY);
- Double totalCost = d64BaseCost + d64CostLast - pdCostSig[iScanPos];
+ UInt64 ui64CostLast = uiScanIdx == SCAN_VER ? xGetRateLast(uiPosY, uiPosX) : xGetRateLast(uiPosX, uiPosY);
+ UInt64 totalCost = ui64BaseCost + ui64CostLast - uiCostSig[iScanPos];
- if (totalCost < d64BestCost)
+ if (totalCost < ui64BestCost)
{
iBestLastIdxP1 = iScanPos + 1;
- d64BestCost = totalCost;
+ ui64BestCost = totalCost;
}
if (piDstCoeff[uiBlkPos] > 1)
{
bFoundLast = true;
break;
}
- d64BaseCost -= pdCostCoeff[iScanPos];
- d64BaseCost += pdCostCoeff0[iScanPos];
+ ui64BaseCost -= uiCostCoeff[iScanPos];
+ ui64BaseCost += uiCostCoeff0[iScanPos];
}
else
{
- d64BaseCost -= pdCostSig[iScanPos];
+ ui64BaseCost -= uiCostSig[iScanPos];
}
} //end for
@@ -1557,7 +1554,7 @@
{
Int64 rdFactor = (Int64)(
g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1 << (2 * m_cQP.m_iPer))
- / m_dLambda / 16 / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (uiBitDepth - 8)))
+ / xGetICost(1) / 16 / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (uiBitDepth - 8)))
+ 0.5);
Int lastCG = -1;
Int absSum = 0;
@@ -1789,9 +1786,9 @@
* \returns best quantized transform level for given scan position
* This method calculates the best quantized transform level for a given scan position.
*/
-__inline UInt TComTrQuant::xGetCodedLevel(Double& rd64CodedCost,
- Double& rd64CodedCost0,
- Double& rd64CodedCostSig,
+__inline UInt TComTrQuant::xGetCodedLevel(UInt64& rui64CodedCost,
+ UInt64& rui64CodedCost0,
+ UInt64& rui64CodedCostSig,
Int lLevelDouble,
UInt uiMaxAbsLevel,
UShort ui16CtxNumSig,
@@ -1801,16 +1798,16 @@
UInt c1Idx,
UInt c2Idx,
Int iQBits,
- Double dTemp,
+ UInt64 uiErrorScale,
Bool bLast) const
{
- Double dCurrCostSig = 0;
+ UInt64 uiCurrCostSig = 0;
UInt uiBestAbsLevel = 0;
if (!bLast && uiMaxAbsLevel < 3)
{
- rd64CodedCostSig = xGetRateSigCoef(0, ui16CtxNumSig);
- rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
+ rui64CodedCostSig = xGetRateSigCoef(0, ui16CtxNumSig);
+ rui64CodedCost = rui64CodedCost0 + rui64CodedCostSig;
if (uiMaxAbsLevel == 0)
{
return uiBestAbsLevel;
@@ -1818,26 +1815,26 @@
}
else
{
- rd64CodedCost = MAX_DOUBLE;
+ rui64CodedCost = MAX_INT64;
}
if (!bLast)
{
- dCurrCostSig = xGetRateSigCoef(1, ui16CtxNumSig);
+ uiCurrCostSig = xGetRateSigCoef(1, ui16CtxNumSig);
}
- UInt uiMinAbsLevel = (uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1);
- for (Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel; uiAbsLevel--)
+ UInt uiMinAbsLevel = (uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1);
+ for (Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel; uiAbsLevel--)
{
- Double dErr = Double(lLevelDouble - (uiAbsLevel << iQBits));
- Double dCurrCost = dErr * dErr * dTemp + xGetICRateCost(uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx);
- dCurrCost += dCurrCostSig;
+ UInt64 uiErr = lLevelDouble - (uiAbsLevel << iQBits);
+ UInt64 uiCurrCost = xApplyScale(uiErr * uiErr, uiErrorScale) + xGetICRateCost(uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx);
+ uiCurrCost += uiCurrCostSig;
- if (dCurrCost < rd64CodedCost)
+ if (uiCurrCost < rui64CodedCost)
{
uiBestAbsLevel = uiAbsLevel;
- rd64CodedCost = dCurrCost;
- rd64CodedCostSig = dCurrCostSig;
+ rui64CodedCost = uiCurrCost;
+ rui64CodedCostSig = uiCurrCostSig;
}
}
@@ -1851,19 +1848,19 @@
* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
* \returns cost of given absolute transform level
*/
-__inline Double TComTrQuant::xGetICRateCost(UInt uiAbsLevel,
+__inline UInt64 TComTrQuant::xGetICRateCost(UInt uiAbsLevel,
UShort ui16CtxNumOne,
UShort ui16CtxNumAbs,
UShort ui16AbsGoRice,
UInt c1Idx,
UInt c2Idx) const
{
- Double iRate = xGetIEPRate();
- UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
+ UInt iRate = xGetIEPRate();
+ UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
if (uiAbsLevel >= baseLevel)
{
- UInt symbol = uiAbsLevel - baseLevel;
+ UInt symbol = uiAbsLevel - baseLevel;
UInt length;
if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
{
@@ -1974,11 +1971,11 @@
* \param uiPosY Y coordinate of the last significant coefficient
* \returns cost of last significant coefficient
*/
-__inline Double TComTrQuant::xGetRateLast(UInt uiPosX, UInt uiPosY) const
+__inline UInt64 TComTrQuant::xGetRateLast(UInt uiPosX, UInt uiPosY) const
{
- UInt uiCtxX = g_uiGroupIdx[uiPosX];
- UInt uiCtxY = g_uiGroupIdx[uiPosY];
- Double uiCost = m_pcEstBitsSbac->lastXBits[uiCtxX] + m_pcEstBitsSbac->lastYBits[uiCtxY];
+ UInt uiCtxX = g_uiGroupIdx[uiPosX];
+ UInt uiCtxY = g_uiGroupIdx[uiPosY];
+ UInt uiCost = m_pcEstBitsSbac->lastXBits[uiCtxX] + m_pcEstBitsSbac->lastYBits[uiCtxY];
if (uiCtxX > 3)
{
@@ -2020,7 +2017,7 @@
}
/** set quantized matrix coefficient for encode
- * \param scalingList quantaized matrix address
+ * \param scalingList quantized matrix address
*/
Void TComTrQuant::setScalingList(TComScalingList *scalingList)
{
@@ -2042,7 +2039,7 @@
}
/** set quantized matrix coefficient for decode
- * \param scalingList quantaized matrix address
+ * \param scalingList quantized matrix address
*/
Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
{
@@ -2072,23 +2069,20 @@
Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
- UInt i, uiMaxNumCoeff = g_scalingListSize[size];
- Int *piQuantcoeff;
- Double *pdErrScale;
+ UInt uiMaxNumCoeff = g_scalingListSize[size];
+ Int *piQuantcoeff = getQuantCoeff(list, qp, size);
+ UInt64 *piErrScale = getErrScaleCoeff(list, size, qp);
- piQuantcoeff = getQuantCoeff(list, qp, size);
- pdErrScale = getErrScaleCoeff(list, size, qp);
-
- Double dErrScale = (Double)(1 << SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
+ Double dErrScale = (Double)(1 << SCALE_BITS); // Compensate for scaling of bit count in Lagrange cost function
dErrScale = dErrScale * pow(2.0, -2.0 * iTransformShift); // Compensate for scaling through forward transform
- for (i = 0; i < uiMaxNumCoeff; i++)
+ for (UInt i = 0; i < uiMaxNumCoeff; i++)
{
- pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (bitDepth - 8)));
+ piErrScale[i] = (UInt64)floor((1LL<<28) * (dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (bitDepth - 8)))));
}
}
/** set quantized matrix coefficient for encode
- * \param scalingList quantaized matrix address
+ * \param scalingList quantized matrix address
* \param listId List index
* \param sizeId size index
* \param uiQP Quantization parameter
@@ -2102,12 +2096,11 @@
Int *coeff = scalingList->getScalingListAddress(sizeId, listId);
quantcoeff = getQuantCoeff(listId, qp, sizeId);
-
processScalingListEnc(coeff, quantcoeff, g_quantScales[qp] << 4, height, width, ratio, min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]), scalingList->getScalingListDC(sizeId, listId));
}
/** set quantized matrix coefficient for decode
- * \param scalingList quantaized matrix address
+ * \param scalingList quantized matrix address
* \param list List index
* \param size size index
* \param uiQP Quantization parameter
@@ -2133,7 +2126,7 @@
for (size = 0; size < SCALING_LIST_SIZE_NUM; size++)
{
- for (list = 0; list < g_scalingListNum[size]; list++)
+ for (list = 0; list < g_scalingListNum[size]; list++)
{
for (qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
{
@@ -2168,8 +2161,8 @@
}
/** set quantized matrix coefficient for encode
- * \param coeff quantaized matrix address
- * \param quantcoeff quantaized matrix address
+ * \param coeff quantized matrix address
+ * \param quantcoeff quantized matrix address
* \param quantScales Q(QP%6)
* \param height height
* \param width width
@@ -2197,8 +2190,8 @@
}
/** set quantized matrix coefficient for decode
- * \param coeff quantaized matrix address
- * \param dequantcoeff quantaized matrix address
+ * \param coeff quantized matrix address
+ * \param dequantcoeff quantized matrix address
* \param invQuantScales IQ(QP%6))
* \param height height
* \param width width
@@ -2222,6 +2215,17 @@
}
}
+Void TComTrQuant::setLambda(Double dLambdaLuma, Double dLambdaChroma)
+{
+ m_uiLambdaLuma = (UInt64)floor(65536.0 * dLambdaLuma); // luma lambda stored as an integer scaled by 65536 (2^16), rounded down by floor()
+ m_uiLambdaChroma = (UInt64)floor(65536.0 * dLambdaChroma); // chroma lambda stored with the same 65536 scaling
+}
+
+Void TComTrQuant::selectLambda(TextType eTType)
+{
+ m_uiLambda = (eTType == TEXT_LUMA) ? m_uiLambdaLuma : m_uiLambdaChroma; // TEXT_LUMA picks the luma lambda; any other texture type picks the chroma lambda
+}
+
/** initialization process of scaling list array
*/
Void TComTrQuant::initScalingList()
@@ -2234,7 +2238,7 @@
{
m_quantCoef[sizeId][listId][qp] = new Int[g_scalingListSize[sizeId]];
m_dequantCoef[sizeId][listId][qp] = new Int[g_scalingListSize[sizeId]];
- m_errScale[sizeId][listId][qp] = new Double[g_scalingListSize[sizeId]];
+ m_errScale[sizeId][listId][qp] = new UInt64[g_scalingListSize[sizeId]];
}
}
}
diff -r f9e9b981602e -r 661b0c1b9611 source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h Tue Jun 18 21:40:42 2013 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.h Wed Jun 19 00:07:28 2013 -0500
@@ -142,47 +142,40 @@
UInt uiAbsPartIdx,
Bool useTransformSkip = false);
- Void invtransformNxN(Bool transQuantBypass, TextType eText, UInt uiMode, Short* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip = false);
- Void invRecurTransformNxN(TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Short* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight,
+ Void invtransformNxN(Bool transQuantBypass, TextType eText, UInt uiMode, Short* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip = false);
+ Void invRecurTransformNxN(TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Short* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight,
UInt uiMaxTrMode, UInt uiTrMode, TCoeff* rpcCoeff);
// Misc functions
Void setQPforQuant(Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset);
- Void setLambda(Double dLambdaLuma, Double dLambdaChroma) { m_dLambdaLuma = dLambdaLuma; m_dLambdaChroma = dLambdaChroma; }
+ Void setLambda(Double dLambdaLuma, Double dLambdaChroma);
- Void selectLambda(TextType eTType) { m_dLambda = (eTType == TEXT_LUMA) ? m_dLambdaLuma : m_dLambdaChroma; }
+ Void selectLambda(TextType eTType);
Void setRDOQOffset(UInt uiRDOQOffset) { m_uiRDOQOffset = uiRDOQOffset; }
estBitsSbacStruct* m_pcEstBitsSbac;
- static Int calcPatternSigCtx(const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height);
+ static Int calcPatternSigCtx(const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height);
- static Int getSigCtxInc(Int patternSigCtx,
- UInt scanIdx,
- Int posX,
- Int posY,
- Int log2BlkSize,
- TextType textureType);
+ static Int getSigCtxInc(Int patternSigCtx,
+ UInt scanIdx,
+ Int posX,
+ Int posY,
+ Int log2BlkSize,
+ TextType textureType);
static UInt getSigCoeffGroupCtxInc(const UInt* uiSigCoeffGroupFlag,
const UInt uiCGPosX,
const UInt uiCGPosY,
Int width, Int height);
+
Void initScalingList();
Void destroyScalingList();
Void setErrScaleCoeff(UInt list, UInt size, UInt qp);
- Double* getErrScaleCoeff(UInt list, UInt size, UInt qp) { return m_errScale[size][list][qp]; } //!< get Error Scale Coefficent
-
- Int* getQuantCoeff(UInt list, UInt qp, UInt size) { return m_quantCoef[size][list][qp]; } //!< get Quant Coefficent
-
- Int* getDequantCoeff(UInt list, UInt qp, UInt size) { return m_dequantCoef[size][list][qp]; } //!< get DeQuant Coefficent
-
Void setUseScalingList(Bool bUseScalingList) { m_scalingListEnabledFlag = bUseScalingList; }
-
Bool getUseScalingList() { return m_scalingListEnabledFlag; }
-
Void setFlatScalingList();
Void xsetFlatScalingList(UInt list, UInt size, UInt qp);
Void xSetScalingListEnc(TComScalingList *scalingList, UInt list, UInt size, UInt qp);
@@ -191,14 +184,12 @@
Void setScalingListDec(TComScalingList *scalingList);
Void processScalingListEnc(Int *coeff, Int *quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc);
Void processScalingListDec(Int *coeff, Int *dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc);
- Void initSliceQpDelta();
- Void storeSliceQpNext(TComSlice* pcSlice);
- Void clearSliceARLCnt();
- Int getQpDelta(Int qp) { return m_qpDelta[qp]; }
-
- Int* getSliceNSamples() { return m_sliceNsamples; }
-
- Double* getSliceSumC() { return m_sliceSumC; }
+ Void initSliceQpDelta();
+ Void storeSliceQpNext(TComSlice* pcSlice);
+ Void clearSliceARLCnt();
+ Int getQpDelta(Int qp) { return m_qpDelta[qp]; }
+ Int* getSliceNSamples() { return m_sliceNsamples; }
+ Double* getSliceSumC() { return m_sliceSumC; }
protected:
@@ -209,9 +200,9 @@
QpParam m_cQP;
- Double m_dLambda;
- Double m_dLambdaLuma;
- Double m_dLambdaChroma;
+ UInt64 m_uiLambda;
+ UInt64 m_uiLambdaLuma;
+ UInt64 m_uiLambdaChroma;
UInt m_uiRDOQOffset;
UInt m_uiMaxTrSize;
@@ -221,13 +212,19 @@
Bool m_bUseAdaptQpSelect;
Bool m_useTransformSkipFast;
Bool m_scalingListEnabledFlag;
- Int *m_quantCoef[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of quantization matrix coefficient 4x4
- Int *m_dequantCoef[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of dequantization matrix coefficient 4x4
+ Int *m_quantCoef[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of quantization matrix coefficient 4x4
+ Int *m_dequantCoef[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM]; ///< array of dequantization matrix coefficient 4x4
- Double *m_errScale[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM];
+ UInt64 *m_errScale[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM];
private:
+ UInt64* getErrScaleCoeff(UInt list, UInt size, UInt qp) { return m_errScale[size][list][qp]; } //!< get the error scale coefficient table stored in m_errScale for (size, list, qp)
+
+ Int* getQuantCoeff(UInt list, UInt qp, UInt size) { return m_quantCoef[size][list][qp]; } //!< get the quantization coefficient table stored in m_quantCoef for (size, list, qp)
+
+ Int* getDequantCoeff(UInt list, UInt qp, UInt size) { return m_dequantCoef[size][list][qp]; } //!< get the dequantization coefficient table stored in m_dequantCoef for (size, list, qp)
+
// skipping Transform
Void xTransformSkip(Int bitDepth, Short* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height);
@@ -246,19 +243,19 @@
// RDOQ functions
- Void xRateDistOptQuant(TComDataCU * pcCU,
- Int * plSrcCoeff,
- TCoeff * piDstCoeff,
- Int * & piArlDstCoeff,
- UInt uiWidth,
- UInt uiHeight,
- UInt & uiAbsSum,
- TextType eTType,
- UInt uiAbsPartIdx);
+ Void xRateDistOptQuant(TComDataCU * pcCU,
+ Int * plSrcCoeff,
+ TCoeff * piDstCoeff,
+ Int * & piArlDstCoeff,
+ UInt uiWidth,
+ UInt uiHeight,
+ UInt & uiAbsSum,
+ TextType eTType,
+ UInt uiAbsPartIdx);
- __inline UInt xGetCodedLevel(Double& rd64CodedCost,
- Double& rd64CodedCost0,
- Double& rd64CodedCostSig,
+ __inline UInt xGetCodedLevel(UInt64& rui64CodedCost,
+ UInt64& rui64CodedCost0,
+ UInt64& rui64CodedCostSig,
Int lLevelDouble,
UInt uiMaxAbsLevel,
UShort ui16CtxNumSig,
@@ -268,10 +265,10 @@
UInt c1Idx,
UInt c2Idx,
Int iQBits,
- Double dTemp,
+ UInt64 uiErrScale,
Bool bLast) const;
- __inline Double xGetICRateCost(UInt uiAbsLevel,
+ __inline UInt64 xGetICRateCost(UInt uiAbsLevel,
UShort ui16CtxNumOne,
UShort ui16CtxNumAbs,
UShort ui16AbsGoRice,
@@ -285,12 +282,12 @@
UInt c1Idx,
UInt c2Idx) const;
- __inline Double xGetRateLast(UInt uiPosX, UInt uiPosY) const;
-
- __inline Double xGetRateSigCoeffGroup(UShort uiSignificanceCoeffGroup, UShort ui16CtxNumSig) const { return m_dLambda * m_pcEstBitsSbac->significantCoeffGroupBits[ui16CtxNumSig][uiSignificanceCoeffGroup]; }
- __inline Double xGetRateSigCoef(UShort uiSignificance, UShort ui16CtxNumSig) const { return m_dLambda * m_pcEstBitsSbac->significantBits[ui16CtxNumSig][uiSignificance]; }
- __inline Double xGetICost(Double dRate) const { return m_dLambda * dRate; } ///< Get the cost for a specific rate
- __inline Double xGetIEPRate() const { return 32768; } ///< Get the cost of an equal probable bit
+ __inline UInt64 xGetRateLast(UInt uiPosX, UInt uiPosY) const;
+ __inline UInt64 xGetRateSigCoeffGroup(UShort uiSignificanceCoeffGroup, UShort ui16CtxNumSig) const { return xGetICost(m_pcEstBitsSbac->significantCoeffGroupBits[ui16CtxNumSig][uiSignificanceCoeffGroup]); } ///< Cost (via xGetICost) of the significantCoeffGroupBits entry for the given context and flag value
+ __inline UInt64 xGetRateSigCoef(UShort uiSignificance, UShort ui16CtxNumSig) const { return xGetICost(m_pcEstBitsSbac->significantBits[ui16CtxNumSig][uiSignificance]); } ///< Cost (via xGetICost) of the significantBits entry for the given context and flag value
+ __inline UInt64 xGetICost(UInt64 bits) const { return (m_uiLambda * bits + 32768) >> 16; } ///< Get the cost for a specific rate: m_uiLambda * bits, rounded and shifted right by 16
+ __inline UInt xGetIEPRate() const { return 32768; } ///< Get the cost of an equal probable bit (constant 32768)
+ __inline UInt64 xApplyScale(UInt64 cost, UInt64 errScale) const { return (errScale * cost + (1LL << 27)) >> 28; } ///< Rounded (errScale * cost) >> 28; returned as UInt64 so scaled costs above 32 bits are not truncated before callers accumulate them
// dequantization
Void xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType);
More information about the x265-devel
mailing list