[x265] [PATCH] TComTrQuant: switch from Double to UInt64 cost scaling BUGGY

Wed Jun 19 07:56:21 CEST 2013

# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1371618448 18000
#      Wed Jun 19 00:07:28 2013 -0500
# Node ID 661b0c1b961147e25b8fdd71059188c636390510
# Parent  f9e9b981602e811b47701ba14216baba1c919735
TComTrQuant: switch from Double to UInt64 cost scaling *BUGGY*

This loses too much PSNR and I-frame compression efficiency.  There must be a
bug lurking somewhere.

diff -r f9e9b981602e -r 661b0c1b9611 source/Lib/TLibCommon/TComTrQuant.cpp

--- a/source/Lib/TLibCommon/TComTrQuant.cpp	Tue Jun 18 21:40:42 2013 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Wed Jun 19 00:07:28 2013 -0500
@@ -46,10 +46,10 @@
 typedef struct
 {
     Int    iNNZbeforePos0;
-    Double d64CodedLevelandDist; // distortion and level cost only
-    Double d64UncodedDist;  // all zero coded block distortion
-    Double d64SigCost;
-    Double d64SigCost_0;
+    UInt64 ui64CodedLevelandDist; // distortion and level cost only
+    UInt64 ui64UncodedDist;  // all zero coded block distortion
+    UInt64 ui64SigCost;
+    UInt64 ui64SigCost_0;
 } coeffGroupRDStats;
 
 //! \ingroup TLibCommon
@@ -1196,7 +1196,7 @@
     UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
     Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
     UInt       uiGoRiceParam       = 0;
-    Double     d64BlockUncodedCost = 0;
+    UInt64     ui64BlockUncodedCost = 0;
     const UInt uiLog2BlkSize       = g_aucConvertToBit[uiWidth] + 2;
     const UInt uiMaxNumCoeff       = uiWidth * uiHeight;
     Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
@@ -1204,17 +1204,16 @@
     assert(scalingListType < 6);
 
     Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift;                 // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
-    Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType, uiLog2TrSize - 2, m_cQP.m_iRem);
+    UInt64 *puiErrScale = getErrScaleCoeff(scalingListType, uiLog2TrSize - 2, m_cQP.m_iRem);
     Int *piQCoefOrg = getQuantCoeff(scalingListType, m_cQP.m_iRem, uiLog2TrSize - 2);
     Int *piQCoef = piQCoefOrg;
-    Double *pdErrScale = pdErrScaleOrg;
     Int iQBitsC = iQBits - ARL_C_PRECISION;
     Int iAddC =  1 << (iQBitsC - 1);
     UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType == TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
 
-    Double pdCostCoeff[32 * 32];
-    Double pdCostSig[32 * 32];
-    Double pdCostCoeff0[32 * 32];
+    UInt64 uiCostCoeff[32 * 32];
+    UInt64 uiCostSig[32 * 32];
+    UInt64 uiCostCoeff0[32 * 32];
 
     Int rateIncUp[32 * 32];
     Int rateIncDown[32 * 32];
@@ -1222,19 +1221,17 @@
     Int deltaU[32 * 32];
 
     const UInt * scanCG;
+    scanCG = g_auiSigLastScan[uiScanIdx][uiLog2BlkSize > 3 ? uiLog2BlkSize - 2 - 1 : 0];
+    if (uiLog2BlkSize == 3)
     {
-        scanCG = g_auiSigLastScan[uiScanIdx][uiLog2BlkSize > 3 ? uiLog2BlkSize - 2 - 1 : 0];
-        if (uiLog2BlkSize == 3)
-        {
-            scanCG = g_sigLastScan8x8[uiScanIdx];
-        }
-        else if (uiLog2BlkSize == 5)
-        {
-            scanCG = g_sigLastScanCG32x32;
-        }
+        scanCG = g_sigLastScan8x8[uiScanIdx];
+    }
+    else if (uiLog2BlkSize == 5)
+    {
+        scanCG = g_sigLastScanCG32x32;
     }
     const UInt uiCGSize = (1 << MLS_CG_SIZE);       // 16
-    Double pdCostCoeffGroupSig[MLS_GRP_NUM];
+    UInt64 uiCostCoeffGroupSig[MLS_GRP_NUM];
     UInt uiSigCoeffGroupFlag[MLS_GRP_NUM];
     UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
     Int iCGLastScanPos = -1;
@@ -1242,7 +1239,7 @@
     UInt    uiCtxSet            = 0;
     Int     c1                  = 1;
     Int     c2                  = 0;
-    Double  d64BaseCost         = 0;
+    UInt64  ui64BaseCost         = 0;
     Int     iLastScanPos        = -1;
 
     UInt    c1Idx     = 0;
@@ -1251,7 +1248,7 @@
 
     const UInt *scan = g_auiSigLastScan[uiScanIdx][uiLog2BlkSize - 1];
 
-    ::memset(uiSigCoeffGroupFlag,   0, sizeof(UInt) * MLS_GRP_NUM);
+    ::memset(uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM);
 
     UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
     Int iScanPos;
@@ -1269,29 +1266,28 @@
         {
             iScanPos = iCGScanPos * uiCGSize + iScanPosinCG;
             //===== quantization =====
-            UInt    uiBlkPos          = scan[iScanPos];
+            UInt uiBlkPos = scan[iScanPos];
             // set coeff
             Int uiQ  = piQCoef[uiBlkPos];
-            Double dTemp = pdErrScale[uiBlkPos];
-            Int lLevelDouble          = plSrcCoeff[uiBlkPos];
-            lLevelDouble              = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ, MAX_INT - (1 << (iQBits - 1)));
+            UInt64 uiErrScale = puiErrScale[uiBlkPos];
+            Int lLevelDouble = plSrcCoeff[uiBlkPos];
+            lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ, MAX_INT - (1 << (iQBits - 1)));
 
             if (m_bUseAdaptQpSelect)
             {
-                piArlDstCoeff[uiBlkPos]   = (Int)((lLevelDouble + iAddC) >> iQBitsC);
+                piArlDstCoeff[uiBlkPos] = (Int)((lLevelDouble + iAddC) >> iQBitsC);
             }
-            UInt uiMaxAbsLevel        = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
+            UInt uiMaxAbsLevel       = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
 
-            Double dErr               = Double(lLevelDouble);
-            pdCostCoeff0[iScanPos]  = dErr * dErr * dTemp;
-            d64BlockUncodedCost      += pdCostCoeff0[iScanPos];
-            piDstCoeff[uiBlkPos]    = uiMaxAbsLevel;
+            uiCostCoeff0[iScanPos]   = xApplyScale(lLevelDouble * lLevelDouble, uiErrScale);
+            ui64BlockUncodedCost    += uiCostCoeff0[iScanPos];
+            piDstCoeff[uiBlkPos]     = uiMaxAbsLevel;
 
             if (uiMaxAbsLevel > 0 && iLastScanPos < 0)
             {
-                iLastScanPos            = iScanPos;
-                uiCtxSet                = (iScanPos < SCAN_SET_SIZE || eTType != TEXT_LUMA) ? 0 : 2;
-                iCGLastScanPos          = iCGScanPos;
+                iLastScanPos   = iScanPos;
+                uiCtxSet       = (iScanPos < SCAN_SET_SIZE || eTType != TEXT_LUMA) ? 0 : 2;
+                iCGLastScanPos = iCGScanPos;
             }
 
             if (iLastScanPos >= 0)
@@ -1300,25 +1296,26 @@
                 rateIncDown[uiBlkPos] = 0;
                 deltaU[uiBlkPos] = 0;
                 sigRateDelta[uiBlkPos] = 0;
+
                 //===== coefficient level estimation =====
                 UInt  uiLevel;
-                UInt  uiOneCtx         = 4 * uiCtxSet + c1;
-                UInt  uiAbsCtx         = uiCtxSet + c2;
+                UInt  uiOneCtx = 4 * uiCtxSet + c1;
+                UInt  uiAbsCtx = uiCtxSet + c2;
 
                 if (iScanPos == iLastScanPos)
                 {
-                    uiLevel              = xGetCodedLevel(pdCostCoeff[iScanPos], pdCostCoeff0[iScanPos], pdCostSig[iScanPos],
-                                                          lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
-                                                          c1Idx, c2Idx, iQBits, dTemp, 1);
+                    uiLevel = xGetCodedLevel(uiCostCoeff[iScanPos], uiCostCoeff0[iScanPos], uiCostSig[iScanPos],
+                                             lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
+                                             c1Idx, c2Idx, iQBits, uiErrScale, 1);
                 }
                 else
                 {
-                    UInt   uiPosY        = uiBlkPos >> uiLog2BlkSize;
-                    UInt   uiPosX        = uiBlkPos - (uiPosY << uiLog2BlkSize);
-                    UShort uiCtxSig      = getSigCtxInc(patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType);
-                    uiLevel              = xGetCodedLevel(pdCostCoeff[iScanPos], pdCostCoeff0[iScanPos], pdCostSig[iScanPos],
-                                                          lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
-                                                          c1Idx, c2Idx, iQBits, dTemp, 0);
+                    UInt uiPosY     = uiBlkPos >> uiLog2BlkSize;
+                    UInt uiPosX     = uiBlkPos - (uiPosY << uiLog2BlkSize);
+                    UShort uiCtxSig = getSigCtxInc(patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType);
+                    uiLevel         = xGetCodedLevel(uiCostCoeff[iScanPos], uiCostCoeff0[iScanPos], uiCostSig[iScanPos],
+                                                     lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
+                                                     c1Idx, c2Idx, iQBits, uiErrScale, 0);
                     sigRateDelta[uiBlkPos] = m_pcEstBitsSbac->significantBits[uiCtxSig][1] - m_pcEstBitsSbac->significantBits[uiCtxSig][0];
                 }
                 deltaU[uiBlkPos] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits - 8);
@@ -1333,7 +1330,7 @@
                     rateIncUp[uiBlkPos] = m_pcEstBitsSbac->m_greaterOneBits[uiOneCtx][0];
                 }
                 piDstCoeff[uiBlkPos] = uiLevel;
-                d64BaseCost           += pdCostCoeff[iScanPos];
+                ui64BaseCost        += uiCostCoeff[iScanPos];
 
                 baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
                 if (uiLevel >= baseLevel)
@@ -1378,19 +1375,19 @@
             }
             else
             {
-                pdCostCoeff[iScanPos] = 0;
-                d64BaseCost += pdCostCoeff0[iScanPos];
+                uiCostCoeff[iScanPos] = 0;
+                ui64BaseCost += uiCostCoeff0[iScanPos];
             }
-            rdStats.d64SigCost += pdCostSig[iScanPos];
+            rdStats.ui64SigCost += uiCostSig[iScanPos];
             if (iScanPosinCG == 0)
             {
-                rdStats.d64SigCost_0 = pdCostSig[iScanPos];
+                rdStats.ui64SigCost_0 = uiCostSig[iScanPos];
             }
             if (piDstCoeff[uiBlkPos])
             {
                 uiSigCoeffGroupFlag[uiCGBlkPos] = 1;
-                rdStats.d64CodedLevelandDist += pdCostCoeff[iScanPos] - pdCostSig[iScanPos];
-                rdStats.d64UncodedDist += pdCostCoeff0[iScanPos];
+                rdStats.ui64CodedLevelandDist += uiCostCoeff[iScanPos] - uiCostSig[iScanPos];
+                rdStats.ui64UncodedDist += uiCostCoeff0[iScanPos];
                 if (iScanPosinCG != 0)
                 {
                     rdStats.iNNZbeforePos0++;
@@ -1400,14 +1397,14 @@
 
         if (iCGLastScanPos >= 0)
         {
-            pdCostCoeffGroupSig[iCGScanPos] = 0;
+            uiCostCoeffGroupSig[iCGScanPos] = 0;
             if (iCGScanPos)
             {
                 if (uiSigCoeffGroupFlag[uiCGBlkPos] == 0)
                 {
-                    UInt  uiCtxSig = getSigCoeffGroupCtxInc(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
-                    d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;
-                    pdCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(0, uiCtxSig);
+                    UInt uiCtxSig = getSigCoeffGroupCtxInc(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
+                    ui64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.ui64SigCost;
+                    uiCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(0, uiCtxSig);
                 }
                 else
                 {
@@ -1415,34 +1412,34 @@
                     {
                         if (rdStats.iNNZbeforePos0 == 0)
                         {
-                            d64BaseCost -= rdStats.d64SigCost_0;
-                            rdStats.d64SigCost -= rdStats.d64SigCost_0;
+                            ui64BaseCost -= rdStats.ui64SigCost_0;
+                            rdStats.ui64SigCost -= rdStats.ui64SigCost_0;
                         }
                         // rd-cost if SigCoeffGroupFlag = 0, initialization
-                        Double d64CostZeroCG = d64BaseCost;
+                        UInt64 ui64CostZeroCG = ui64BaseCost;
 
                         // add SigCoeffGroupFlag cost to total cost
-                        UInt  uiCtxSig = getSigCoeffGroupCtxInc(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
+                        UInt uiCtxSig = getSigCoeffGroupCtxInc(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
                         if (iCGScanPos < iCGLastScanPos)
                         {
-                            d64BaseCost  += xGetRateSigCoeffGroup(1, uiCtxSig);
-                            d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
-                            pdCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(1, uiCtxSig);
+                            ui64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
+                            ui64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
+                            uiCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(1, uiCtxSig);
                         }
 
                         // try to convert the current coeff group from non-zero to all-zero
-                        d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
-                        d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
-                        d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
+                        ui64CostZeroCG += rdStats.ui64UncodedDist; // distortion for resetting non-zero levels to zero levels
+                        ui64CostZeroCG -= rdStats.ui64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
+                        ui64CostZeroCG -= rdStats.ui64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels
 
                         // if we can save cost, change this block to all-zero block
-                        if (d64CostZeroCG < d64BaseCost)
+                        if (ui64CostZeroCG < ui64BaseCost)
                         {
                             uiSigCoeffGroupFlag[uiCGBlkPos] = 0;
-                            d64BaseCost = d64CostZeroCG;
+                            ui64BaseCost = ui64CostZeroCG;
                             if (iCGScanPos < iCGLastScanPos)
                             {
-                                pdCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(0, uiCtxSig);
+                                uiCostCoeffGroupSig[iCGScanPos] = xGetRateSigCoeffGroup(0, uiCtxSig);
                             }
                             // reset coeffs to 0 in this block
                             for (Int iScanPosinCG = uiCGSize - 1; iScanPosinCG >= 0; iScanPosinCG--)
@@ -1453,8 +1450,8 @@
                                 if (piDstCoeff[uiBlkPos])
                                 {
                                     piDstCoeff[uiBlkPos] = 0;
-                                    pdCostCoeff[iScanPos] = pdCostCoeff0[iScanPos];
-                                    pdCostSig[iScanPos] = 0;
+                                    uiCostCoeff[iScanPos] = uiCostCoeff0[iScanPos];
+                                    uiCostSig[iScanPos] = 0;
                                 }
                             }
                         } // end if ( d64CostAllZeros < d64BaseCost )
@@ -1474,21 +1471,21 @@
         return;
     }
 
-    Double  d64BestCost         = 0;
+    UInt64  ui64BestCost        = 0;
     Int     ui16CtxCbf          = 0;
     Int     iBestLastIdxP1      = 0;
     if (!pcCU->isIntra(uiAbsPartIdx) && eTType == TEXT_LUMA && pcCU->getTransformIdx(uiAbsPartIdx) == 0)
     {
-        ui16CtxCbf   = 0;
-        d64BestCost  = d64BlockUncodedCost + xGetICost(m_pcEstBitsSbac->blockRootCbpBits[ui16CtxCbf][0]);
-        d64BaseCost += xGetICost(m_pcEstBitsSbac->blockRootCbpBits[ui16CtxCbf][1]);
+        ui16CtxCbf    = 0;
+        ui64BestCost  = ui64BlockUncodedCost + xGetICost(m_pcEstBitsSbac->blockRootCbpBits[ui16CtxCbf][0]);
+        ui64BaseCost += xGetICost(m_pcEstBitsSbac->blockRootCbpBits[ui16CtxCbf][1]);
     }
     else
     {
-        ui16CtxCbf   = pcCU->getCtxQtCbf(eTType, pcCU->getTransformIdx(uiAbsPartIdx));
-        ui16CtxCbf   = (eTType ? TEXT_CHROMA : eTType) * NUM_QT_CBF_CTX + ui16CtxCbf;
-        d64BestCost  = d64BlockUncodedCost + xGetICost(m_pcEstBitsSbac->blockCbpBits[ui16CtxCbf][0]);
-        d64BaseCost += xGetICost(m_pcEstBitsSbac->blockCbpBits[ui16CtxCbf][1]);
+        ui16CtxCbf    = pcCU->getCtxQtCbf(eTType, pcCU->getTransformIdx(uiAbsPartIdx));
+        ui16CtxCbf    = (eTType ? TEXT_CHROMA : eTType) * NUM_QT_CBF_CTX + ui16CtxCbf;
+        ui64BestCost  = ui64BlockUncodedCost + xGetICost(m_pcEstBitsSbac->blockCbpBits[ui16CtxCbf][0]);
+        ui64BaseCost += xGetICost(m_pcEstBitsSbac->blockCbpBits[ui16CtxCbf][1]);
     }
 
     Bool bFoundLast = false;
@@ -1496,7 +1493,7 @@
     {
         UInt uiCGBlkPos = scanCG[iCGScanPos];
 
-        d64BaseCost -= pdCostCoeffGroupSig[iCGScanPos];
+        ui64BaseCost -= uiCostCoeffGroupSig[iCGScanPos];
         if (uiSigCoeffGroupFlag[uiCGBlkPos])
         {
             for (Int iScanPosinCG = uiCGSize - 1; iScanPosinCG >= 0; iScanPosinCG--)
@@ -1510,25 +1507,25 @@
                     UInt   uiPosY       = uiBlkPos >> uiLog2BlkSize;
                     UInt   uiPosX       = uiBlkPos - (uiPosY << uiLog2BlkSize);
 
-                    Double d64CostLast = uiScanIdx == SCAN_VER ? xGetRateLast(uiPosY, uiPosX) : xGetRateLast(uiPosX, uiPosY);
-                    Double totalCost = d64BaseCost + d64CostLast - pdCostSig[iScanPos];
+                    UInt64 ui64CostLast = uiScanIdx == SCAN_VER ? xGetRateLast(uiPosY, uiPosX) : xGetRateLast(uiPosX, uiPosY);
+                    UInt64 totalCost = ui64BaseCost + ui64CostLast - uiCostSig[iScanPos];
 
-                    if (totalCost < d64BestCost)
+                    if (totalCost < ui64BestCost)
                     {
                         iBestLastIdxP1  = iScanPos + 1;
-                        d64BestCost     = totalCost;
+                        ui64BestCost    = totalCost;
                     }
                     if (piDstCoeff[uiBlkPos] > 1)
                     {
                         bFoundLast = true;
                         break;
                     }
-                    d64BaseCost      -= pdCostCoeff[iScanPos];
-                    d64BaseCost      += pdCostCoeff0[iScanPos];
+                    ui64BaseCost -= uiCostCoeff[iScanPos];
+                    ui64BaseCost += uiCostCoeff0[iScanPos];
                 }
                 else
                 {
-                    d64BaseCost      -= pdCostSig[iScanPos];
+                    ui64BaseCost -= uiCostSig[iScanPos];
                 }
             } //end for
 
@@ -1557,7 +1554,7 @@
     {
         Int64 rdFactor = (Int64)(
                 g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1 << (2 * m_cQP.m_iPer))
-                / m_dLambda / 16 / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (uiBitDepth - 8)))
+                / xGetICost(1) / 16 / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (uiBitDepth - 8)))
                 + 0.5);
         Int lastCG = -1;
         Int absSum = 0;
@@ -1789,9 +1786,9 @@
  * \returns best quantized transform level for given scan position
  * This method calculates the best quantized transform level for a given scan position.
  */
-__inline UInt TComTrQuant::xGetCodedLevel(Double& rd64CodedCost,
-                                          Double& rd64CodedCost0,
-                                          Double& rd64CodedCostSig,
+__inline UInt TComTrQuant::xGetCodedLevel(UInt64& rui64CodedCost,
+                                          UInt64& rui64CodedCost0,
+                                          UInt64& rui64CodedCostSig,
                                           Int     lLevelDouble,
                                           UInt    uiMaxAbsLevel,
                                           UShort  ui16CtxNumSig,
@@ -1801,16 +1798,16 @@
                                           UInt    c1Idx,
                                           UInt    c2Idx,
                                           Int     iQBits,
-                                          Double  dTemp,
+                                          UInt64  uiErrorScale,
                                           Bool    bLast) const
 {
-    Double dCurrCostSig   = 0;
+    UInt64 uiCurrCostSig  = 0;
     UInt   uiBestAbsLevel = 0;
 
     if (!bLast && uiMaxAbsLevel < 3)
     {
-        rd64CodedCostSig    = xGetRateSigCoef(0, ui16CtxNumSig);
-        rd64CodedCost       = rd64CodedCost0 + rd64CodedCostSig;
+        rui64CodedCostSig    = xGetRateSigCoef(0, ui16CtxNumSig);
+        rui64CodedCost       = rui64CodedCost0 + rui64CodedCostSig;
         if (uiMaxAbsLevel == 0)
         {
             return uiBestAbsLevel;
@@ -1818,26 +1815,26 @@
     }
     else
     {
-        rd64CodedCost       = MAX_DOUBLE;
+        rui64CodedCost = MAX_INT64;
     }
 
     if (!bLast)
     {
-        dCurrCostSig        = xGetRateSigCoef(1, ui16CtxNumSig);
+        uiCurrCostSig = xGetRateSigCoef(1, ui16CtxNumSig);
     }
 
-    UInt uiMinAbsLevel    = (uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1);
-    for (Int uiAbsLevel  = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel; uiAbsLevel--)
+    UInt uiMinAbsLevel = (uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1);
+    for (Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel; uiAbsLevel--)
     {
-        Double dErr         = Double(lLevelDouble  - (uiAbsLevel << iQBits));
-        Double dCurrCost    = dErr * dErr * dTemp + xGetICRateCost(uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx);
-        dCurrCost          += dCurrCostSig;
+        UInt64 uiErr         = lLevelDouble - (uiAbsLevel << iQBits);
+        UInt64 uiCurrCost    = xApplyScale(uiErr * uiErr, uiErrorScale) + xGetICRateCost(uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx);
+        uiCurrCost          += uiCurrCostSig;
 
-        if (dCurrCost < rd64CodedCost)
+        if (uiCurrCost < rui64CodedCost)
         {
             uiBestAbsLevel    = uiAbsLevel;
-            rd64CodedCost     = dCurrCost;
-            rd64CodedCostSig  = dCurrCostSig;
+            rui64CodedCost    = uiCurrCost;
+            rui64CodedCostSig = uiCurrCostSig;
         }
     }
 
@@ -1851,19 +1848,19 @@
  * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
  * \returns cost of given absolute transform level
  */
-__inline Double TComTrQuant::xGetICRateCost(UInt uiAbsLevel,
+__inline UInt64 TComTrQuant::xGetICRateCost(UInt uiAbsLevel,
                                             UShort ui16CtxNumOne,
                                             UShort ui16CtxNumAbs,
                                             UShort ui16AbsGoRice,
                                             UInt c1Idx,
                                             UInt c2Idx) const
 {
-    Double iRate = xGetIEPRate();
-    UInt baseLevel  =  (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
+    UInt iRate = xGetIEPRate();
+    UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
 
     if (uiAbsLevel >= baseLevel)
     {
-        UInt symbol     = uiAbsLevel - baseLevel;
+        UInt symbol = uiAbsLevel - baseLevel;
         UInt length;
         if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
         {
@@ -1974,11 +1971,11 @@
  * \param uiPosY Y coordinate of the last significant coefficient
  * \returns cost of last significant coefficient
  */
-__inline Double TComTrQuant::xGetRateLast(UInt uiPosX, UInt uiPosY) const
+__inline UInt64 TComTrQuant::xGetRateLast(UInt uiPosX, UInt uiPosY) const
 {
-    UInt uiCtxX   = g_uiGroupIdx[uiPosX];
-    UInt uiCtxY   = g_uiGroupIdx[uiPosY];
-    Double uiCost = m_pcEstBitsSbac->lastXBits[uiCtxX] + m_pcEstBitsSbac->lastYBits[uiCtxY];
+    UInt uiCtxX = g_uiGroupIdx[uiPosX];
+    UInt uiCtxY = g_uiGroupIdx[uiPosY];
+    UInt uiCost = m_pcEstBitsSbac->lastXBits[uiCtxX] + m_pcEstBitsSbac->lastYBits[uiCtxY];
 
     if (uiCtxX > 3)
     {
@@ -2020,7 +2017,7 @@
 }
 
 /** set quantized matrix coefficient for encode
- * \param scalingList quantaized matrix address
+ * \param scalingList quantized matrix address
  */
 Void TComTrQuant::setScalingList(TComScalingList *scalingList)
 {
@@ -2042,7 +2039,7 @@
 }
 
 /** set quantized matrix coefficient for decode
- * \param scalingList quantaized matrix address
+ * \param scalingList quantized matrix address
  */
 Void TComTrQuant::setScalingListDec(TComScalingList *scalingList)
 {
@@ -2072,23 +2069,20 @@
     Int bitDepth = (size < SCALING_LIST_32x32 && list != 0 && list != 3) ? g_bitDepthC : g_bitDepthY;
     Int iTransformShift = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize; // Represents scaling through forward transform
 
-    UInt i, uiMaxNumCoeff = g_scalingListSize[size];
-    Int *piQuantcoeff;
-    Double *pdErrScale;
+    UInt uiMaxNumCoeff = g_scalingListSize[size];
+    Int *piQuantcoeff  = getQuantCoeff(list, qp, size);
+    UInt64 *piErrScale = getErrScaleCoeff(list, size, qp);
 
-    piQuantcoeff   = getQuantCoeff(list, qp, size);
-    pdErrScale     = getErrScaleCoeff(list, size, qp);
-
-    Double dErrScale = (Double)(1 << SCALE_BITS);                          // Compensate for scaling of bitcount in Lagrange cost function
+    Double dErrScale = (Double)(1 << SCALE_BITS);                          // Compensate for scaling of bit count in Lagrange cost function
     dErrScale = dErrScale * pow(2.0, -2.0 * iTransformShift);              // Compensate for scaling through forward transform
-    for (i = 0; i < uiMaxNumCoeff; i++)
+    for (UInt i = 0; i < uiMaxNumCoeff; i++)
     {
-        pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (bitDepth - 8)));
+        piErrScale[i] = (UInt64)floor((1LL<<28) * (dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (bitDepth - 8)))));
     }
 }
 
 /** set quantized matrix coefficient for encode
- * \param scalingList quantaized matrix address
+ * \param scalingList quantized matrix address
  * \param listId List index
  * \param sizeId size index
  * \param uiQP Quantization parameter
@@ -2102,12 +2096,11 @@
     Int *coeff = scalingList->getScalingListAddress(sizeId, listId);
 
     quantcoeff   = getQuantCoeff(listId, qp, sizeId);
-
     processScalingListEnc(coeff, quantcoeff, g_quantScales[qp] << 4, height, width, ratio, min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]), scalingList->getScalingListDC(sizeId, listId));
 }
 
 /** set quantized matrix coefficient for decode
- * \param scalingList quantaized matrix address
+ * \param scalingList quantized matrix address
  * \param list List index
  * \param size size index
  * \param uiQP Quantization parameter
@@ -2133,7 +2126,7 @@
 
     for (size = 0; size < SCALING_LIST_SIZE_NUM; size++)
     {
-        for (list = 0; list <  g_scalingListNum[size]; list++)
+        for (list = 0; list < g_scalingListNum[size]; list++)
         {
             for (qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
             {
@@ -2168,8 +2161,8 @@
 }
 
 /** set quantized matrix coefficient for encode
- * \param coeff quantaized matrix address
- * \param quantcoeff quantaized matrix address
+ * \param coeff quantized matrix address
+ * \param quantcoeff quantized matrix address
  * \param quantScales Q(QP%6)
  * \param height height
  * \param width width
@@ -2197,8 +2190,8 @@
 }
 
 /** set quantized matrix coefficient for decode
- * \param coeff quantaized matrix address
- * \param dequantcoeff quantaized matrix address
+ * \param coeff quantized matrix address
+ * \param dequantcoeff quantized matrix address
  * \param invQuantScales IQ(QP%6))
  * \param height height
  * \param width width
@@ -2222,6 +2215,17 @@
     }
 }
 
+Void TComTrQuant::setLambda(Double dLambdaLuma, Double dLambdaChroma) // stores both lambdas as UInt64 members
+{ // each Double input is multiplied by 65536.0 and rounded down before the cast
+    m_uiLambdaLuma = (UInt64)floor(65536.0 * dLambdaLuma); // luma lambda, scaled by 65536
+    m_uiLambdaChroma = (UInt64)floor(65536.0 * dLambdaChroma); // chroma lambda, same scaling
+}
+
+Void TComTrQuant::selectLambda(TextType eTType) // sets m_uiLambda from one of the two stored lambdas
+{
+    m_uiLambda = (eTType == TEXT_LUMA) ? m_uiLambdaLuma : m_uiLambdaChroma; // luma value for TEXT_LUMA, chroma value for every other text type
+}
+
 /** initialization process of scaling list array
  */
 Void TComTrQuant::initScalingList()
@@ -2234,7 +2238,7 @@
             {
                 m_quantCoef[sizeId][listId][qp] = new Int[g_scalingListSize[sizeId]];
                 m_dequantCoef[sizeId][listId][qp] = new Int[g_scalingListSize[sizeId]];
-                m_errScale[sizeId][listId][qp] = new Double[g_scalingListSize[sizeId]];
+                m_errScale[sizeId][listId][qp] = new UInt64[g_scalingListSize[sizeId]];
             }
         }
     }
diff -r f9e9b981602e -r 661b0c1b9611 source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h	Tue Jun 18 21:40:42 2013 -0500
+++ b/source/Lib/TLibCommon/TComTrQuant.h	Wed Jun 19 00:07:28 2013 -0500
@@ -142,47 +142,40 @@
                       UInt         uiAbsPartIdx,
                       Bool         useTransformSkip = false);
 
-    Void invtransformNxN(Bool transQuantBypass, TextType eText, UInt uiMode, Short* rpcResidual, UInt uiStride, TCoeff*   pcCoeff, UInt uiWidth, UInt uiHeight,  Int scalingListType, Bool useTransformSkip = false);
-    Void invRecurTransformNxN(TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Short* rpcResidual, UInt uiAddr,   UInt uiStride, UInt uiWidth, UInt uiHeight,
+    Void invtransformNxN(Bool transQuantBypass, TextType eText, UInt uiMode, Short* rpcResidual, UInt uiStride, TCoeff* pcCoeff, UInt uiWidth, UInt uiHeight, Int scalingListType, Bool useTransformSkip = false);
+    Void invRecurTransformNxN(TComDataCU* pcCU, UInt uiAbsPartIdx, TextType eTxt, Short* rpcResidual, UInt uiAddr, UInt uiStride, UInt uiWidth, UInt uiHeight,
                               UInt uiMaxTrMode,  UInt uiTrMode, TCoeff* rpcCoeff);
 
     // Misc functions
     Void setQPforQuant(Int qpy, TextType eTxtType, Int qpBdOffset, Int chromaQPOffset);
 
-    Void setLambda(Double dLambdaLuma, Double dLambdaChroma) { m_dLambdaLuma = dLambdaLuma; m_dLambdaChroma = dLambdaChroma; }
+    Void setLambda(Double dLambdaLuma, Double dLambdaChroma);
 
-    Void selectLambda(TextType eTType) { m_dLambda = (eTType == TEXT_LUMA) ? m_dLambdaLuma : m_dLambdaChroma; }
+    Void selectLambda(TextType eTType);
 
     Void setRDOQOffset(UInt uiRDOQOffset) { m_uiRDOQOffset = uiRDOQOffset; }
 
     estBitsSbacStruct* m_pcEstBitsSbac;
 
-    static Int      calcPatternSigCtx(const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height);
+    static Int calcPatternSigCtx(const UInt* sigCoeffGroupFlag, UInt posXCG, UInt posYCG, Int width, Int height);
 
-    static Int      getSigCtxInc(Int      patternSigCtx,
-                                 UInt     scanIdx,
-                                 Int      posX,
-                                 Int      posY,
-                                 Int      log2BlkSize,
-                                 TextType textureType);
+    static Int getSigCtxInc(Int      patternSigCtx,
+                            UInt     scanIdx,
+                            Int      posX,
+                            Int      posY,
+                            Int      log2BlkSize,
+                            TextType textureType);
 
     static UInt getSigCoeffGroupCtxInc(const UInt* uiSigCoeffGroupFlag,
                                        const UInt uiCGPosX,
                                        const UInt uiCGPosY,
                                        Int width, Int height);
+
     Void initScalingList();
     Void destroyScalingList();
     Void setErrScaleCoeff(UInt list, UInt size, UInt qp);
-    Double* getErrScaleCoeff(UInt list, UInt size, UInt qp) { return m_errScale[size][list][qp]; }   //!< get Error Scale Coefficent
-
-    Int* getQuantCoeff(UInt list, UInt qp, UInt size) { return m_quantCoef[size][list][qp]; }        //!< get Quant Coefficent
-
-    Int* getDequantCoeff(UInt list, UInt qp, UInt size) { return m_dequantCoef[size][list][qp]; }    //!< get DeQuant Coefficent
-
     Void setUseScalingList(Bool bUseScalingList) { m_scalingListEnabledFlag = bUseScalingList; }
-
     Bool getUseScalingList() { return m_scalingListEnabledFlag; }
-
     Void setFlatScalingList();
     Void xsetFlatScalingList(UInt list, UInt size, UInt qp);
     Void xSetScalingListEnc(TComScalingList *scalingList, UInt list, UInt size, UInt qp);
@@ -191,14 +184,12 @@
     Void setScalingListDec(TComScalingList *scalingList);
     Void processScalingListEnc(Int *coeff, Int *quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc);
     Void processScalingListDec(Int *coeff, Int *dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc);
-    Void    initSliceQpDelta();
-    Void    storeSliceQpNext(TComSlice* pcSlice);
-    Void    clearSliceARLCnt();
-    Int     getQpDelta(Int qp) { return m_qpDelta[qp]; }
-
-    Int*    getSliceNSamples() { return m_sliceNsamples; }
-
-    Double* getSliceSumC()    { return m_sliceSumC; }
+    Void initSliceQpDelta();
+    Void storeSliceQpNext(TComSlice* pcSlice);
+    Void clearSliceARLCnt();
+    Int  getQpDelta(Int qp) { return m_qpDelta[qp]; }   ///< m_qpDelta entry at index qp (no range check is done here)
+    Int* getSliceNSamples() { return m_sliceNsamples; }   ///< direct, writable access to m_sliceNsamples (no copy is made)
+    Double* getSliceSumC()  { return m_sliceSumC; }   ///< direct, writable access to m_sliceSumC (no copy is made)
 
 protected:
 
@@ -209,9 +200,9 @@
 
     QpParam  m_cQP;
 
-    Double   m_dLambda;
-    Double   m_dLambdaLuma;
-    Double   m_dLambdaChroma;
+    UInt64   m_uiLambda;
+    UInt64   m_uiLambdaLuma;
+    UInt64   m_uiLambdaChroma;
 
     UInt     m_uiRDOQOffset;
     UInt     m_uiMaxTrSize;
@@ -221,13 +212,19 @@
     Bool     m_bUseAdaptQpSelect;
     Bool     m_useTransformSkipFast;
     Bool     m_scalingListEnabledFlag;
-    Int      *m_quantCoef[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM];     ///< array of quantization matrix coefficient 4x4
-    Int      *m_dequantCoef[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM];   ///< array of dequantization matrix coefficient 4x4
+    Int     *m_quantCoef[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM];     ///< array of quantization matrix coefficient 4x4
+    Int     *m_dequantCoef[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM];   ///< array of dequantization matrix coefficient 4x4
 
-    Double   *m_errScale[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM];
+    UInt64  *m_errScale[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM][SCALING_LIST_REM_NUM];
 
 private:
 
+    UInt64* getErrScaleCoeff(UInt list, UInt size, UInt qp) { return m_errScale[size][list][qp]; }    //!< get Error Scale Coefficient pointer stored at m_errScale[size][list][qp]
+
+    Int* getQuantCoeff(UInt list, UInt qp, UInt size) { return m_quantCoef[size][list][qp]; }        //!< get Quant Coefficient pointer; note the argument order is (list, qp, size), unlike getErrScaleCoeff
+
+    Int* getDequantCoeff(UInt list, UInt qp, UInt size) { return m_dequantCoef[size][list][qp]; }    //!< get DeQuant Coefficient pointer; note the argument order is (list, qp, size), unlike getErrScaleCoeff
+
     // skipping Transform
     Void xTransformSkip(Int bitDepth, Short* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height);
 
@@ -246,19 +243,19 @@
 
     // RDOQ functions
 
-    Void           xRateDistOptQuant(TComDataCU * pcCU,
-                                     Int *        plSrcCoeff,
-                                     TCoeff *     piDstCoeff,
-                                     Int * &      piArlDstCoeff,
-                                     UInt         uiWidth,
-                                     UInt         uiHeight,
-                                     UInt &       uiAbsSum,
-                                     TextType     eTType,
-                                     UInt         uiAbsPartIdx);
+    Void xRateDistOptQuant(TComDataCU * pcCU,
+                           Int *        plSrcCoeff,
+                           TCoeff *     piDstCoeff,
+                           Int * &      piArlDstCoeff,
+                           UInt         uiWidth,
+                           UInt         uiHeight,
+                           UInt &       uiAbsSum,
+                           TextType     eTType,
+                           UInt         uiAbsPartIdx);
 
-    __inline UInt xGetCodedLevel(Double& rd64CodedCost,
-                                 Double& rd64CodedCost0,
-                                 Double& rd64CodedCostSig,
+    __inline UInt xGetCodedLevel(UInt64& rui64CodedCost,
+                                 UInt64& rui64CodedCost0,
+                                 UInt64& rui64CodedCostSig,
                                  Int     lLevelDouble,
                                  UInt    uiMaxAbsLevel,
                                  UShort  ui16CtxNumSig,
@@ -268,10 +265,10 @@
                                  UInt    c1Idx,
                                  UInt    c2Idx,
                                  Int     iQBits,
-                                 Double  dTemp,
+                                 UInt64  uiErrScale,
                                  Bool    bLast) const;
 
-    __inline Double xGetICRateCost(UInt uiAbsLevel,
+    __inline UInt64 xGetICRateCost(UInt uiAbsLevel,
                                    UShort ui16CtxNumOne,
                                    UShort ui16CtxNumAbs,
                                    UShort ui16AbsGoRice,
@@ -285,12 +282,12 @@
                             UInt   c1Idx,
                             UInt   c2Idx) const;
 
-    __inline Double xGetRateLast(UInt uiPosX, UInt uiPosY) const;
-
-    __inline Double xGetRateSigCoeffGroup(UShort uiSignificanceCoeffGroup, UShort ui16CtxNumSig) const { return m_dLambda * m_pcEstBitsSbac->significantCoeffGroupBits[ui16CtxNumSig][uiSignificanceCoeffGroup]; }
-    __inline Double xGetRateSigCoef(UShort uiSignificance, UShort ui16CtxNumSig) const { return m_dLambda * m_pcEstBitsSbac->significantBits[ui16CtxNumSig][uiSignificance]; }
-    __inline Double xGetICost(Double dRate) const { return m_dLambda * dRate; } ///< Get the cost for a specific rate
-    __inline Double xGetIEPRate() const           { return 32768; }             ///< Get the cost of an equal probable bit
+    __inline UInt64 xGetRateLast(UInt uiPosX, UInt uiPosY) const;
+    __inline UInt64 xGetRateSigCoeffGroup(UShort uiSignificanceCoeffGroup, UShort ui16CtxNumSig) const { return xGetICost(m_pcEstBitsSbac->significantCoeffGroupBits[ui16CtxNumSig][uiSignificanceCoeffGroup]); } ///< xGetICost() of the significantCoeffGroupBits entry for context ui16CtxNumSig and flag value uiSignificanceCoeffGroup
+    __inline UInt64 xGetRateSigCoef(UShort uiSignificance, UShort ui16CtxNumSig) const { return xGetICost(m_pcEstBitsSbac->significantBits[ui16CtxNumSig][uiSignificance]); } ///< xGetICost() of the significantBits entry for context ui16CtxNumSig and flag value uiSignificance
+    __inline UInt64 xGetICost(UInt64 bits) const                    { return (m_uiLambda * bits + 32768) >> 16; }       ///< Get the cost for a specific rate: m_uiLambda * bits, plus 32768 (half of 1 << 16) for rounding, shifted right by 16
+    __inline UInt   xGetIEPRate() const                             { return 32768; }                                   ///< Get the cost of an equiprobable bit; always the constant 32768
+    __inline UInt64 xApplyScale(UInt64 cost, UInt64 errScale) const { return (errScale * cost + (1ULL << 27)) >> 28; } ///< errScale * cost, rounded to nearest and shifted right by 28; returned as UInt64 so results of 2^32 or more are not wrapped before being added to the UInt64 cost totals
 
     // dequantization
     Void xDeQuant(Int bitDepth, const TCoeff* pSrc, Int* pDes, Int iWidth, Int iHeight, Int scalingListType);



[x265] [PATCH] TComTrQuant: switch from Double to UInt64 cost scaling *BUGGY*

[x265] [PATCH] TComTrQuant: switch from Double to UInt64 cost scaling BUGGY