[x265] [PATCH 1 of 3] optimize: rewrite TComTrQuant::xGetCodedLevel

Fri Mar 21 02:41:39 CET 2014

On Tue, Mar 18, 2014 at 7:34 PM, Min Chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1395189192 25200
> # Node ID e9f08d038ff0dc17152002d2b2b70138d08465d7
> # Parent  dc700298419d382e58c49d4ea62a3d7398b4beaf
> optimize: rewrite TComTrQuant::xGetCodedLevel
>
> diff -r dc700298419d -r e9f08d038ff0 source/Lib/TLibCommon/TComTrQuant.cpp
> --- a/source/Lib/TLibCommon/TComTrQuant.cpp     Tue Mar 18 08:46:15 2014 -0500
> +++ b/source/Lib/TLibCommon/TComTrQuant.cpp     Tue Mar 18 17:33:12 2014 -0700
> @@ -630,19 +630,34 @@
>                  uint32_t level;
>                  uint32_t oneCtx = 4 * ctxSet + c1;
>                  uint32_t absCtx = ctxSet + c2;
> +                double curCostSig = 0;
>
> +                costCoeff[scanPos] = MAX_DOUBLE;
>                  if (scanPos == lastScanPos)
>                  {
> -                    level = xGetCodedLevel(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos],
> -                                           levelDouble, maxAbsLevel, baseLevel, 0, oneCtx, absCtx, goRiceParam,
> +                    level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos],
> +                                           levelDouble, maxAbsLevel, baseLevel, oneCtx, absCtx, goRiceParam,
>                                             c1c2Idx, qbits, scaleFactor, 1);
>                  }
>                  else
>                  {
> -                    uint16_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, codingParameters);
> -                    level           = xGetCodedLevel(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos],
> -                                                     levelDouble, maxAbsLevel, baseLevel, ctxSig, oneCtx, absCtx, goRiceParam,
> -                                                     c1c2Idx, qbits, scaleFactor, 0);
> +                    const uint32_t ctxSig = getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, codingParameters);
> +                    if (maxAbsLevel < 3)
> +                    {
> +                        costSig[scanPos] = xGetRateSigCoef(0, ctxSig);
> +                        costCoeff[scanPos] = costCoeff0[scanPos] + costSig[scanPos];
> +                    }
> +                    if (maxAbsLevel != 0)
> +                    {
> +                        curCostSig = xGetRateSigCoef(1, ctxSig);
> +                        level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos],
> +                                               levelDouble, maxAbsLevel, baseLevel, oneCtx, absCtx, goRiceParam,
> +                                               c1c2Idx, qbits, scaleFactor, 0);
> +                    }
> +                    else
> +                    {
> +                        level = 0;
> +                    }
>                      sigRateDelta[blkPos] = m_estBitsSbac->significantBits[ctxSig][1] - m_estBitsSbac->significantBits[ctxSig][0];
>                  }
>                  deltaU[blkPos] = (levelDouble - ((int)level << qbits)) >> (qbits - 8);
> @@ -1125,12 +1140,11 @@
>   * This method calculates the best quantized transform level for a given scan position.
>   */
>  inline uint32_t TComTrQuant::xGetCodedLevel(double&  codedCost,
> -                                            double&  codedCost0,
> +                                            const double curCostSig,
>                                              double&  codedCostSig,
>                                              int      levelDouble,
>                                              uint32_t maxAbsLevel,
>                                              uint32_t baseLevel,
> -                                            uint32_t ctxNumSig,
>                                              uint32_t ctxNumOne,
>                                              uint32_t ctxNumAbs,
>                                              uint32_t absGoRice,
> @@ -1139,43 +1153,45 @@
>                                              double   scaleFactor,
>                                              bool     last) const
>  {
> -    double curCostSig   = 0;
>      uint32_t   bestAbsLevel = 0;
>
> -    if (!last && maxAbsLevel < 3)
> +    if (!last && maxAbsLevel == 0)
>      {
> -        codedCostSig = xGetRateSigCoef(0, ctxNumSig);
> -        codedCost    = codedCost0 + codedCostSig;
> -        if (maxAbsLevel == 0)
> -        {
> -            return bestAbsLevel;
> -        }
> -    }
> -    else
> -    {
> -        codedCost = MAX_DOUBLE;
> +        assert(0);
>      }
>
> -    if (!last)
> -    {
> -        curCostSig = xGetRateSigCoef(1, ctxNumSig);
> -    }
> +    int32_t minAbsLevel = maxAbsLevel - 1;
> +    if (minAbsLevel < 1)
> +        minAbsLevel = 1;
>
> -    uint32_t minAbsLevel = (maxAbsLevel > 1 ? maxAbsLevel - 1 : 1);
> +    // NOTE: (A + B) ^ 2 = (A ^ 2) + 2 * A * B + (B ^ 2)
> +    assert(abs((double)levelDouble - (maxAbsLevel << qbits)) < INT_MAX);
> +    const int32_t err1 = levelDouble - (maxAbsLevel << qbits);            // A
> +          double err2 = (double)((int64_t)err1 * err1);                   // A^ 2
> +    const int64_t err3 = (int64_t)2 * err1 * ((int64_t)1 << qbits);       // 2 * A * B
> +    const int64_t err4 = ((int64_t)1 << qbits) * ((int64_t)1 << qbits);   // B ^ 2
> +    const double errInc = (err3 + err4) * scaleFactor;
> +
> +    err2 *= scaleFactor;
> +
> +    double bestCodedCost = codedCost;
> +    double bestCodedCostSig = codedCostSig;
>      for (int absLevel = maxAbsLevel; absLevel >= minAbsLevel; absLevel--)
>      {
> -        double err     = double(levelDouble  - (absLevel << qbits));
> -        double curCost = err * err * scaleFactor + xGetICRateCost(absLevel, absLevel - baseLevel, ctxNumOne, ctxNumAbs, absGoRice, c1c2Idx);
> +        assert(fabs((double)err2 - double(levelDouble  - (absLevel << qbits)) * double(levelDouble  - (absLevel << qbits)) * scaleFactor) < 1e-5);
> +        double curCost = err2 + xGetICRateCost(absLevel, absLevel - baseLevel, ctxNumOne, ctxNumAbs, absGoRice, c1c2Idx);
>          curCost       += curCostSig;
>
> -        if (curCost < codedCost)
> +        if (curCost < bestCodedCost)
>          {
>              bestAbsLevel = absLevel;
> -            codedCost    = curCost;
> -            codedCostSig = curCostSig;
> +            bestCodedCost = curCost;
> +            bestCodedCostSig = curCostSig;
>          }
> +        err2 += errInc;
>      }
> -
> +    codedCost = bestCodedCost;
> +    codedCostSig = bestCodedCostSig;
>      return bestAbsLevel;
>  }
>
> @@ -1186,7 +1202,7 @@
>   * \param absGoRice Rice parameter for coeff_abs_level_minus3
>   * \returns cost of given absolute transform level
>   */
> -inline double TComTrQuant::xGetICRateCost(uint32_t absLevel,
> +FORCEINLINE double TComTrQuant::xGetICRateCost(uint32_t absLevel,

This macro (FORCEINLINE) isn't defined for all compilers; the build fails with:

/Users/steve/repos/x265/source/Lib/TLibCommon/TComTrQuant.cpp:1204:1:
error: unknown type name 'FORCEINLINE'

>                                            int32_t  diffLevel,
>                                            uint32_t ctxNumOne,
>                                            uint32_t ctxNumAbs,
> diff -r dc700298419d -r e9f08d038ff0 source/Lib/TLibCommon/TComTrQuant.h
> --- a/source/Lib/TLibCommon/TComTrQuant.h       Tue Mar 18 08:46:15 2014 -0500
> +++ b/source/Lib/TLibCommon/TComTrQuant.h       Tue Mar 18 17:33:12 2014 -0700
> @@ -62,7 +62,7 @@
>  typedef struct
>  {
>      int significantCoeffGroupBits[NUM_SIG_CG_FLAG_CTX][2];
> -    int significantBits[NUM_SIG_FLAG_CTX][2];
> +    uint32_t significantBits[NUM_SIG_FLAG_CTX][2];
>      int lastXBits[10];
>      int lastYBits[10];
>      int greaterOneBits[NUM_ONE_FLAG_CTX][2];
> @@ -193,8 +193,8 @@
>      // RDOQ functions
>      uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, TCoeff* dstCoeff, uint32_t trSize, TextType ttype, uint32_t absPartIdx, int32_t *lastPos);
>
> -    inline uint32_t xGetCodedLevel(double& codedCost, double& codedCost0, double& codedCostSig, int levelDouble,
> -                                   uint32_t maxAbsLevel, uint32_t baseLevel, uint32_t ctxNumSig, uint32_t ctxNumOne, uint32_t ctxNumAbs, uint32_t absGoRice,
> +    inline uint32_t xGetCodedLevel(double& codedCost, const double curCostSig, double& codedCostSig, int levelDouble,
> +                                   uint32_t maxAbsLevel, uint32_t baseLevel, uint32_t ctxNumOne, uint32_t ctxNumAbs, uint32_t absGoRice,
>                                     uint32_t c1c2Idx, int qbits, double scale, bool bLast) const;
>
>      inline double xGetICRateCost(uint32_t absLevel, int32_t  diffLevel, uint32_t ctxNumOne, uint32_t ctxNumAbs, uint32_t absGoRice, uint32_t c1c2Idx) const;
> @@ -205,7 +205,7 @@
>
>      inline double xGetRateSigCoeffGroup(uint16_t sigCoeffGroup, uint16_t ctxNumSig) const { return m_lambda * m_estBitsSbac->significantCoeffGroupBits[ctxNumSig][sigCoeffGroup]; }
>
> -    inline double xGetRateSigCoef(uint16_t sig, uint16_t ctxNumSig) const { return m_lambda * m_estBitsSbac->significantBits[ctxNumSig][sig]; }
> +    inline double xGetRateSigCoef(uint32_t sig, uint32_t ctxNumSig) const { return m_lambda * m_estBitsSbac->significantBits[ctxNumSig][sig]; }
>
>      inline double xGetICost(double rage) const { return m_lambda * rage; } ///< Get the cost for a specific rate
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel


-- 
Steve Borho