[x265] [PATCH 6 of 6] reduce operators on rdoQuant() compute sigHide cost

Thu Sep 10 05:43:15 CEST 2015

Nice! Do you see any tangible performance improvement from these fixes to
rdoquant on desktop/server systems?

Pradeep Ramachandran, PhD
Solution Architect,
Multicoreware Inc.
Ph:   +91 99627 82018

On Thu, Sep 10, 2015 at 4:03 AM, Min Chen <chenm003 at 163.com> wrote:

> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1441837075 18000
> # Node ID 4a64832b90b8927f409e9239856281b02a1a2475
> # Parent  41afa68a2c9c9e591c2e95264e2edc987df45d62
> reduce operators on rdoQuant() compute sigHide cost
> ---
>  source/common/quant.cpp |   50
> ++++++++++++++++++++++++++--------------------
>  1 files changed, 28 insertions(+), 22 deletions(-)
>
> diff -r 41afa68a2c9c -r 4a64832b90b8 source/common/quant.cpp
> --- a/source/common/quant.cpp   Wed Sep 09 17:17:25 2015 -0500
> +++ b/source/common/quant.cpp   Wed Sep 09 17:17:55 2015 -0500
> @@ -555,7 +555,7 @@
>  template<uint32_t log2TrSize>
>  uint32_t Quant::rdoQuant(const CUData& cu, int16_t* dstCoeff, TextType
> ttype, uint32_t absPartIdx, bool usePsy)
>  {
> -    int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
> /* Represents scaling through forward transform */
> +    const int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH -
> log2TrSize; /* Represents scaling through forward transform */
>      int scalingListType = (cu.isIntra(absPartIdx) ? 0 : 3) + ttype;
>      const uint32_t usePsyMask = usePsy ? -1 : 0;
>
> @@ -567,7 +567,7 @@
>      int add = (1 << (qbits - 1));
>      const int32_t* qCoef = m_scalingList->m_quantCoef[log2TrSize -
> 2][scalingListType][rem];
>
> -    int numCoeff = 1 << (log2TrSize * 2);
> +    const int numCoeff = 1 << (log2TrSize * 2);
>      uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff,
> qbits, add, numCoeff);
>      X265_CHECK((int)numSig == primitives.cu[log2TrSize -
> 2].count_nonzero(dstCoeff), "numSig differ\n");
>      if (!numSig)
> @@ -583,7 +583,7 @@
>      const int32_t* unquantScale = m_scalingList->m_dequantCoef[log2TrSize
> - 2][scalingListType][rem];
>      int unquantShift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift
> + (m_scalingList->m_bEnabled ? 4 : 0);
>      int unquantRound = (unquantShift > per) ? 1 << (unquantShift - per -
> 1) : 0;
> -    int scaleBits = SCALE_BITS - 2 * transformShift;
> +    const int scaleBits = SCALE_BITS - 2 * transformShift;
>
>  #define UNQUANT(lvl)    (((lvl) * (unquantScale[blkPos] << per) +
> unquantRound) >> unquantShift)
>  #define SIGCOST(bits)   ((lambda2 * (bits)) >> 8)
> @@ -1046,7 +1046,7 @@
>                      c2 += (uint32_t)(c2 - 2) >> 31;
>                      c2Idx++;
>                  }
> -                else if ((c1 < 3) && (c1 > 0) && level)
> +                else if (((c1 == 1) | (c1 == 2)) && level)
>                      c1++;
>
>                  if (dstCoeff[blkPos])
> @@ -1276,7 +1276,7 @@
>
>                      int64_t minCostInc = MAX_INT64, curCost = MAX_INT64;
>                      int minPos = -1;
> -                    int16_t finalChange = 0, curChange = 0;
> +                    int8_t finalChange = 0, curChange = 0;
>
>                      for (n = (lastCG ? lastNZPosInCG : SCAN_SET_SIZE -
> 1); n >= 0; --n)
>                      {
> @@ -1285,21 +1285,21 @@
>                          int absLevel    = abs(dstCoeff[blkPos]);
>
>                          int d = abs(signCoef) - UNQUANT(absLevel);
> -                        int64_t origDist = (((int64_t)d * d)) <<
> scaleBits;
> +                        const int64_t origDist = (((int64_t)d * d));
>
> -#define DELTARDCOST(d, deltabits) ((((int64_t)d * d) << scaleBits) -
> origDist + ((lambda2 * (int64_t)(deltabits)) >> 8))
> +#define DELTARDCOST(d0, d, deltabits) ((((int64_t)d * d - d0) <<
> scaleBits) + ((lambda2 * (int64_t)(deltabits)) >> 8))
>
>                          if (dstCoeff[blkPos])
>                          {
>                              d = abs(signCoef) - UNQUANT(absLevel + 1);
> -                            int64_t costUp = DELTARDCOST(d,
> rateIncUp[blkPos]);
> +                            int64_t costUp = DELTARDCOST(origDist, d,
> rateIncUp[blkPos]);
>
>                              /* if decrementing would make the coeff 0, we
> can include the
>                               * significant coeff flag cost savings */
>                              d = abs(signCoef) - UNQUANT(absLevel - 1);
>                              int isOne = (abs(dstCoeff[blkPos]) == 1);
>                              int downBits = rateIncDown[blkPos] - (isOne ?
> (IEP_RATE + sigRateDelta[blkPos]) : 0);
> -                            int64_t costDown = DELTARDCOST(d, downBits);
> +                            int64_t costDown = DELTARDCOST(origDist, d,
> downBits);
>
>                              costDown -= (lastCG & (n == lastNZPosInCG) &
> isOne) * 4 * IEP_RATE;
>                              curCost = ((n == firstNZPosInCG) & isOne) ?
> MAX_INT64 : costDown;
> @@ -1308,8 +1308,7 @@
>                              if (costUp < costDown)
>                                  curCost = costUp;
>                          }
> -                        //else if ((n < firstNZPosInCG) & (signbit !=
> ((uint32_t)signCoef >> 31)))
> -                        else if (n < firstNZPosInCG && signbit !=
> (signCoef >= 0 ? 0 : 1U))
> +                        else if ((n < firstNZPosInCG) & (signbit !=
> ((uint32_t)signCoef >> 31)))
>                          {
>                              /* don't try to make a new coded coeff before
> the first coeff if its
>                               * sign would be different than the first
> coeff, the inferred sign would
> @@ -1320,7 +1319,7 @@
>                          {
>                              /* evaluate changing an uncoded coeff 0 to a
> coded coeff +/-1 */
>                              d = abs(signCoef) - UNQUANT(1);
> -                            curCost = DELTARDCOST(d, rateIncUp[blkPos] +
> IEP_RATE + sigRateDelta[blkPos]);
> +                            curCost = DELTARDCOST(origDist, d,
> rateIncUp[blkPos] + IEP_RATE + sigRateDelta[blkPos]);
>                              curChange = 1;
>                          }
>
> @@ -1332,19 +1331,26 @@
>                          }
>                      }
>
> -                    if (dstCoeff[minPos] == 32767 || dstCoeff[minPos] ==
> -32768)
> +                    // if (dstCoeff[minPos] == 32767 || dstCoeff[minPos]
> == -32768)
> +                    if (((uint16_t)dstCoeff[minPos] + 1 ==
> (uint16_t)0x8000) | ((uint16_t)dstCoeff[minPos] == (uint16_t)0x8000))
>                          /* don't allow sign hiding to violate the SPEC
> range */
>                          finalChange = -1;
>
> -                    if (dstCoeff[minPos] == 0)
> -                        numSig++;
> -                    else if (finalChange == -1 && abs(dstCoeff[minPos])
> == 1)
> -                        numSig--;
> +                    // NOTE: Reference code
> +                    //if (dstCoeff[minPos] == 0)
> +                    //    numSig++;
> +                    //else if (finalChange == -1 && abs(dstCoeff[minPos])
> == 1)
> +                    //    numSig--;
> +                    numSig += (dstCoeff[minPos] == 0) - ((finalChange ==
> -1) & (abs(dstCoeff[minPos]) == 1));
>
> -                    if (m_resiDctCoeff[minPos] >= 0)
> -                        dstCoeff[minPos] += finalChange;
> -                    else
> -                        dstCoeff[minPos] -= finalChange;
> +
> +                    // NOTE: Reference code
> +                    //if (m_resiDctCoeff[minPos] >= 0)
> +                    //    dstCoeff[minPos] += finalChange;
> +                    //else
> +                    //    dstCoeff[minPos] -= finalChange;
> +                    const int16_t resiCoeffSign =
> ((int16_t)m_resiDctCoeff[minPos] >> 16);
> +                    dstCoeff[minPos] += (((int16_t)finalChange ^
> resiCoeffSign) - resiCoeffSign);
>                  }
>              }
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150910/d066a265/attachment.html>