[x265] trquant: store QpParam for each component

Steve Borho steve at borho.org
Wed Jul 23 07:03:02 CEST 2014


On 07/21, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1405905842 -32400
> #      Mon Jul 21 10:24:02 2014 +0900
> # Node ID b40af94fd00f5f23a22854aaf498ffef32910110
> # Parent  eb983d29c11acc03b91e07fe93c31503fa3a4732
> trquant: store QpParam for each component

Hello Satoshi,

This patch looked harmless, so I had queued it without sending it
through regression testing, but it turned out to cause hash mismatches.

The quickest repro case I know of is this:

x265 BasketBallDrive_1920x1080_50.y4m --bitrate 4000 --rd 5 --hash 1 -f 50

It causes a hash mismatch right away and eventually a decoder crash. If
you can fix it today, I won't back this out.

> diff -r eb983d29c11a -r b40af94fd00f source/Lib/TLibCommon/TComLoopFilter.cpp
> --- a/source/Lib/TLibCommon/TComLoopFilter.cpp	Thu Jul 17 09:29:39 2014 +0200
> +++ b/source/Lib/TLibCommon/TComLoopFilter.cpp	Mon Jul 21 10:24:02 2014 +0900
> @@ -48,7 +48,6 @@
>  // ====================================================================================================================
>  // Constants
>  // ====================================================================================================================
> -#define QpUV(iQpY, chFmt)  (((iQpY) < 0) ? (iQpY) : (((iQpY) > 57) ? ((iQpY) - 6) : g_chromaScale[chFmt][(iQpY)]))
>  #define DEFAULT_INTRA_TC_OFFSET 2 ///< Default intra TC offset
>  
>  // ====================================================================================================================
> @@ -441,9 +440,6 @@
>      pixel* tmpsrc = src;
>  
>      int stride = reconYuv->getStride();
> -    int qp = 0;
> -    int qpP = 0;
> -    int qpQ = 0;
>      uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
>  
>      uint32_t log2UnitSize = g_log2UnitSize;
> @@ -457,8 +453,8 @@
>      uint32_t  partQ = 0;
>      TComDataCU* cuP = cu;
>      TComDataCU* cuQ = cu;
> -    int  betaOffsetDiv2 = cuQ->m_slice->m_pps->deblockingFilterBetaOffsetDiv2;
> -    int  tcOffsetDiv2 = cuQ->m_slice->m_pps->deblockingFilterTcOffsetDiv2;
> +    int  betaOffset = cuQ->m_slice->m_pps->deblockingFilterBetaOffsetDiv2 << 1;
> +    int  tcOffset = cuQ->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
>  
>      if (dir == EDGE_VER)
>      {
> @@ -480,7 +476,7 @@
>          bs = blockingStrength[bsAbsIdx];
>          if (bs)
>          {
> -            qpQ = cu->getQP(bsAbsIdx);
> +            int qpQ = cu->getQP(bsAbsIdx);
>              partQ = bsAbsIdx;
>              // Derive neighboring PU index
>              if (dir == EDGE_VER)
> @@ -492,12 +488,12 @@
>                  cuP = cuQ->getPUAbove(partP, partQ);
>              }
>  
> -            qpP = cuP->getQP(partP);
> -            qp = (qpP + qpQ + 1) >> 1;
> +            int qpP = cuP->getQP(partP);
> +            int qp = (qpP + qpQ + 1) >> 1;
>              int bitdepthScale = 1 << (X265_DEPTH - 8);
>  
> -            int indexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, int(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + (tcOffsetDiv2 << 1)));
> -            int indexB = Clip3(0, MAX_QP, qp + (betaOffsetDiv2 << 1));
> +            int indexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, int(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
> +            int indexB = Clip3(0, MAX_QP, qp + betaOffset);
>  
>              int tc =  sm_tcTable[indexTC] * bitdepthScale;
>              int beta = sm_betaTable[indexB] * bitdepthScale;
> @@ -544,13 +540,11 @@
>  
>  void TComLoopFilter::xEdgeFilterChroma(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int dir, int edge, uint8_t blockingStrength[])
>  {
> +    int chFmt = cu->getChromaFormat();
>      TComPicYuv* reconYuv = cu->m_pic->getPicYuvRec();
>      int stride = reconYuv->getCStride();
>      pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
>      pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
> -    int qp = 0;
> -    int qpP = 0;
> -    int qpQ = 0;
>      uint32_t log2UnitSizeH = g_log2UnitSize - cu->getHorzChromaShift();
>      uint32_t log2UnitSizeV = g_log2UnitSize - cu->getVertChromaShift();
>      uint32_t unitSizeChromaH = 1 << log2UnitSizeH;
> @@ -565,7 +559,7 @@
>      uint32_t  partQ;
>      TComDataCU* cuP;
>      TComDataCU* cuQ = cu;
> -    int tcOffsetDiv2 = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2;
> +    int tcOffset = cu->m_slice->m_pps->deblockingFilterTcOffsetDiv2 << 1;
>  
>      // Vertical Position
>      uint32_t edgeNumInLCUVert = g_zscanToRaster[absZOrderIdx] % lcuWidthInBaseUnits + edge;
> @@ -611,7 +605,7 @@
>  
>          if (bs > 1)
>          {
> -            qpQ = cu->getQP(bsAbsIdx);
> +            int qpQ = cu->getQP(bsAbsIdx);
>              partQ = bsAbsIdx;
>              // Derive neighboring PU index
>              if (dir == EDGE_VER)
> @@ -623,7 +617,7 @@
>                  cuP = cuQ->getPUAbove(partP, partQ);
>              }
>  
> -            qpP = cuP->getQP(partP);
> +            int qpP = cuP->getQP(partP);
>  
>              if (cu->m_slice->m_pps->bTransquantBypassEnabled)
>              {
> @@ -636,10 +630,17 @@
>              {
>                  int chromaQPOffset  = (chromaIdx == 0) ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
>                  pixel* piTmpSrcChroma = (chromaIdx == 0) ? tmpSrcCb : tmpSrcCr;
> -                qp = QpUV((((qpP + qpQ + 1) >> 1) + chromaQPOffset), cu->getChromaFormat());
> +                int qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
> +                if (qp >= 30)
> +                {
> +                    if (chFmt == CHROMA_420)
> +                        qp = g_chromaScale[qp];
> +                    else
> +                        qp = X265_MIN(qp, 51);
> +                }
>                  int iBitdepthScale = 1 << (X265_DEPTH - 8);
>  
> -                int iIndexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + (tcOffsetDiv2 << 1));
> +                int iIndexTC = Clip3(0, MAX_QP + DEFAULT_INTRA_TC_OFFSET, qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset);
>                  int iTc =  sm_tcTable[iIndexTC] * iBitdepthScale;
>  
>                  for (uint32_t uiStep = 0; uiStep < loopLength; uiStep++)
> diff -r eb983d29c11a -r b40af94fd00f source/Lib/TLibCommon/TComRom.cpp
> --- a/source/Lib/TLibCommon/TComRom.cpp	Thu Jul 17 09:29:39 2014 +0200
> +++ b/source/Lib/TLibCommon/TComRom.cpp	Mon Jul 21 10:24:02 2014 +0900
> @@ -450,12 +450,11 @@
>      {  9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9, -9, 25, -43, 57, -70, 80, -87, 90, -90, 87, -80, 70, -57, 43, -25,  9 },
>      {  4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90, 90, -90, 88, -85, 82, -78, 73, -67, 61, -54, 46, -38, 31, -22, 13, -4 }
>  };
> -const uint8_t g_chromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSize] =
> +
> +const uint8_t g_chromaScale[chromaQPMappingTableSize] =
>  {
> -    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
> -    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
> -    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51 },
> -    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51 }
> +    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
> +    51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51
>  };
>  
>  const uint8_t g_chroma422IntraAngleMappingTable[36] =
> diff -r eb983d29c11a -r b40af94fd00f source/Lib/TLibCommon/TComRom.h
> --- a/source/Lib/TLibCommon/TComRom.h	Thu Jul 17 09:29:39 2014 +0200
> +++ b/source/Lib/TLibCommon/TComRom.h	Mon Jul 21 10:24:02 2014 +0900
> @@ -74,7 +74,7 @@
>  // ====================================================================================================================
>  static const int chromaQPMappingTableSize = 70;
>  
> -extern const uint8_t g_chromaScale[NUM_CHROMA_FORMAT][chromaQPMappingTableSize];
> +extern const uint8_t g_chromaScale[chromaQPMappingTableSize];
>  extern const uint8_t g_chroma422IntraAngleMappingTable[36];
>  // Data structure related table & variable
>  // ====================================================================================================================
> diff -r eb983d29c11a -r b40af94fd00f source/Lib/TLibCommon/TComTrQuant.cpp
> --- a/source/Lib/TLibCommon/TComTrQuant.cpp	Thu Jul 17 09:29:39 2014 +0200
> +++ b/source/Lib/TLibCommon/TComTrQuant.cpp	Mon Jul 21 10:24:02 2014 +0900
> @@ -70,8 +70,6 @@
>  
>  TComTrQuant::TComTrQuant()
>  {
> -    m_qpParam.clear();
> -
>      // allocate temporary buffers
>      // OPT_ME: I may reduce this to short and output matched, but I am not sure it is right.
>      m_tmpCoeff = X265_MALLOC(int32_t, MAX_CU_SIZE * MAX_CU_SIZE);
> @@ -109,37 +107,42 @@
>  
>  /** Set qP for Quantization.
>   * \param qpy QPy
> - * \param bLowpass
> - * \param sliceType
>   * \param ttype
>   * \param qpBdOffset
>   * \param chromaQPOffset
>   *
>   * return void
>   */
> +
> +void TComTrQuant::setQPforQuant(int qpy, int qpBdOffset)
> +{
> +    m_qpParam[TEXT_LUMA].setQpParam(qpy + qpBdOffset);
> +}
> +
>  void TComTrQuant::setQPforQuant(int qpy, TextType ttype, int qpBdOffset, int chromaQPOffset, int chFmt)
>  {
> -    int qpScaled;
> +    X265_CHECK(ttype == TEXT_CHROMA_U || ttype == TEXT_CHROMA_V, "invalid ttype\n");
>  
> -    if (ttype == TEXT_LUMA)
> +    int qp = Clip3(-qpBdOffset, 57, qpy + chromaQPOffset);
> +    if (qp >= 30)
>      {
> -        qpScaled = qpy + qpBdOffset;
> -        qpScaled = Clip3(0, MAX_QP + QP_BD_OFFSET, qpScaled);
> +        if (chFmt == CHROMA_420)
> +            qp = g_chromaScale[qp];
> +        else
> +            qp = X265_MIN(qp, 51);
>      }
> -    else
> -    {
> -        qpScaled = Clip3(-qpBdOffset, 57, qpy + chromaQPOffset);
> +    m_qpParam[ttype].setQpParam(qp + qpBdOffset);
> +}
>  
> -        if (qpScaled < 0)
> -        {
> -            qpScaled = qpScaled + qpBdOffset;
> -        }
> -        else
> -        {
> -            qpScaled = g_chromaScale[chFmt][qpScaled] + qpBdOffset;
> -        }
> -    }
> -    m_qpParam.setQpParam(qpScaled);
> +void TComTrQuant::setQPforQuant(TComDataCU* cu)
> +{
> +    int qpy = cu->getQP(0);
> +    int chFmt = cu->getChromaFormat();
> +    const PPS* pps = cu->m_slice->m_pps;
> +
> +    setQPforQuant(qpy, QP_BD_OFFSET);
> +    setQPforQuant(qpy, TEXT_CHROMA_U, QP_BD_OFFSET, pps->chromaCbQpOffset, chFmt);
> +    setQPforQuant(qpy, TEXT_CHROMA_V, QP_BD_OFFSET, pps->chromaCrQpOffset, chFmt);
>  }
>  
>  // To minimize the distortion only. No rate is considered.
> @@ -279,11 +282,13 @@
>  
>      int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
>      X265_CHECK(scalingListType < 6, "scaling list type out of range\n");
> -    int32_t *quantCoeff = getQuantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2);
> +    int rem = m_qpParam[ttype].m_rem;
> +    int per = m_qpParam[ttype].m_per;
> +    int32_t *quantCoeff = getQuantCoeff(scalingListType, rem, log2TrSize - 2);
>  
>      int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; // Represents scaling through forward transform
>  
> -    int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift;
> +    int qbits = QUANT_SHIFT + per + transformShift;
>      int add = (cu->m_slice->m_sliceType == I_SLICE ? 171 : 85) << (qbits - 9);
>  
>      int numCoeff = 1 << log2TrSize * 2;
> @@ -373,8 +378,8 @@
>      }
>  
>      // Values need to pass as input parameter in dequant
> -    int per = m_qpParam.m_per;
> -    int rem = m_qpParam.m_rem;
> +    int rem = m_qpParam[ttype].m_rem;
> +    int per = m_qpParam[ttype].m_per;
>      int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize;
>      int shift = QUANT_IQUANT_SHIFT - QUANT_SHIFT - transformShift;
>      int numCoeff = 1 << log2TrSize * 2;
> @@ -390,7 +395,7 @@
>          // CHECK_ME: the code is not verify since this is DEAD path
>          int scalingListType = (!bIntra ? 3 : 0) + ttype;
>          X265_CHECK(scalingListType < 6, "scalingListType invalid %d\n", scalingListType);
> -        int32_t *dequantCoef = getDequantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2);
> +        int32_t *dequantCoef = getDequantCoeff(scalingListType, rem, log2TrSize - 2);
>          primitives.dequant_scaling(coeff, dequantCoef, m_tmpCoeff, numCoeff, per, shift);
>      }
>  
> @@ -509,9 +514,11 @@
>  
>      X265_CHECK(scalingListType < 6, "scaling list type out of range\n");
>  
> -    int qbits = QUANT_SHIFT + m_qpParam.m_per + transformShift; // Right shift of non-RDOQ quantizer;  level = (coeff*Q + offset)>>q_bits
> +    int rem = m_qpParam[ttype].m_rem;
> +    int per = m_qpParam[ttype].m_per;
> +    int qbits = QUANT_SHIFT + per + transformShift; // Right shift of non-RDOQ quantizer;  level = (coeff*Q + offset)>>q_bits
>      int add = (1 << (qbits - 1));
> -    int32_t *qCoef = getQuantCoeff(scalingListType, m_qpParam.m_rem, log2TrSize - 2);
> +    int32_t *qCoef = getQuantCoeff(scalingListType, rem, log2TrSize - 2);
>  
>      int numCoeff = 1 << log2TrSize * 2;
>      int scaledCoeff[32 * 32];
> @@ -524,7 +531,7 @@
>      x265_emms();
>      selectLambda(ttype);
>  
> -    double *errScale = getErrScaleCoeff(scalingListType, log2TrSize - 2, m_qpParam.m_rem);
> +    double *errScale = getErrScaleCoeff(scalingListType, log2TrSize - 2, rem);
>  
>      double blockUncodedCost = 0;
>      double costCoeff[32 * 32];
> @@ -859,7 +866,7 @@
>      if (cu->m_slice->m_pps->bSignHideEnabled && numSig >= 2)
>      {
>          int64_t rdFactor = (int64_t)(
> -                g_invQuantScales[m_qpParam.rem()] * g_invQuantScales[m_qpParam.rem()] * (1 << (2 * m_qpParam.m_per))
> +                g_invQuantScales[rem] * g_invQuantScales[rem] * (1 << (2 * per))
>                  / (m_lambda * (16 << DISTORTION_PRECISION_ADJUSTMENT(2 * (X265_DEPTH - 8))))
>                  + 0.5);
>          int lastCG = 1;
> diff -r eb983d29c11a -r b40af94fd00f source/Lib/TLibCommon/TComTrQuant.h
> --- a/source/Lib/TLibCommon/TComTrQuant.h	Thu Jul 17 09:29:39 2014 +0200
> +++ b/source/Lib/TLibCommon/TComTrQuant.h	Mon Jul 21 10:24:02 2014 +0900
> @@ -54,8 +54,6 @@
>  // Constants
>  // ====================================================================================================================
>  
> -#define QP_BITS 15
> -
>  // ====================================================================================================================
>  // Type definition
>  // ====================================================================================================================
> @@ -81,39 +79,34 @@
>  {
>  public:
>  
> -    QpParam() {}
> +    QpParam()
> +    {
> +        m_rem = 0;
> +        m_per = 0;
> +        m_qp  = 0;
> +    }
>  
> +    int m_rem;
> +    int m_per;
>      int m_qp;
> -    int m_per;
> -    int m_rem;
> -
> -    int m_bits;
>  
>  public:
>  
>      void setQpParam(int qpScaled)
>      {
> -        m_qp   = qpScaled;
> -        m_per  = qpScaled / 6;
> -        m_rem  = qpScaled % 6;
> -        m_bits = QP_BITS + m_per;
> +        if (m_qp != qpScaled)
> +        {
> +            m_rem  = qpScaled % 6;
> +            m_per  = qpScaled / 6;
> +            m_qp   = qpScaled;
> +        }
>      }
>  
> -    void clear()
> -    {
> -        m_qp   = 0;
> -        m_per  = 0;
> -        m_rem  = 0;
> -        m_bits = 0;
> -    }
> +    int rem()   const { return m_rem; }
>  
>      int per()   const { return m_per; }
>  
> -    int rem()   const { return m_rem; }
> -
> -    int bits()  const { return m_bits; }
> -
> -    int qp() { return m_qp; }
> +    int qp()    const { return m_qp; }
>  };
>  
>  /// transform and quantization class
> @@ -134,7 +127,9 @@
>      void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
>  
>      // Misc functions
> +    void setQPforQuant(int qpy, int qpBdOffset);
>      void setQPforQuant(int qpy, TextType ttype, int qpBdOffset, int chromaQPOffset, int chFmt);
> +    void setQPforQuant(TComDataCU* cu);
>      void setLambdas(double lambdaY, double lambdaCb, double lambdaCr) { m_lambdas[0] = lambdaY; m_lambdas[1] = lambdaCb; m_lambdas[2] = lambdaCr; }
>  
>      void selectLambda(TextType ttype) { m_lambda = m_lambdas[ttype]; }
> @@ -204,7 +199,7 @@
>  
>  protected:
>  
> -    QpParam  m_qpParam;
> +    QpParam  m_qpParam[3];
>  
>      double   m_lambda;
>      double   m_lambdas[3];
> diff -r eb983d29c11a -r b40af94fd00f source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp	Thu Jul 17 09:29:39 2014 +0200
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Jul 21 10:24:02 2014 +0900
> @@ -438,8 +438,6 @@
>          m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSize, TEXT_LUMA);
>  
>      //--- transform and quantization ---
> -    int chFmt = cu->getChromaFormat();
> -    m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>      uint32_t numSig = m_trQuant.transformNxN(cu, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTransformSkip);
>  
>      //--- set coded block flag ---
> @@ -509,10 +507,6 @@
>          m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSizeC, TEXT_CHROMA);
>  
>      //--- transform and quantization ---
> -    int chFmt = cu->getChromaFormat();
> -    int curChromaQpOffset = (ttype == TEXT_CHROMA_U) ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
> -
> -    m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>      uint32_t numSig = m_trQuant.transformNxN(cu, residual, stride, coeff, log2TrSizeC, ttype, absPartIdx, useTransformSkipC);
>  
>      //--- set coded block flag ---
> @@ -839,7 +833,6 @@
>  
>          //----- code luma block with given intra prediction mode and store Cbf-----
>          uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
> -        int      chFmt        = cu->getChromaFormat();
>          uint32_t stride       = fencYuv->getStride();
>          pixel*   fenc         = fencYuv->getLumaAddr(absPartIdx);
>          pixel*   pred         = predYuv->getLumaAddr(absPartIdx);
> @@ -872,7 +865,6 @@
>          primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
>  
>          //===== transform and quantization =====
> -        m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>          uint32_t numSig = m_trQuant.transformNxN(cu, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTransformSkip);
>  
>          //--- set coded block flag ---
> @@ -1344,8 +1336,6 @@
>                  primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
>  
>                  //--- transform and quantization ---
> -                int curChromaQpOffset = (ttype == TEXT_CHROMA_U) ? cu->m_slice->m_pps->chromaCbQpOffset : cu->m_slice->m_pps->chromaCrQpOffset;
> -                m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                  uint32_t numSig = m_trQuant.transformNxN(cu, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTransformSkipC);
>  
>                  //--- set coded block flag ---
> @@ -2344,6 +2334,8 @@
>          return;
>      }
>  
> +    m_trQuant.setQPforQuant(cu);
> +
>      outResiYuv->subtract(fencYuv, predYuv, log2CUSize);
>  
>      // Residual coding.
> @@ -2469,6 +2461,9 @@
>          cu->clearCbf(0, cu->getDepth(0));
>          return;
>      }
> +
> +    m_trQuant.setQPforQuant(cu);
> +
>      if (cu->getPredictionMode(0) == MODE_INTER)
>      {
>          residualTransformQuantInter(cu, 0, resiYuv, cu->getDepth(0), true);
> @@ -2541,17 +2536,13 @@
>          const uint32_t strideResiY = resiYuv->m_width;
>          const uint32_t strideResiC = resiYuv->m_cwidth;
>  
> -        m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>          uint32_t numSigY = m_trQuant.transformNxN(cu, curResiY, strideResiY, coeffCurY,
>                                                    log2TrSize, TEXT_LUMA, absPartIdx, false, curuseRDOQ);
>  
>          cu->setCbfSubParts(numSigY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
>  
>          if (numSigY)
> -        {
> -            m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>              m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> -        }
>          else
>              primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);
>  
> @@ -2570,13 +2561,8 @@
>                  cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
>                  cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
>  
> -                int curChromaQpOffset = cu->m_slice->m_pps->chromaCbQpOffset;
> -                m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                  uint32_t numSigU = m_trQuant.transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUOffset,
>                                                            log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false, curuseRDOQ);
> -
> -                curChromaQpOffset = cu->m_slice->m_pps->chromaCrQpOffset;
> -                m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                  uint32_t numSigV = m_trQuant.transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUOffset,
>                                                            log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false, curuseRDOQ);
>  
> @@ -2584,20 +2570,12 @@
>                  cu->setCbfPartRange(numSigV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
>  
>                  if (numSigU)
> -                {
> -                    curChromaQpOffset = cu->m_slice->m_pps->chromaCbQpOffset;
> -                    m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                      m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> -                }
>                  else
>                      primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0);
>  
>                  if (numSigV)
> -                {
> -                    curChromaQpOffset = cu->m_slice->m_pps->chromaCrQpOffset;
> -                    m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                      m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> -                }
>                  else
>                      primitives.blockfill_s[sizeIdxC](curResiV, strideResiC, 0);
>              }
> @@ -2722,7 +2700,6 @@
>          if (m_bEnableRDOQ && curuseRDOQ)
>              m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSize, TEXT_LUMA);
>  
> -        m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>          numSigY = m_trQuant.transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
>                                           log2TrSize, TEXT_LUMA, absPartIdx, false, curuseRDOQ);
>  
> @@ -2751,14 +2728,8 @@
>                  if (m_bEnableRDOQ && curuseRDOQ)
>                      m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSizeC, TEXT_CHROMA);
>  
> -                //Cb transform
> -                int curChromaQpOffset = cu->m_slice->m_pps->chromaCbQpOffset;
> -                m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                  numSigU[tuIterator.section] = m_trQuant.transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUOffset,
>                                                                       log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false, curuseRDOQ);
> -                //Cr transform
> -                curChromaQpOffset = cu->m_slice->m_pps->chromaCrQpOffset;
> -                m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                  numSigV[tuIterator.section] = m_trQuant.transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUOffset,
>                                                                       log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false, curuseRDOQ);
>  
> @@ -2812,7 +2783,6 @@
>  
>          if (numSigY)
>          {
> -            m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>              m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY); //this is for inter mode only
>  
>              const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, curResiY, strideResiY);
> @@ -2904,14 +2874,11 @@
>                  int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
>  
>                  distU = m_rdCost.scaleChromaDistCb(primitives.ssd_s[log2TrSizeC - 2](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth));
> -
>                  if (outZeroDist)
>                      *outZeroDist += distU;
>  
>                  if (numSigU[tuIterator.section])
>                  {
> -                    int curChromaQpOffset = cu->m_slice->m_pps->chromaCbQpOffset;
> -                    m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                      m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiU, strideResiC, coeffCurU + subTUOffset,
>                                                log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU[tuIterator.section]);
>                      uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
> @@ -2994,8 +2961,6 @@
>  
>                  if (numSigV[tuIterator.section])
>                  {
> -                    int curChromaQpOffset = cu->m_slice->m_pps->chromaCrQpOffset;
> -                    m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                      m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiV, strideResiC, coeffCurV + subTUOffset,
>                                                log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV[tuIterator.section]);
>                      uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
> @@ -3094,7 +3059,6 @@
>              if (m_bEnableRDOQ)
>                  m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSize, TEXT_LUMA);
>  
> -            m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>              uint32_t numSigTSkipY = m_trQuant.transformNxN(cu, resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsCoeffY,
>                                                             log2TrSize, TEXT_LUMA, absPartIdx, true, curuseRDOQ);
>              cu->setCbfSubParts(numSigTSkipY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
> @@ -3106,7 +3070,6 @@
>                  m_sbacCoder->codeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
>                  const uint32_t skipSingleBitsY = m_sbacCoder->getNumberOfWrittenBits();
>  
> -                m_trQuant.setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
>                  m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
>  
>                  nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsResiY, trSize);
> @@ -3177,12 +3140,8 @@
>                  if (m_bEnableRDOQ)
>                      m_sbacCoder->estBit(m_trQuant.m_estBitsSbac, log2TrSizeC, TEXT_CHROMA);
>  
> -                int curChromaQpOffset = cu->m_slice->m_pps->chromaCbQpOffset;
> -                m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                  uint32_t numSigTSkipU = m_trQuant.transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffU,
>                                                                 log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, true, curuseRDOQ);
> -                curChromaQpOffset = cu->m_slice->m_pps->chromaCrQpOffset;
> -                m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                  uint32_t numSigTSkipV = m_trQuant.transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, tsCoeffV,
>                                                                 log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, true, curuseRDOQ);
>  
> @@ -3198,8 +3157,6 @@
>                      m_sbacCoder->codeCoeffNxN(cu, tsCoeffU, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
>                      singleBitsComp[TEXT_CHROMA_U][tuIterator.section] = m_sbacCoder->getNumberOfWrittenBits();
>  
> -                    curChromaQpOffset = cu->m_slice->m_pps->chromaCbQpOffset;
> -                    m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                      m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), tsResiU, trSizeC, tsCoeffU,
>                                                log2TrSizeC, TEXT_CHROMA_U, false, true, numSigTSkipU);
>                      uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
> @@ -3241,8 +3198,6 @@
>                      m_sbacCoder->codeCoeffNxN(cu, tsCoeffV, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
>                      singleBitsComp[TEXT_CHROMA_V][tuIterator.section] = m_sbacCoder->getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.section];
>  
> -                    curChromaQpOffset = cu->m_slice->m_pps->chromaCrQpOffset;
> -                    m_trQuant.setQPforQuant(cu->getQP(0), TEXT_CHROMA, QP_BD_OFFSET, curChromaQpOffset, chFmt);
>                      m_trQuant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), tsResiV, trSizeC, tsCoeffV,
>                                                log2TrSizeC, TEXT_CHROMA_V, false, true, numSigTSkipV);
>                      uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
> diff -r eb983d29c11a -r b40af94fd00f source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Thu Jul 17 09:29:39 2014 +0200
> +++ b/source/encoder/analysis.cpp	Mon Jul 21 10:24:02 2014 +0900
> @@ -388,6 +388,8 @@
>      // We need to split, so don't try these modes.
>      if (bInsidePicture)
>      {
> +        m_trQuant.setQPforQuant(outTempCU);
> +
>          checkIntra(outBestCU, outTempCU, SIZE_2Nx2N);
>  
>          if (depth == g_maxCUDepth - g_addCUDepth)
> @@ -1033,7 +1035,6 @@
>      {
>          X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best prediction size\n");
>          X265_CHECK(outBestCU->getPredictionMode(0) != MODE_NONE, "no best prediction mode\n");
> -        X265_CHECK(outBestCU->m_totalRDCost != MAX_INT64, "no best prediction cost\n");
>      }
>  
>      x265_emms();
> @@ -1075,6 +1076,8 @@
>      // We need to split, so don't try these modes.
>      if (bInsidePicture)
>      {
> +        m_trQuant.setQPforQuant(outTempCU);
> +
>          // do inter modes, SKIP and 2Nx2N
>          if (slice->m_sliceType != I_SLICE)
>          {
> @@ -1775,6 +1778,8 @@
>      // set context models
>      m_sbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
>  
> +    m_trQuant.setQPforQuant(cu);
> +
>      xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, puDistY, false, puCost);
>      xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);
>  
> @@ -1840,6 +1845,9 @@
>  
>          return;
>      }
> +
> +    m_trQuant.setQPforQuant(cu);
> +
>      if (lcu->getPredictionMode(absPartIdx) == MODE_INTER)
>      {
>          if (!lcu->getSkipFlag(absPartIdx))

-- 
Steve Borho


More information about the x265-devel mailing list