[x265] quant: add m_tqBypass

Fri Jan 30 18:22:11 CET 2015

On 01/30, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1422623908 -32400
> #      Fri Jan 30 22:18:28 2015 +0900
> # Node ID ee56555c683e73c4207a98e6626167d4445a8d76
> # Parent  5e5dc3763f6386da9722903033a2b9dd263a5226
> quant: add m_tqBypass

Queued, thanks.

> diff -r 5e5dc3763f63 -r ee56555c683e source/common/deblock.cpp
> --- a/source/common/deblock.cpp	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/common/deblock.cpp	Fri Jan 30 22:18:28 2015 +0900
> @@ -401,14 +401,22 @@
>          if (!bs)
>              continue;
>  
> -        int32_t qpQ = cuQ->m_qp[partQ];
> -
>          // Derive neighboring PU index
>          uint32_t partP;
>          const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
>  
> +        if (bCheckNoFilter)
> +        {
> +            // check if each of PUs is lossless coded
> +            maskP = cuP->m_tqBypass[partP] - 1;
> +            maskQ = cuQ->m_tqBypass[partQ] - 1;
> +            if (!(maskP | maskQ))
> +                continue;
> +        }
> +
> +        int32_t qpQ = cuQ->m_qp[partQ];
>          int32_t qpP = cuP->m_qp[partP];
> -        int32_t qp = (qpP + qpQ + 1) >> 1;
> +        int32_t qp  = (qpP + qpQ + 1) >> 1;
>  
>          int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
>  
> @@ -428,13 +436,6 @@
>          if (d >= beta)
>              continue;
>  
> -        if (bCheckNoFilter)
> -        {
> -            // check if each of PUs is lossless coded
> -            maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
> -            maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
> -        }
> -
>          int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
>          int32_t tc = s_tcTable[indexTC] << bitdepthShift;
>  
> @@ -506,33 +507,29 @@
>          if (bs <= 1)
>              continue;
>  
> -        int32_t qpQ = cuQ->m_qp[partQ];
> -
>          // Derive neighboring PU index
>          uint32_t partP;
>          const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
>  
> -        int32_t qpP = cuP->m_qp[partP];
> -
>          if (bCheckNoFilter)
>          {
>              // check if each of PUs is lossless coded
>              maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
>              maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
> +            if (!(maskP | maskQ))
> +                continue;
>          }
>  
> +        int32_t qpQ = cuQ->m_qp[partQ];
> +        int32_t qpP = cuP->m_qp[partP];
> +        int32_t qpA = (qpP + qpQ + 1) >> 1;
> +
>          intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
>          for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
>          {
> -            int32_t chromaQPOffset  = pps->chromaQpOffset[chromaIdx];
> -            int32_t qp = ((qpP + qpQ + 1) >> 1) + chromaQPOffset;
> +            int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
>              if (qp >= 30)
> -            {
> -                if (chFmt == X265_CSP_I420)
> -                    qp = g_chromaScale[qp];
> -                else
> -                    qp = X265_MIN(qp, 51);
> -            }
> +                qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, 51);
>  
>              int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
>              const int32_t bitdepthShift = X265_DEPTH - 8;
> diff -r 5e5dc3763f63 -r ee56555c683e source/common/quant.cpp
> --- a/source/common/quant.cpp	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/common/quant.cpp	Fri Jan 30 22:18:28 2015 +0900
> @@ -169,6 +169,7 @@
>      m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
>      m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
>      m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
> +    m_tqBypass = false;
>  
>      return m_resiDctCoeff && m_fencShortBuf;
>  }
> @@ -190,13 +191,16 @@
>      X265_FREE(m_fencShortBuf);
>  }
>  
> -void Quant::setQPforQuant(const CUData& ctu)
> +void Quant::setQPforQuant(const CUData& cu)
>  {
> -    m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
> -    int qpy = ctu.m_qp[0];
> +    m_tqBypass = !!cu.m_tqBypass[0];
> +    if (m_tqBypass)
> +        return;
> +    m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] : NULL;
> +    int qpy = cu.m_qp[0];
>      m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
> -    setChromaQP(qpy + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
> -    setChromaQP(qpy + ctu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, ctu.m_chromaFormat);
> +    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, cu.m_chromaFormat);
> +    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, cu.m_chromaFormat);
>  }
>  
>  void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
> @@ -326,7 +330,7 @@
>                               coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
>  {
>      const uint32_t sizeIdx = log2TrSize - 2;
> -    if (cu.m_tqBypass[absPartIdx])
> +    if (m_tqBypass)
>      {
>          X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
>          return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
> @@ -406,11 +410,11 @@
>      }
>  }
>  
> -void Quant::invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> +void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
>                              uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
>  {
>      const uint32_t sizeIdx = log2TrSize - 2;
> -    if (transQuantBypass)
> +    if (m_tqBypass)
>      {
>          primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
>          return;
> diff -r 5e5dc3763f63 -r ee56555c683e source/common/quant.h
> --- a/source/common/quant.h	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/common/quant.h	Fri Jan 30 22:18:28 2015 +0900
> @@ -93,6 +93,7 @@
>  
>      NoiseReduction*    m_nr;
>      NoiseReduction*    m_frameNr; // Array of NR structures, one for each frameEncoder
> +    bool               m_tqBypass;
>  
>      Quant();
>      ~Quant();
> @@ -102,12 +103,12 @@
>      bool allocNoiseReduction(const x265_param& param);
>  
>      /* CU setup */
> -    void setQPforQuant(const CUData& ctu);
> +    void setQPforQuant(const CUData& cu);
>  
>      uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
>                            uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
>  
> -    void invtransformNxN(bool transQuantBypass, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> +    void invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
>                           uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
>  
>      /* static methods shared with entropy.cpp */
> diff -r 5e5dc3763f63 -r ee56555c683e source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/entropy.cpp	Fri Jan 30 22:18:28 2015 +0900
> @@ -1419,7 +1419,7 @@
>  
>      bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled && !tqBypass;
>  
> -    if (cu.m_slice->m_pps->bTransformSkipEnabled && !tqBypass && (trSize == 4))
> +    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
>          codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
>  
>      bool bIsLuma = ttype == TEXT_LUMA;
> diff -r 5e5dc3763f63 -r ee56555c683e source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/search.cpp	Fri Jan 30 22:18:28 2015 +0900
> @@ -294,7 +294,7 @@
>          uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
>          if (numSig)
>          {
> -            m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> +            m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
>              primitives.cu[sizeIdx].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
>          }
>          else
> @@ -437,7 +437,7 @@
>      uint32_t log2TrSize = cuGeom.log2CUSize - tuDepth;
>      uint32_t tuSize = 1 << log2TrSize;
>  
> -    X265_CHECK(tuSize == MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
> +    X265_CHECK(tuSize <= MAX_TS_SIZE, "transform skip is only possible at 4x4 TUs\n");
>  
>      CUData& cu = mode.cu;
>      Yuv* predYuv = &mode.predYuv;
> @@ -495,7 +495,7 @@
>          uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
>          if (numSig)
>          {
> -            m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
> +            m_quant.invtransformNxN(residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
>              primitives.cu[sizeIdx].add_ps(tmpRecon, tmpReconStride, pred, residual, stride, stride);
>          }
>          else if (useTSkip)
> @@ -645,7 +645,7 @@
>          uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
>          if (numSig)
>          {
> -            m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> +            m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
>              primitives.cu[sizeIdx].add_ps(picReconY, picStride, pred, residual, stride, stride);
>              cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
>          }
> @@ -819,7 +819,7 @@
>              uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
>              if (numSig)
>              {
> -                m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> +                m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
>                  primitives.cu[sizeIdxC].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
>                  cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
>              }
> @@ -923,7 +923,7 @@
>                  uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
>                  if (numSig)
>                  {
> -                    m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
> +                    m_quant.invtransformNxN(residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
>                      primitives.cu[sizeIdxC].add_ps(recon, reconStride, pred, residual, stride, stride);
>                      cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
>                  }
> @@ -1110,7 +1110,7 @@
>              uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
>              if (numSig)
>              {
> -                m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> +                m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
>                  primitives.cu[sizeIdxC].add_ps(picReconC, picStride, pred, residual, stride, stride);
>                  cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
>              }
> @@ -1137,6 +1137,7 @@
>  
>      cu.setPartSizeSubParts(partSize);
>      cu.setPredModeSubParts(MODE_INTRA);
> +    m_quant.m_tqBypass = !!cu.m_tqBypass[0];
>  
>      uint32_t tuDepthRange[2];
>      cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> @@ -2493,6 +2494,7 @@
>      uint32_t log2CUSize = cuGeom.log2CUSize;
>      int sizeIdx = log2CUSize - 2;
>  
> +    uint32_t tqBypass = cu.m_tqBypass[0];
>      m_quant.setQPforQuant(interMode.cu);
>  
>      resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
> @@ -2505,7 +2507,7 @@
>      Cost costs;
>      estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
>  
> -    if (!cu.m_tqBypass[0])
> +    if (!tqBypass)
>      {
>          uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
>          cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
> @@ -2540,15 +2542,16 @@
>      /* calculate signal bits for inter/merge/skip coded CU */
>      m_entropyCoder.load(m_rqt[depth].cur);
>  
> +    m_entropyCoder.resetBits();
> +    if (m_slice->m_pps->bTransquantBypassEnabled)
> +        m_entropyCoder.codeCUTransquantBypassFlag(tqBypass);
> +
>      uint32_t coeffBits, bits;
>      if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
>      {
>          cu.setPredModeSubParts(MODE_SKIP);
>  
>          /* Merge/Skip */
> -        m_entropyCoder.resetBits();
> -        if (m_slice->m_pps->bTransquantBypassEnabled)
> -            m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
>          m_entropyCoder.codeSkipFlag(cu, 0);
>          m_entropyCoder.codeMergeIndex(cu, 0);
>          coeffBits = 0;
> @@ -2556,9 +2559,6 @@
>      }
>      else
>      {
> -        m_entropyCoder.resetBits();
> -        if (m_slice->m_pps->bTransquantBypassEnabled)
> -            m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
>          m_entropyCoder.codeSkipFlag(cu, 0);
>          m_entropyCoder.codePredMode(cu.m_predMode[0]);
>          m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
> @@ -2639,7 +2639,7 @@
>  
>          if (numSigY)
>          {
> -            m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> +            m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
>              cu.setCbfSubParts(setCbf, TEXT_LUMA, absPartIdx, depth);
>          }
>          else
> @@ -2672,7 +2672,7 @@
>                  uint32_t numSigU = m_quant.transformNxN(cu, fencCb, fencYuv->m_csize, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false);
>                  if (numSigU)
>                  {
> -                    m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> +                    m_quant.invtransformNxN(curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
>                      cu.setCbfPartRange(setCbf, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
>                  }
>                  else
> @@ -2686,7 +2686,7 @@
>                  uint32_t numSigV = m_quant.transformNxN(cu, fencCr, fencYuv->m_csize, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false);
>                  if (numSigV)
>                  {
> -                    m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> +                    m_quant.invtransformNxN(curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
>                      cu.setCbfPartRange(setCbf, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
>                  }
>                  else
> @@ -2832,7 +2832,7 @@
>  
>          if (cbfFlag[TEXT_LUMA][0])
>          {
> -            m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
> +            m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
>  
>              // non-zero cost calculation for luma - This is an approximation
>              // finally we have to encode correct cbf after comparing with null cost
> @@ -2931,7 +2931,7 @@
>  
>                      if (cbfFlag[chromaId][tuIterator.section])
>                      {
> -                        m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], curResiC, strideResiC, coeffCurC + subTUOffset,
> +                        m_quant.invtransformNxN(curResiC, strideResiC, coeffCurC + subTUOffset,
>                                                  log2TrSizeC, (TextType)chromaId, false, false, numSig[chromaId][tuIterator.section]);
>  
>                          // non-zero cost calculation for luma, same as luma - This is an approximation
> @@ -3023,7 +3023,7 @@
>                  m_entropyCoder.codeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
>                  const uint32_t skipSingleBitsY = m_entropyCoder.getNumberOfWrittenBits();
>  
> -                m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
> +                m_quant.invtransformNxN(tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
>  
>                  nonZeroDistY = primitives.cu[partSize].sse_ss(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, tsResiY, trSize);
>  
> @@ -3094,7 +3094,7 @@
>                          m_entropyCoder.codeCoeffNxN(cu, tsCoeffC, absPartIdxC, log2TrSizeC, (TextType)chromaId);
>                          singleBits[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
>  
> -                        m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiC, trSizeC, tsCoeffC,
> +                        m_quant.invtransformNxN(tsResiC, trSizeC, tsCoeffC,
>                                                  log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
>                          uint32_t dist = primitives.cu[partSizeC].sse_ss(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
>                          nonZeroDistC = m_rdCost.scaleChromaDist(chromaId, dist);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho