[x265] [PATCH] Quant: fix for decoding hash mismatch and non-deterministic output on multi-socket machines

Steve Borho steve at borho.org
Wed Sep 2 20:26:13 CEST 2015


On 09/02, ashok at multicorewareinc.com wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1441182836 -19800
> #      Wed Sep 02 14:03:56 2015 +0530
> # Node ID a0b361e07eb32c0b676437283273142fb77bd5fe
> # Parent  86e9bd7dd19278fceef65fc93a06dc8746ec9daf
> Quant: fix for decoding hash mismatch and non-deterministic output on multi-socket machines

LGTM

> diff -r 86e9bd7dd192 -r a0b361e07eb3 source/common/quant.cpp
> --- a/source/common/quant.cpp	Tue Sep 01 17:06:05 2015 +0530
> +++ b/source/common/quant.cpp	Wed Sep 02 14:03:56 2015 +0530
> @@ -204,7 +204,6 @@
>      m_resiDctCoeff = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE * 2);
>      m_fencDctCoeff = m_resiDctCoeff + (MAX_TR_SIZE * MAX_TR_SIZE);
>      m_fencShortBuf = X265_MALLOC(int16_t, MAX_TR_SIZE * MAX_TR_SIZE);
> -    m_tqBypass = false;
>  
>      return m_resiDctCoeff && m_fencShortBuf;
>  }
> @@ -228,8 +227,6 @@
>  
>  void Quant::setQPforQuant(const CUData& ctu, int qp)
>  {
> -    m_tqBypass = !!ctu.m_tqBypass[0];
> -
>      m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
>      m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
>      setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
> @@ -404,7 +401,8 @@
>                               coeff_t* coeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip)
>  {
>      const uint32_t sizeIdx = log2TrSize - 2;
> -    if (m_tqBypass)
> +
> +    if (cu.m_tqBypass[0])
>      {
>          X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
>          return primitives.cu[sizeIdx].copy_cnt(coeff, residual, resiStride);
> @@ -484,11 +482,12 @@
>      }
>  }
>  
> -void Quant::invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> +void Quant::invtransformNxN(const CUData& cu, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
>                              uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig)
>  {
>      const uint32_t sizeIdx = log2TrSize - 2;
> -    if (m_tqBypass)
> +
> +    if (cu.m_tqBypass[0])
>      {
>          primitives.cu[sizeIdx].cpy1Dto2D_shl(residual, coeff, resiStride, 0);
>          return;
> diff -r 86e9bd7dd192 -r a0b361e07eb3 source/common/quant.h
> --- a/source/common/quant.h	Tue Sep 01 17:06:05 2015 +0530
> +++ b/source/common/quant.h	Wed Sep 02 14:03:56 2015 +0530
> @@ -95,7 +95,6 @@
>  
>      NoiseReduction*    m_nr;
>      NoiseReduction*    m_frameNr; // Array of NR structures, one for each frameEncoder
> -    bool               m_tqBypass;
>  
>      Quant();
>      ~Quant();
> @@ -110,7 +109,7 @@
>      uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
>                            uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
>  
> -    void invtransformNxN(int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
> +    void invtransformNxN(const CUData& cu, int16_t* residual, uint32_t resiStride, const coeff_t* coeff,
>                           uint32_t log2TrSize, TextType ttype, bool bIntra, bool useTransformSkip, uint32_t numSig);
>  
>      /* Pattern decision for context derivation process of significant_coeff_flag */
> diff -r 86e9bd7dd192 -r a0b361e07eb3 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Tue Sep 01 17:06:05 2015 +0530
> +++ b/source/encoder/analysis.cpp	Wed Sep 02 14:03:56 2015 +0530
> @@ -209,24 +209,20 @@
>          return;
>      else if (md.bestMode->cu.isIntra(0))
>      {
> -        m_quant.m_tqBypass = true;
>          md.pred[PRED_LOSSLESS].initCosts();
>          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
>          PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
>          uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
>          checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
>          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> -        m_quant.m_tqBypass = false;
>      }
>      else
>      {
> -        m_quant.m_tqBypass = true;
>          md.pred[PRED_LOSSLESS].initCosts();
>          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
>          md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
>          encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
>          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> -        m_quant.m_tqBypass = false;
>      }
>  }
>  
> diff -r 86e9bd7dd192 -r a0b361e07eb3 source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Tue Sep 01 17:06:05 2015 +0530
> +++ b/source/encoder/search.cpp	Wed Sep 02 14:03:56 2015 +0530
> @@ -319,7 +319,7 @@
>          uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
>          if (numSig)
>          {
> -            m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> +            m_quant.invtransformNxN(cu, residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
>              primitives.cu[sizeIdx].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
>          }
>          else
> @@ -518,7 +518,7 @@
>          uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
>          if (numSig)
>          {
> -            m_quant.invtransformNxN(residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
> +            m_quant.invtransformNxN(cu, residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
>              primitives.cu[sizeIdx].add_ps(tmpRecon, tmpReconStride, pred, residual, stride, stride);
>          }
>          else if (useTSkip)
> @@ -670,7 +670,7 @@
>          uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
>          if (numSig)
>          {
> -            m_quant.invtransformNxN(residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
> +            m_quant.invtransformNxN(cu, residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
>              primitives.cu[sizeIdx].add_ps(picReconY, picStride, pred, residual, stride, stride);
>              cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
>          }
> @@ -845,7 +845,7 @@
>              uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
>              if (numSig)
>              {
> -                m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> +                m_quant.invtransformNxN(cu, residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
>                  primitives.cu[sizeIdxC].add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
>                  cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
>              }
> @@ -946,7 +946,7 @@
>                  uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
>                  if (numSig)
>                  {
> -                    m_quant.invtransformNxN(residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
> +                    m_quant.invtransformNxN(cu, residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
>                      primitives.cu[sizeIdxC].add_ps(recon, reconStride, pred, residual, stride, stride);
>                      cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
>                  }
> @@ -1135,7 +1135,7 @@
>              uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
>              if (numSig)
>              {
> -                m_quant.invtransformNxN(residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
> +                m_quant.invtransformNxN(cu, residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
>                  primitives.cu[sizeIdxC].add_ps(picReconC, picStride, pred, residual, stride, stride);
>                  cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
>              }
> @@ -1162,7 +1162,6 @@
>  
>      cu.setPartSizeSubParts(partSize);
>      cu.setPredModeSubParts(MODE_INTRA);
> -    m_quant.m_tqBypass = !!cu.m_tqBypass[0];
>  
>      uint32_t tuDepthRange[2];
>      cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> @@ -2683,7 +2682,7 @@
>  
>          if (numSigY)
>          {
> -            m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> +            m_quant.invtransformNxN(cu, curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
>              cu.setCbfSubParts(setCbf, TEXT_LUMA, absPartIdx, depth);
>          }
>          else
> @@ -2716,7 +2715,7 @@
>                  uint32_t numSigU = m_quant.transformNxN(cu, fencCb, fencYuv->m_csize, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, absPartIdxC, false);
>                  if (numSigU)
>                  {
> -                    m_quant.invtransformNxN(curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> +                    m_quant.invtransformNxN(cu, curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
>                      cu.setCbfPartRange(setCbf, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
>                  }
>                  else
> @@ -2730,7 +2729,7 @@
>                  uint32_t numSigV = m_quant.transformNxN(cu, fencCr, fencYuv->m_csize, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, absPartIdxC, false);
>                  if (numSigV)
>                  {
> -                    m_quant.invtransformNxN(curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> +                    m_quant.invtransformNxN(cu, curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
>                      cu.setCbfPartRange(setCbf, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
>                  }
>                  else
> @@ -2876,7 +2875,7 @@
>  
>          if (cbfFlag[TEXT_LUMA][0])
>          {
> -            m_quant.invtransformNxN(curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
> +            m_quant.invtransformNxN(cu, curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSig[TEXT_LUMA][0]); //this is for inter mode only
>  
>              // non-zero cost calculation for luma - This is an approximation
>              // finally we have to encode correct cbf after comparing with null cost
> @@ -2973,7 +2972,7 @@
>  
>                      if (cbfFlag[chromaId][tuIterator.section])
>                      {
> -                        m_quant.invtransformNxN(curResiC, strideResiC, coeffCurC + subTUOffset,
> +                        m_quant.invtransformNxN(cu, curResiC, strideResiC, coeffCurC + subTUOffset,
>                                                  log2TrSizeC, (TextType)chromaId, false, false, numSig[chromaId][tuIterator.section]);
>  
>                          // non-zero cost calculation for luma, same as luma - This is an approximation
> @@ -3062,7 +3061,7 @@
>                  m_entropyCoder.codeCoeffNxN(cu, m_tsCoeff, absPartIdx, log2TrSize, TEXT_LUMA);
>                  const uint32_t skipSingleBitsY = m_entropyCoder.getNumberOfWrittenBits();
>  
> -                m_quant.invtransformNxN(m_tsResidual, trSize, m_tsCoeff, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
> +                m_quant.invtransformNxN(cu, m_tsResidual, trSize, m_tsCoeff, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
>  
>                  nonZeroDistY = primitives.cu[partSize].sse_ss(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, m_tsResidual, trSize);
>  
> @@ -3130,7 +3129,7 @@
>                          m_entropyCoder.codeCoeffNxN(cu, m_tsCoeff, absPartIdxC, log2TrSizeC, (TextType)chromaId);
>                          singleBits[chromaId][tuIterator.section] = m_entropyCoder.getNumberOfWrittenBits();
>  
> -                        m_quant.invtransformNxN(m_tsResidual, trSizeC, m_tsCoeff,
> +                        m_quant.invtransformNxN(cu, m_tsResidual, trSizeC, m_tsCoeff,
>                                                  log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
>                          uint32_t dist = primitives.cu[partSizeC].sse_ss(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, m_tsResidual, trSizeC);
>                          nonZeroDistC = m_rdCost.scaleChromaDist(chromaId, dist);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list