[x265] [PATCH] analysis: always configure quant QP directly after setting RD lambda

Sat Apr 25 08:59:36 CEST 2015

On Sat, Apr 25, 2015 at 1:50 AM, Steve Borho <steve at borho.org> wrote:
> # HG changeset patch
> # User Steve Borho <steve at borho.org>
> # Date 1429943995 18000
> #      Sat Apr 25 01:39:55 2015 -0500
> # Node ID 67b0c29c45e67da8045b720bc0d657a14f952c33
> # Parent  318aa6a4eaf2d58cf7d223bf92448d94cc439a5b
> analysis: always configure quant QP directly after setting RD lambda
>
> Basically, everywhere we adjust or assign QP we set quant QP immediately. This
> removes a great many ad-hoc calls to setQPforQuant() and hopefully makes it
> impossible to miss quant being configured properly.
>
> This patch fixes a layering violation where the frame encoder was setting the
> RDO lambdas directly, but only when delta-QP was not enabled.

this patch needs work for --cu-lossless, and changes outputs

> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/common/quant.cpp
> --- a/source/common/quant.cpp   Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/common/quant.cpp   Sat Apr 25 01:39:55 2015 -0500
> @@ -225,16 +225,15 @@
>      X265_FREE(m_fencShortBuf);
>  }
>
> -void Quant::setQPforQuant(const CUData& cu)
> +void Quant::setQPforQuant(const CUData& ctu, int qp)
>  {
> -    m_tqBypass = !!cu.m_tqBypass[0];
> +    m_tqBypass = !!ctu.m_tqBypass[0];
>      if (m_tqBypass)
>          return;
> -    m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] : NULL;
> -    int qpy = cu.m_qp[0];
> -    m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
> -    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, cu.m_chromaFormat);
> -    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, cu.m_chromaFormat);
> +    m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
> +    m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
> +    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
> +    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, ctu.m_chromaFormat);
>  }
>
>  void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/common/quant.h
> --- a/source/common/quant.h     Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/common/quant.h     Sat Apr 25 01:39:55 2015 -0500
> @@ -103,7 +103,7 @@
>      bool allocNoiseReduction(const x265_param& param);
>
>      /* CU setup */
> -    void setQPforQuant(const CUData& cu);
> +    void setQPforQuant(const CUData& ctu, int qp);
>
>      uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
>                            uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/analysis.cpp       Sat Apr 25 01:39:55 2015 -0500
> @@ -146,18 +146,16 @@
>
>      if (m_slice->m_pps->bUseDQP)
>      {
> -        m_aqQP[0] = calculateQpforCuSize(ctu, cuGeom);
> -        setLambdaFromQP(*m_slice, m_aqQP[0]);
> -        m_aqQP[0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0]);
> -        ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> +        m_aqQP[0] = setLambdaFromQP(ctu, calculateQpforCuSize(ctu, cuGeom));
>
>          if (m_slice->m_pps->maxCuDQPDepth)
>              initAqQPs(1, ctu, &cuGeom + 1);
>      }
>      else
> -        m_aqQP[0] = m_slice->m_sliceQp;
> +        /* adaptive quant disabled, CTU QP is always slice QP, and within spec range */
> +        m_aqQP[0] = setLambdaFromQP(ctu, m_slice->m_sliceQp);
>
> -    m_quant.setQPforQuant(ctu);
> +    ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
>      m_rqt[0].cur.load(initialContext);
>      m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
>
> @@ -269,7 +267,6 @@
>              PartSize size = (PartSize)reusePartSizes[zOrder];
>              Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
>              mode.cu.initSubCU(parentCTU, cuGeom, qp);
> -            m_quant.setQPforQuant(mode.cu);
>              checkIntra(mode, cuGeom, size, &reuseModes[zOrder], &reuseChromaModes[zOrder]);
>              checkBestMode(mode, depth);
>
> @@ -287,7 +284,6 @@
>      else if (mightNotSplit)
>      {
>          md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
> -        m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
>          checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
>          checkBestMode(md.pred[PRED_INTRA], depth);
>
> @@ -327,11 +323,7 @@
>                  m_rqt[nextDepth].cur.load(*nextContext);
>
>                  if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> -                {
> -                    nextQP = m_aqQP[childGeom.index];
> -                    setLambdaFromQP(*m_slice, nextQP);
> -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> -                }
> +                    nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
>
>                  compressIntraCU(parentCTU, childGeom, zOrder, nextQP);
>
> @@ -400,14 +392,9 @@
>      {
>          slave.m_slice = m_slice;
>          slave.m_frame = m_frame;
> -        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> +        slave.setLambdaFromQP(md.pred[PRED_2Nx2N].cu, m_rdCost.m_qp);
>          slave.invalidateContexts(0);
> -
> -        if (m_param->rdLevel >= 5)
> -        {
> -            slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> -            slave.m_quant.setQPforQuant(md.pred[PRED_2Nx2N].cu);
> -        }
> +        slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
>      }
>
>      /* perform Mode task, repeat until no more work is available */
> @@ -418,11 +405,6 @@
>              switch (pmode.modes[task])
>              {
>              case PRED_INTRA:
> -                if (&slave != this)
> -                {
> -                    slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> -                    slave.m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> -                }
>                  slave.checkIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
>                  if (m_param->rdLevel > 2)
>                      slave.encodeIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
> @@ -738,11 +720,7 @@
>                  m_rqt[nextDepth].cur.load(*nextContext);
>
>                  if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> -                {
> -                    nextQP = m_aqQP[childGeom.index];
> -                    setLambdaFromQP(*m_slice, nextQP);
> -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> -                }
> +                    nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
>
>                  compressInterCU_dist(parentCTU, childGeom, nextQP);
>
> @@ -943,7 +921,6 @@
>                      {
>                          /* generate recon pixels with no rate distortion considerations */
>                          CUData& cu = md.bestMode->cu;
> -                        m_quant.setQPforQuant(cu);
>
>                          uint32_t tuDepthRange[2];
>                          cu.getInterTUQtDepthRange(tuDepthRange, 0);
> @@ -968,7 +945,6 @@
>                      {
>                          /* generate recon pixels with no rate distortion considerations */
>                          CUData& cu = md.bestMode->cu;
> -                        m_quant.setQPforQuant(cu);
>
>                          uint32_t tuDepthRange[2];
>                          cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> @@ -1019,11 +995,7 @@
>                  m_rqt[nextDepth].cur.load(*nextContext);
>
>                  if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> -                {
> -                    nextQP = m_aqQP[childGeom.index];
> -                    setLambdaFromQP(*m_slice, nextQP);
> -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> -                }
> +                    nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
>
>                  compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
>
> @@ -1225,11 +1197,7 @@
>                  m_rqt[nextDepth].cur.load(*nextContext);
>
>                  if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> -                {
> -                    nextQP = m_aqQP[childGeom.index];
> -                    setLambdaFromQP(*m_slice, nextQP);
> -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> -                }
> +                    nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
>
>                  compressInterCU_rd5_6(parentCTU, childGeom, zOrder, nextQP);
>
> @@ -1755,7 +1723,6 @@
>      CUData& cu = bestMode->cu;
>
>      cu.copyFromPic(ctu, cuGeom);
> -    m_quant.setQPforQuant(cu);
>
>      Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
>      if (cuGeom.depth)
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/frameencoder.cpp   Sat Apr 25 01:39:55 2015 -0500
> @@ -851,12 +851,11 @@
>
>          if (m_param->rc.aqMode || bIsVbv)
>          {
> +            X265_CHECK(slice->m_pps->bUseDQP, "adaptive quant in use without DQP\n");
>              int qp = calcQpForCu(cuAddr, curEncData.m_cuStat[cuAddr].baseQp);
>              qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
>              curEncData.m_rowStat[row].sumQpAq += qp;
>          }
> -        else
> -            tld.analysis.setLambdaFromQP(*slice, slice->m_sliceQp);
>
>          if (m_param->bEnableWavefront && !col && row)
>          {
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/rdcost.h
> --- a/source/encoder/rdcost.h   Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/rdcost.h   Sat Apr 25 01:39:55 2015 -0500
> @@ -40,12 +40,13 @@
>      uint32_t  m_chromaDistWeight[2];
>      uint32_t  m_psyRdBase;
>      uint32_t  m_psyRd;
> -    int       m_qp;
> +    int       m_qp; /* QP used to configure lambda, may be higher than QP_MAX_SPEC but <= QP_MAX_MAX */
>
>      void setPsyRdScale(double scale)                { m_psyRdBase = (uint32_t)floor(65536.0 * scale * 0.33); }
>
>      void setQP(const Slice& slice, int qp)
>      {
> +        x265_emms(); /* TODO: if the lambda tables were ints, this would not be necessary */
>          m_qp = qp;
>
>          /* Scale PSY RD factor by a slice type factor */
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/search.cpp Sat Apr 25 01:39:55 2015 -0500
> @@ -163,11 +163,16 @@
>      X265_FREE(m_tsRecon);
>  }
>
> -void Search::setLambdaFromQP(const Slice& slice, int qp)
> +int Search::setLambdaFromQP(const CUData& ctu, int qp)
>  {
> -    x265_emms(); /* TODO: if the lambda tables were ints, this would not be necessary */
> +    X265_CHECK(qp >= QP_MIN && qp <= QP_MAX_MAX, "QP used for lambda is out of range\n");
> +
>      m_me.setQP(qp);
> -    m_rdCost.setQP(slice, qp);
> +    m_rdCost.setQP(*m_slice, qp);
> +
> +    int quantQP = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> +    m_quant.setQPforQuant(ctu, quantQP);
> +    return quantQP;
>  }
>
>  #if CHECKED_BUILD || _DEBUG
> @@ -1364,8 +1369,6 @@
>      X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");
>      X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
>
> -    m_quant.setQPforQuant(cu);
> -
>      uint32_t tuDepthRange[2];
>      cu.getIntraTUQtDepthRange(tuDepthRange, 0);
>
> @@ -1888,7 +1891,7 @@
>      /* Setup slave Search instance for ME for master's CU */
>      if (&slave != this)
>      {
> -        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> +        slave.setLambdaFromQP(pme.mode.cu, m_rdCost.m_qp);
>          slave.m_slice = m_slice;
>          slave.m_frame = m_frame;
>
> @@ -2502,9 +2505,6 @@
>      uint32_t log2CUSize = cuGeom.log2CUSize;
>      int sizeIdx = log2CUSize - 2;
>
> -    uint32_t tqBypass = cu.m_tqBypass[0];
> -    m_quant.setQPforQuant(interMode.cu);
> -
>      resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
>
>      uint32_t tuDepthRange[2];
> @@ -2515,6 +2515,7 @@
>      Cost costs;
>      estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
>
> +    uint32_t tqBypass = cu.m_tqBypass[0];
>      if (!tqBypass)
>      {
>          uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> diff -r 318aa6a4eaf2 -r 67b0c29c45e6 source/encoder/search.h
> --- a/source/encoder/search.h   Sat Apr 25 00:41:25 2015 -0500
> +++ b/source/encoder/search.h   Sat Apr 25 01:39:55 2015 -0500
> @@ -287,7 +287,7 @@
>      ~Search();
>
>      bool     initSearch(const x265_param& param, ScalingList& scalingList);
> -    void     setLambdaFromQP(const Slice& slice, int qp);
> +    int      setLambdaFromQP(const CUData& ctu, int qp); /* returns real quant QP in valid spec range */
>
>      // mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
>      void     invalidateContexts(int fromDepth);


-- 
Steve Borho