[x265] [PATCH 6 of 6 REV2] analysis: always configure quant QP directly after setting RD lambda

Mon Apr 27 13:34:58 CEST 2015

Agreed, and the averaging should be weighted by CU block size.

On Sun, Apr 26, 2015 at 11:17 PM, Steve Borho <steve at borho.org> wrote:

> On Sun, Apr 26, 2015 at 12:21 PM, Steve Borho <steve at borho.org> wrote:
> > # HG changeset patch
> > # User Steve Borho <steve at borho.org>
> > # Date 1429943995 18000
> > #      Sat Apr 25 01:39:55 2015 -0500
> > # Node ID 68a13226d586b335c02cade9311e093f0149c42a
> > # Parent  6a0a37c01cff03cadd44691a0fe447d17ec0b14f
> > analysis: always configure quant QP directly after setting RD lambda
> >
> > Basically, everywhere we adjust or assign QP we set quant QP
> immediately. This
> > removes a great many ad-hoc calls to setQPforQuant() and hopefully makes
> it
> > impossible to miss quant being configured properly.
> >
> > This patch fixes a layering violation where the frame encoder was
> setting the
> > RDO lambdas directly, but only when delta-QP was not enabled.
> >
> > diff -r 6a0a37c01cff -r 68a13226d586 source/common/quant.cpp
> > --- a/source/common/quant.cpp   Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/common/quant.cpp   Sat Apr 25 01:39:55 2015 -0500
> > @@ -225,16 +225,15 @@
> >      X265_FREE(m_fencShortBuf);
> >  }
> >
> > -void Quant::setQPforQuant(const CUData& cu)
> > +void Quant::setQPforQuant(const CUData& ctu, int qp)
> >  {
> > -    m_tqBypass = !!cu.m_tqBypass[0];
> > +    m_tqBypass = !!ctu.m_tqBypass[0];
> >      if (m_tqBypass)
> >          return;
> > -    m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] :
> NULL;
> > -    int qpy = cu.m_qp[0];
> > -    m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
> > -    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0],
> TEXT_CHROMA_U, cu.m_chromaFormat);
> > -    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1],
> TEXT_CHROMA_V, cu.m_chromaFormat);
> > +    m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] :
> NULL;
> > +    m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
> > +    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0],
> TEXT_CHROMA_U, ctu.m_chromaFormat);
> > +    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[1],
> TEXT_CHROMA_V, ctu.m_chromaFormat);
> >  }
> >
> >  void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
> > diff -r 6a0a37c01cff -r 68a13226d586 source/common/quant.h
> > --- a/source/common/quant.h     Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/common/quant.h     Sat Apr 25 01:39:55 2015 -0500
> > @@ -103,7 +103,7 @@
> >      bool allocNoiseReduction(const x265_param& param);
> >
> >      /* CU setup */
> > -    void setQPforQuant(const CUData& cu);
> > +    void setQPforQuant(const CUData& ctu, int qp);
> >
> >      uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t
> fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
> >                            uint32_t log2TrSize, TextType ttype, uint32_t
> absPartIdx, bool useTransformSkip);
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp       Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/analysis.cpp       Sat Apr 25 01:39:55 2015 -0500
> > @@ -146,18 +146,16 @@
> >
> >      if (m_slice->m_pps->bUseDQP)
> >      {
> > -        m_aqQP[0] = calculateQpforCuSize(ctu, cuGeom);
> > -        setLambdaFromQP(*m_slice, m_aqQP[0]);
> > -        m_aqQP[0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0]);
> > -        ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> > +        m_aqQP[0] = setLambdaFromQP(ctu, calculateQpforCuSize(ctu,
> cuGeom));
> >
> >          if (m_slice->m_pps->maxCuDQPDepth)
> >              initAqQPs(1, ctu, &cuGeom + 1);
> >      }
> >      else
> > -        m_aqQP[0] = m_slice->m_sliceQp;
> > +        /* adaptive quant disabled, CTU QP is always slice QP, and
> within spec range */
> > +        m_aqQP[0] = setLambdaFromQP(ctu, m_slice->m_sliceQp);
> >
> > -    m_quant.setQPforQuant(ctu);
> > +    ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> >      m_rqt[0].cur.load(initialContext);
> >      m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic,
> ctu.m_cuAddr, 0);
> >
> > @@ -231,20 +229,24 @@
> >          return;
> >      else if (md.bestMode->cu.isIntra(0))
> >      {
> > +        m_quant.m_tqBypass = true;
> >          md.pred[PRED_LOSSLESS].initCosts();
> >          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu,
> cuGeom);
> >          PartSize size =
> (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
> >          uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
> >          checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
> >          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> > +        m_quant.m_tqBypass = false;
> >      }
> >      else
> >      {
> > +        m_quant.m_tqBypass = true;
> >          md.pred[PRED_LOSSLESS].initCosts();
> >          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu,
> cuGeom);
> >
> md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
> >          encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
> >          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> > +        m_quant.m_tqBypass = false;
> >      }
> >  }
> >
> > @@ -269,7 +271,6 @@
> >              PartSize size = (PartSize)reusePartSizes[zOrder];
> >              Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] :
> md.pred[PRED_INTRA_NxN];
> >              mode.cu.initSubCU(parentCTU, cuGeom, qp);
> > -            m_quant.setQPforQuant(mode.cu);
> >              checkIntra(mode, cuGeom, size, &reuseModes[zOrder],
> &reuseChromaModes[zOrder]);
> >              checkBestMode(mode, depth);
> >
> > @@ -287,7 +288,6 @@
> >      else if (mightNotSplit)
> >      {
> >          md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
> > -        m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> >          checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
> >          checkBestMode(md.pred[PRED_INTRA], depth);
> >
> > @@ -327,11 +327,7 @@
> >                  m_rqt[nextDepth].cur.load(*nextContext);
> >
> >                  if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                {
> > -                    nextQP = m_aqQP[childGeom.index];
> > -                    setLambdaFromQP(*m_slice, nextQP);
> > -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -                }
> > +                    nextQP = setLambdaFromQP(parentCTU,
> m_aqQP[childGeom.index]);
> >
> >                  compressIntraCU(parentCTU, childGeom, zOrder, nextQP);
> >
> > @@ -401,14 +397,9 @@
> >      {
> >          slave.m_slice = m_slice;
> >          slave.m_frame = m_frame;
> > -        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> > +        slave.setLambdaFromQP(md.pred[PRED_2Nx2N].cu, m_rdCost.m_qp);
> >          slave.invalidateContexts(0);
> > -
> > -        if (m_param->rdLevel >= 5)
> > -        {
> > -
> slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> > -            slave.m_quant.setQPforQuant(md.pred[PRED_2Nx2N].cu);
> > -        }
> > +
> slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> >      }
> >
> >      /* perform Mode task, repeat until no more work is available */
> > @@ -419,11 +410,6 @@
> >              switch (pmode.modes[task])
> >              {
> >              case PRED_INTRA:
> > -                if (&slave != this)
> > -                {
> > -
> slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> > -                    slave.m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> > -                }
> >                  slave.checkIntraInInter(md.pred[PRED_INTRA],
> pmode.cuGeom);
> >                  if (m_param->rdLevel > 2)
> >                      slave.encodeIntraInInter(md.pred[PRED_INTRA],
> pmode.cuGeom);
> > @@ -739,11 +725,7 @@
> >                  m_rqt[nextDepth].cur.load(*nextContext);
> >
> >                  if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                {
> > -                    nextQP = m_aqQP[childGeom.index];
> > -                    setLambdaFromQP(*m_slice, nextQP);
> > -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -                }
> > +                    nextQP = setLambdaFromQP(parentCTU,
> m_aqQP[childGeom.index]);
> >
> >                  compressInterCU_dist(parentCTU, childGeom, nextQP);
> >
> > @@ -944,7 +926,6 @@
> >                      {
> >                          /* generate recon pixels with no rate
> distortion considerations */
> >                          CUData& cu = md.bestMode->cu;
> > -                        m_quant.setQPforQuant(cu);
> >
> >                          uint32_t tuDepthRange[2];
> >                          cu.getInterTUQtDepthRange(tuDepthRange, 0);
> > @@ -969,7 +950,6 @@
> >                      {
> >                          /* generate recon pixels with no rate
> distortion considerations */
> >                          CUData& cu = md.bestMode->cu;
> > -                        m_quant.setQPforQuant(cu);
> >
> >                          uint32_t tuDepthRange[2];
> >                          cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> > @@ -1020,11 +1000,7 @@
> >                  m_rqt[nextDepth].cur.load(*nextContext);
> >
> >                  if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                {
> > -                    nextQP = m_aqQP[childGeom.index];
> > -                    setLambdaFromQP(*m_slice, nextQP);
> > -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -                }
> > +                    nextQP = setLambdaFromQP(parentCTU,
> m_aqQP[childGeom.index]);
> >
> >                  compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> >
> > @@ -1228,11 +1204,7 @@
> >                  m_rqt[nextDepth].cur.load(*nextContext);
> >
> >                  if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                {
> > -                    nextQP = m_aqQP[childGeom.index];
> > -                    setLambdaFromQP(*m_slice, nextQP);
> > -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -                }
> > +                    nextQP = setLambdaFromQP(parentCTU,
> m_aqQP[childGeom.index]);
> >
> >                  compressInterCU_rd5_6(parentCTU, childGeom, zOrder,
> nextQP);
> >
> > @@ -1758,7 +1730,6 @@
> >      CUData& cu = bestMode->cu;
> >
> >      cu.copyFromPic(ctu, cuGeom);
> > -    m_quant.setQPforQuant(cu);
> >
> >      Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
> >      if (cuGeom.depth)
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/frameencoder.cpp
> > --- a/source/encoder/frameencoder.cpp   Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/frameencoder.cpp   Sat Apr 25 01:39:55 2015 -0500
> > @@ -851,12 +851,11 @@
> >
> >          if (m_param->rc.aqMode || bIsVbv)
> >          {
> > +            X265_CHECK(slice->m_pps->bUseDQP, "adaptive quant in use
> without DQP\n");
> >              int qp = calcQpForCu(cuAddr,
> curEncData.m_cuStat[cuAddr].baseQp);
> >              qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> >              curEncData.m_rowStat[row].sumQpAq += qp;
> >          }
> > -        else
> > -            tld.analysis.setLambdaFromQP(*slice, slice->m_sliceQp);
>
> Note that this change raises the question of whether calcQpForCu() is
> necessary any more in the frame encoder. The returned QP is only used
> to update the row sum QP. compressCTU() is free to use an entirely
> different QP (or QPs per CU).
>
> My guess is that rate control would work better if sumQpAq were
> incremented *after* the CTU was compressed, based on the average of
> the QPs actually used to code the block. And this would also be more
> work efficient.
>
> >          if (m_param->bEnableWavefront && !col && row)
> >          {
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/rdcost.h
> > --- a/source/encoder/rdcost.h   Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/rdcost.h   Sat Apr 25 01:39:55 2015 -0500
> > @@ -40,12 +40,13 @@
> >      uint32_t  m_chromaDistWeight[2];
> >      uint32_t  m_psyRdBase;
> >      uint32_t  m_psyRd;
> > -    int       m_qp;
> > +    int       m_qp; /* QP used to configure lambda, may be higher than
> QP_MAX_SPEC but <= QP_MAX_MAX */
> >
> >      void setPsyRdScale(double scale)                { m_psyRdBase =
> (uint32_t)floor(65536.0 * scale * 0.33); }
> >
> >      void setQP(const Slice& slice, int qp)
> >      {
> > +        x265_emms(); /* TODO: if the lambda tables were ints, this
> would not be necessary */
> >          m_qp = qp;
> >
> >          /* Scale PSY RD factor by a slice type factor */
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/search.cpp
> > --- a/source/encoder/search.cpp Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/search.cpp Sat Apr 25 01:39:55 2015 -0500
> > @@ -163,11 +163,16 @@
> >      X265_FREE(m_tsRecon);
> >  }
> >
> > -void Search::setLambdaFromQP(const Slice& slice, int qp)
> > +int Search::setLambdaFromQP(const CUData& ctu, int qp)
> >  {
> > -    x265_emms(); /* TODO: if the lambda tables were ints, this would
> not be necessary */
> > +    X265_CHECK(qp >= QP_MIN && qp <= QP_MAX_MAX, "QP used for lambda is
> out of range\n");
> > +
> >      m_me.setQP(qp);
> > -    m_rdCost.setQP(slice, qp);
> > +    m_rdCost.setQP(*m_slice, qp);
> > +
> > +    int quantQP = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> > +    m_quant.setQPforQuant(ctu, quantQP);
> > +    return quantQP;
> >  }
> >
> >  #if CHECKED_BUILD || _DEBUG
> > @@ -1364,8 +1369,6 @@
> >      X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does
> not expect NxN intra\n");
> >      X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect
> to be used in I slices\n");
> >
> > -    m_quant.setQPforQuant(cu);
> > -
> >      uint32_t tuDepthRange[2];
> >      cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> >
> > @@ -1888,10 +1891,9 @@
> >      /* Setup slave Search instance for ME for master's CU */
> >      if (&slave != this)
> >      {
> > -        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> >          slave.m_slice = m_slice;
> >          slave.m_frame = m_frame;
> > -
> > +        slave.setLambdaFromQP(pme.mode.cu, m_rdCost.m_qp);
> >          slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr,
> pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height);
> >      }
> >
> > @@ -2523,9 +2525,6 @@
> >      uint32_t log2CUSize = cuGeom.log2CUSize;
> >      int sizeIdx = log2CUSize - 2;
> >
> > -    uint32_t tqBypass = cu.m_tqBypass[0];
> > -    m_quant.setQPforQuant(interMode.cu);
> > -
> >      resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
> >
> >      uint32_t tuDepthRange[2];
> > @@ -2536,6 +2535,7 @@
> >      Cost costs;
> >      estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
> tuDepthRange);
> >
> > +    uint32_t tqBypass = cu.m_tqBypass[0];
> >      if (!tqBypass)
> >      {
> >          uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/search.h
> > --- a/source/encoder/search.h   Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/search.h   Sat Apr 25 01:39:55 2015 -0500
> > @@ -287,7 +287,7 @@
> >      ~Search();
> >
> >      bool     initSearch(const x265_param& param, ScalingList&
> scalingList);
> > -    void     setLambdaFromQP(const Slice& slice, int qp);
> > +    int      setLambdaFromQP(const CUData& ctu, int qp); /* returns
> real quant QP in valid spec range */
> >
> >      // mark temp RD entropy contexts as uninitialized; useful for
> finding loads without stores
> >      void     invalidateContexts(int fromDepth);
>
>
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150427/74fb3102/attachment-0001.html>