[x265] [PATCH 6 of 6 REV2] analysis: always configure quant QP directly after setting RD lambda

Aarthi Priya Thirumalai aarthi at multicorewareinc.com
Mon Apr 27 13:17:09 CEST 2015


We may remove the QP offset calculations from there, but we may still need
to use it for collecting CU-level intra and inter SATD costs
from lowres data for row VBV encodes.

On Sun, Apr 26, 2015 at 11:17 PM, Steve Borho <steve at borho.org> wrote:

> On Sun, Apr 26, 2015 at 12:21 PM, Steve Borho <steve at borho.org> wrote:
> > # HG changeset patch
> > # User Steve Borho <steve at borho.org>
> > # Date 1429943995 18000
> > #      Sat Apr 25 01:39:55 2015 -0500
> > # Node ID 68a13226d586b335c02cade9311e093f0149c42a
> > # Parent  6a0a37c01cff03cadd44691a0fe447d17ec0b14f
> > analysis: always configure quant QP directly after setting RD lambda
> >
> > Basically, everywhere we adjust or assign QP, we set the quant QP
> > immediately. This removes a great many ad-hoc calls to setQPforQuant()
> > and hopefully makes it impossible to miss quant being configured properly.
> >
> > This patch fixes a layering violation where the frame encoder was setting
> > the RDO lambdas directly, but only when delta-QP was not enabled.
> >
> > diff -r 6a0a37c01cff -r 68a13226d586 source/common/quant.cpp
> > --- a/source/common/quant.cpp   Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/common/quant.cpp   Sat Apr 25 01:39:55 2015 -0500
> > @@ -225,16 +225,15 @@
> >      X265_FREE(m_fencShortBuf);
> >  }
> >
> > -void Quant::setQPforQuant(const CUData& cu)
> > +void Quant::setQPforQuant(const CUData& ctu, int qp)
> >  {
> > -    m_tqBypass = !!cu.m_tqBypass[0];
> > +    m_tqBypass = !!ctu.m_tqBypass[0];
> >      if (m_tqBypass)
> >          return;
> > -    m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] :
> NULL;
> > -    int qpy = cu.m_qp[0];
> > -    m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
> > -    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0],
> TEXT_CHROMA_U, cu.m_chromaFormat);
> > -    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1],
> TEXT_CHROMA_V, cu.m_chromaFormat);
> > +    m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] :
> NULL;
> > +    m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
> > +    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0],
> TEXT_CHROMA_U, ctu.m_chromaFormat);
> > +    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[1],
> TEXT_CHROMA_V, ctu.m_chromaFormat);
> >  }
> >
> >  void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
> > diff -r 6a0a37c01cff -r 68a13226d586 source/common/quant.h
> > --- a/source/common/quant.h     Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/common/quant.h     Sat Apr 25 01:39:55 2015 -0500
> > @@ -103,7 +103,7 @@
> >      bool allocNoiseReduction(const x265_param& param);
> >
> >      /* CU setup */
> > -    void setQPforQuant(const CUData& cu);
> > +    void setQPforQuant(const CUData& ctu, int qp);
> >
> >      uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t
> fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
> >                            uint32_t log2TrSize, TextType ttype, uint32_t
> absPartIdx, bool useTransformSkip);
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp       Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/analysis.cpp       Sat Apr 25 01:39:55 2015 -0500
> > @@ -146,18 +146,16 @@
> >
> >      if (m_slice->m_pps->bUseDQP)
> >      {
> > -        m_aqQP[0] = calculateQpforCuSize(ctu, cuGeom);
> > -        setLambdaFromQP(*m_slice, m_aqQP[0]);
> > -        m_aqQP[0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0]);
> > -        ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> > +        m_aqQP[0] = setLambdaFromQP(ctu, calculateQpforCuSize(ctu,
> cuGeom));
> >
> >          if (m_slice->m_pps->maxCuDQPDepth)
> >              initAqQPs(1, ctu, &cuGeom + 1);
> >      }
> >      else
> > -        m_aqQP[0] = m_slice->m_sliceQp;
> > +        /* adaptive quant disabled, CTU QP is always slice QP, and
> within spec range */
> > +        m_aqQP[0] = setLambdaFromQP(ctu, m_slice->m_sliceQp);
> >
> > -    m_quant.setQPforQuant(ctu);
> > +    ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> >      m_rqt[0].cur.load(initialContext);
> >      m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic,
> ctu.m_cuAddr, 0);
> >
> > @@ -231,20 +229,24 @@
> >          return;
> >      else if (md.bestMode->cu.isIntra(0))
> >      {
> > +        m_quant.m_tqBypass = true;
> >          md.pred[PRED_LOSSLESS].initCosts();
> >          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu,
> cuGeom);
> >          PartSize size =
> (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
> >          uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
> >          checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
> >          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> > +        m_quant.m_tqBypass = false;
> >      }
> >      else
> >      {
> > +        m_quant.m_tqBypass = true;
> >          md.pred[PRED_LOSSLESS].initCosts();
> >          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu,
> cuGeom);
> >
> md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
> >          encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
> >          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> > +        m_quant.m_tqBypass = false;
> >      }
> >  }
> >
> > @@ -269,7 +271,6 @@
> >              PartSize size = (PartSize)reusePartSizes[zOrder];
> >              Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] :
> md.pred[PRED_INTRA_NxN];
> >              mode.cu.initSubCU(parentCTU, cuGeom, qp);
> > -            m_quant.setQPforQuant(mode.cu);
> >              checkIntra(mode, cuGeom, size, &reuseModes[zOrder],
> &reuseChromaModes[zOrder]);
> >              checkBestMode(mode, depth);
> >
> > @@ -287,7 +288,6 @@
> >      else if (mightNotSplit)
> >      {
> >          md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
> > -        m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> >          checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
> >          checkBestMode(md.pred[PRED_INTRA], depth);
> >
> > @@ -327,11 +327,7 @@
> >                  m_rqt[nextDepth].cur.load(*nextContext);
> >
> >                  if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                {
> > -                    nextQP = m_aqQP[childGeom.index];
> > -                    setLambdaFromQP(*m_slice, nextQP);
> > -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -                }
> > +                    nextQP = setLambdaFromQP(parentCTU,
> m_aqQP[childGeom.index]);
> >
> >                  compressIntraCU(parentCTU, childGeom, zOrder, nextQP);
> >
> > @@ -401,14 +397,9 @@
> >      {
> >          slave.m_slice = m_slice;
> >          slave.m_frame = m_frame;
> > -        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> > +        slave.setLambdaFromQP(md.pred[PRED_2Nx2N].cu, m_rdCost.m_qp);
> >          slave.invalidateContexts(0);
> > -
> > -        if (m_param->rdLevel >= 5)
> > -        {
> > -
> slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> > -            slave.m_quant.setQPforQuant(md.pred[PRED_2Nx2N].cu);
> > -        }
> > +
> slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> >      }
> >
> >      /* perform Mode task, repeat until no more work is available */
> > @@ -419,11 +410,6 @@
> >              switch (pmode.modes[task])
> >              {
> >              case PRED_INTRA:
> > -                if (&slave != this)
> > -                {
> > -
> slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> > -                    slave.m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> > -                }
> >                  slave.checkIntraInInter(md.pred[PRED_INTRA],
> pmode.cuGeom);
> >                  if (m_param->rdLevel > 2)
> >                      slave.encodeIntraInInter(md.pred[PRED_INTRA],
> pmode.cuGeom);
> > @@ -739,11 +725,7 @@
> >                  m_rqt[nextDepth].cur.load(*nextContext);
> >
> >                  if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                {
> > -                    nextQP = m_aqQP[childGeom.index];
> > -                    setLambdaFromQP(*m_slice, nextQP);
> > -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -                }
> > +                    nextQP = setLambdaFromQP(parentCTU,
> m_aqQP[childGeom.index]);
> >
> >                  compressInterCU_dist(parentCTU, childGeom, nextQP);
> >
> > @@ -944,7 +926,6 @@
> >                      {
> >                          /* generate recon pixels with no rate
> distortion considerations */
> >                          CUData& cu = md.bestMode->cu;
> > -                        m_quant.setQPforQuant(cu);
> >
> >                          uint32_t tuDepthRange[2];
> >                          cu.getInterTUQtDepthRange(tuDepthRange, 0);
> > @@ -969,7 +950,6 @@
> >                      {
> >                          /* generate recon pixels with no rate
> distortion considerations */
> >                          CUData& cu = md.bestMode->cu;
> > -                        m_quant.setQPforQuant(cu);
> >
> >                          uint32_t tuDepthRange[2];
> >                          cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> > @@ -1020,11 +1000,7 @@
> >                  m_rqt[nextDepth].cur.load(*nextContext);
> >
> >                  if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                {
> > -                    nextQP = m_aqQP[childGeom.index];
> > -                    setLambdaFromQP(*m_slice, nextQP);
> > -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -                }
> > +                    nextQP = setLambdaFromQP(parentCTU,
> m_aqQP[childGeom.index]);
> >
> >                  compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> >
> > @@ -1228,11 +1204,7 @@
> >                  m_rqt[nextDepth].cur.load(*nextContext);
> >
> >                  if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> > -                {
> > -                    nextQP = m_aqQP[childGeom.index];
> > -                    setLambdaFromQP(*m_slice, nextQP);
> > -                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -                }
> > +                    nextQP = setLambdaFromQP(parentCTU,
> m_aqQP[childGeom.index]);
> >
> >                  compressInterCU_rd5_6(parentCTU, childGeom, zOrder,
> nextQP);
> >
> > @@ -1758,7 +1730,6 @@
> >      CUData& cu = bestMode->cu;
> >
> >      cu.copyFromPic(ctu, cuGeom);
> > -    m_quant.setQPforQuant(cu);
> >
> >      Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
> >      if (cuGeom.depth)
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/frameencoder.cpp
> > --- a/source/encoder/frameencoder.cpp   Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/frameencoder.cpp   Sat Apr 25 01:39:55 2015 -0500
> > @@ -851,12 +851,11 @@
> >
> >          if (m_param->rc.aqMode || bIsVbv)
> >          {
> > +            X265_CHECK(slice->m_pps->bUseDQP, "adaptive quant in use
> without DQP\n");
> >              int qp = calcQpForCu(cuAddr,
> curEncData.m_cuStat[cuAddr].baseQp);
> >              qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> >              curEncData.m_rowStat[row].sumQpAq += qp;
> >          }
> > -        else
> > -            tld.analysis.setLambdaFromQP(*slice, slice->m_sliceQp);
>
> Note that this change raises the question of whether calcQpForCu() is
> necessary any more in the frame encoder. The returned QP is only used
> to update the row sum QP. compressCTU() is free to use an entirely
> different QP (or QPs per CU).
>
> My guess is that rate control would work better if sumQpAq were
> incremented *after* the CTU was compressed, based on the average of
> the QPs actually used to code the block. This would also be more
> work-efficient.
>
> >          if (m_param->bEnableWavefront && !col && row)
> >          {
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/rdcost.h
> > --- a/source/encoder/rdcost.h   Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/rdcost.h   Sat Apr 25 01:39:55 2015 -0500
> > @@ -40,12 +40,13 @@
> >      uint32_t  m_chromaDistWeight[2];
> >      uint32_t  m_psyRdBase;
> >      uint32_t  m_psyRd;
> > -    int       m_qp;
> > +    int       m_qp; /* QP used to configure lambda, may be higher than
> QP_MAX_SPEC but <= QP_MAX_MAX */
> >
> >      void setPsyRdScale(double scale)                { m_psyRdBase =
> (uint32_t)floor(65536.0 * scale * 0.33); }
> >
> >      void setQP(const Slice& slice, int qp)
> >      {
> > +        x265_emms(); /* TODO: if the lambda tables were ints, this
> would not be necessary */
> >          m_qp = qp;
> >
> >          /* Scale PSY RD factor by a slice type factor */
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/search.cpp
> > --- a/source/encoder/search.cpp Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/search.cpp Sat Apr 25 01:39:55 2015 -0500
> > @@ -163,11 +163,16 @@
> >      X265_FREE(m_tsRecon);
> >  }
> >
> > -void Search::setLambdaFromQP(const Slice& slice, int qp)
> > +int Search::setLambdaFromQP(const CUData& ctu, int qp)
> >  {
> > -    x265_emms(); /* TODO: if the lambda tables were ints, this would
> not be necessary */
> > +    X265_CHECK(qp >= QP_MIN && qp <= QP_MAX_MAX, "QP used for lambda is
> out of range\n");
> > +
> >      m_me.setQP(qp);
> > -    m_rdCost.setQP(slice, qp);
> > +    m_rdCost.setQP(*m_slice, qp);
> > +
> > +    int quantQP = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> > +    m_quant.setQPforQuant(ctu, quantQP);
> > +    return quantQP;
> >  }
> >
> >  #if CHECKED_BUILD || _DEBUG
> > @@ -1364,8 +1369,6 @@
> >      X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does
> not expect NxN intra\n");
> >      X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect
> to be used in I slices\n");
> >
> > -    m_quant.setQPforQuant(cu);
> > -
> >      uint32_t tuDepthRange[2];
> >      cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> >
> > @@ -1888,10 +1891,9 @@
> >      /* Setup slave Search instance for ME for master's CU */
> >      if (&slave != this)
> >      {
> > -        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> >          slave.m_slice = m_slice;
> >          slave.m_frame = m_frame;
> > -
> > +        slave.setLambdaFromQP(pme.mode.cu, m_rdCost.m_qp);
> >          slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr,
> pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height);
> >      }
> >
> > @@ -2523,9 +2525,6 @@
> >      uint32_t log2CUSize = cuGeom.log2CUSize;
> >      int sizeIdx = log2CUSize - 2;
> >
> > -    uint32_t tqBypass = cu.m_tqBypass[0];
> > -    m_quant.setQPforQuant(interMode.cu);
> > -
> >      resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
> >
> >      uint32_t tuDepthRange[2];
> > @@ -2536,6 +2535,7 @@
> >      Cost costs;
> >      estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
> tuDepthRange);
> >
> > +    uint32_t tqBypass = cu.m_tqBypass[0];
> >      if (!tqBypass)
> >      {
> >          uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0],
> fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/search.h
> > --- a/source/encoder/search.h   Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/search.h   Sat Apr 25 01:39:55 2015 -0500
> > @@ -287,7 +287,7 @@
> >      ~Search();
> >
> >      bool     initSearch(const x265_param& param, ScalingList&
> scalingList);
> > -    void     setLambdaFromQP(const Slice& slice, int qp);
> > +    int      setLambdaFromQP(const CUData& ctu, int qp); /* returns
> real quant QP in valid spec range */
> >
> >      // mark temp RD entropy contexts as uninitialized; useful for
> finding loads without stores
> >      void     invalidateContexts(int fromDepth);
>
>
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150427/a55b7847/attachment-0001.html>


More information about the x265-devel mailing list