[x265] [PATCH] psyrdoq: implementation of psyrdoq

Wed Jul 23 02:38:31 CEST 2014

Does it make sense to try this for DC coefficients?

On Tue, Jul 22, 2014 at 9:24 PM, Steve Borho <steve at borho.org> wrote:

> On 07/22, sumalatha at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Sumalatha Polureddy<sumalatha at multicorewareinc.com>
> > # Date 1406032149 -19800
> > # Node ID 37e03dcd2e4f0b5894880ff8c097bd6e11590459
> > # Parent  d303b4d860e9f06396a156726dd518d0f41fe796
> > psyrdoq: implementation of psyrdoq
> >
> > diff -r d303b4d860e9 -r 37e03dcd2e4f
> source/Lib/TLibCommon/TComTrQuant.cpp
> > --- a/source/Lib/TLibCommon/TComTrQuant.cpp   Mon Jul 21 22:43:38 2014
> -0500
> > +++ b/source/Lib/TLibCommon/TComTrQuant.cpp   Tue Jul 22 17:59:09 2014
> +0530
> > @@ -64,6 +64,8 @@
> >      return y + ((x - y) & ((x - y) >> (sizeof(int) * CHAR_BIT - 1)));
> // min(x, y)
> >  }
> >
> > +#define SIGN(x,y) ((x^(y >> 31))-(y >> 31))
> > +
> >  //
> ====================================================================================================================
> >  // TComTrQuant class member functions
> >  //
> ====================================================================================================================
> > @@ -307,6 +309,8 @@
> >  }
> >
> >  uint32_t TComTrQuant::transformNxN(TComDataCU* cu,
> > +                                   pixel*      fenc,
> > +                                   uint32_t    fencStride,
> >                                     int16_t*    residual,
> >                                     uint32_t    stride,
> >                                     coeff_t*    coeff,
> > @@ -316,10 +320,10 @@
> >                                     bool        useTransformSkip,
> >                                     bool        curUseRDOQ)
> >  {
> > +    int trSize = 1 << log2TrSize;
> >      if (cu->getCUTransquantBypass(absPartIdx))
> >      {
> >          uint32_t numSig = 0;
> > -        int trSize = 1 << log2TrSize;
> >          for (int k = 0; k < trSize; k++)
> >          {
> >              for (int j = 0; j < trSize; j++)
> > @@ -339,6 +343,12 @@
> >          const uint32_t sizeIdx = log2TrSize - 2;
> >          int useDST = (sizeIdx == 0 && ttype == TEXT_LUMA &&
> cu->getPredictionMode(absPartIdx) == MODE_INTRA);
> >          int index = DCT_4x4 + sizeIdx - useDST;
> > +        if (psyRdoqEnabled())
> > +        {
> > +            // converting pixel to int and putting in separate buffer
> to take dct
> > +            primitives.square_copy_ps[sizeIdx](m_tmpfencBuf,
> MAX_CU_SIZE, fenc, fencStride);
> > +            primitives.dct[index](m_tmpfencBuf, m_tmpfencCoeff, stride);
> > +        }
> >          primitives.dct[index](residual, m_tmpCoeff, stride);
> >          if (m_nr->bNoiseReduction)
> >          {
> > @@ -356,7 +366,7 @@
> >
> >      if (m_useRDOQ && curUseRDOQ)
> >      {
> > -        return xRateDistOptQuant(cu, m_tmpCoeff, coeff, log2TrSize,
> ttype, absPartIdx);
> > +        return xRateDistOptQuant(cu, m_tmpfencCoeff, m_tmpCoeff, coeff,
> log2TrSize, ttype, absPartIdx);
> >      }
> >      return xQuant(cu, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx);
> >  }
> > @@ -505,7 +515,7 @@
> >   * Rate distortion optimized quantization for entropy
> >   * coding engines using probability models like CABAC
> >   */
> > -uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t*
> srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize,
> > +uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t*
> fencCoeff, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize,
> >                                          TextType ttype, uint32_t
> absPartIdx)
> >  {
> >      uint32_t trSize = 1 << log2TrSize;
> > @@ -614,7 +624,7 @@
> >                  {
> >                      level = xGetCodedLevel(costCoeff[scanPos],
> curCostSig, costSig[scanPos],
> >                                             levelDouble, maxAbsLevel,
> baseLevel, greaterOneBits, levelAbsBits, goRiceParam,
> > -                                           c1c2Idx, qbits, scaleFactor,
> 1);
> > +                                           c1c2Idx, qbits, scaleFactor,
> 1, srcCoeff[blkPos], fencCoeff[blkPos]);
> >                      sigRateDelta[blkPos] = 0;
> >                  }
> >                  else
> > @@ -631,7 +641,7 @@
> >                          curCostSig = xGetRateSigCoef(1, ctxSig);
> >                          level = xGetCodedLevel(costCoeff[scanPos],
> curCostSig, costSig[scanPos],
> >                                                 levelDouble,
> maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam,
> > -                                               c1c2Idx, qbits,
> scaleFactor, 0);
> > +                                               c1c2Idx, qbits,
> scaleFactor, 0, srcCoeff[blkPos], fencCoeff[blkPos]);
> >                      }
> >                      else
> >                      {
> > @@ -1126,7 +1136,9 @@
> >                                              uint32_t     c1c2Idx,
> >                                              int          qbits,
> >                                              double       scaleFactor,
> > -                                            bool         last) const
> > +                                            bool         last,
> > +                                            int          signCoef,
> > +                                            int          origCoef) const
> >  {
> >      uint32_t   bestAbsLevel = 0;
> >
> > @@ -1155,7 +1167,18 @@
> >      for (int absLevel = maxAbsLevel; absLevel >= minAbsLevel;
> absLevel--)
> >      {
> >          X265_CHECK(fabs((double)err2 - double(levelDouble  - (absLevel
> << qbits)) * double(levelDouble  - (absLevel << qbits)) * scaleFactor) <
> 1e-5, "err2 check failure\n");
> > -        double curCost = err2 + xGetICRateCost(absLevel, diffLevel,
> greaterOneBits, levelAbsBits, absGoRice, c1c2Idx);
> > +        double curCost = 0;
>
> in x264, psy-trellis is not applied to the DC coeff nor to chroma; we
> need to at least try to do that here to measure the effect.
>
> > +        if (psyRdoqEnabled())
> > +        {
> > +            int unquantAbsLevel = (maxAbsLevel << qbits);
> > +            int predictedCoef = origCoef - signCoef;
> > +            int psyValue = abs(unquantAbsLevel + SIGN(predictedCoef,
> signCoef)); // test with differnt cases to check whether SIGN is required
> > +            curCost = err2 + xGetICRateCost(absLevel, diffLevel,
> greaterOneBits, levelAbsBits, absGoRice, c1c2Idx) - (m_psyRdoqScale *
> psyValue * m_lambda);
> > +        }
> > +        else
> > +        {
> > +            curCost = err2 + xGetICRateCost(absLevel, diffLevel,
> greaterOneBits, levelAbsBits, absGoRice, c1c2Idx);
> > +        }
>
> style: no braces around single-line expressions
>
> >          curCost       += curCostSig;
> >
> >          if (curCost < bestCodedCost)
> > diff -r d303b4d860e9 -r 37e03dcd2e4f source/Lib/TLibCommon/TComTrQuant.h
> > --- a/source/Lib/TLibCommon/TComTrQuant.h     Mon Jul 21 22:43:38 2014
> -0500
> > +++ b/source/Lib/TLibCommon/TComTrQuant.h     Tue Jul 22 17:59:09 2014
> +0530
> > @@ -120,8 +120,18 @@
> >      // initialize class
> >      void init(bool useRDOQ);
> >
> > +    void setPsyRdoqScale(double scale)
> > +    {
> > +        m_psyRdoqScale = scale;
> > +    }
> > +
> > +    inline bool psyRdoqEnabled() const
> > +    {
> > +        return m_psyRdoqScale != 0;
> > +    }
>
> Don't use access methods for an internal member variable; I intend to
> remove the similar methods from RDCost. It may be faster to keep a bool
> for whether psy-rdoq is enabled, instead of comparing a double against 0.
>
> > +
> >      // transform & inverse transform functions
> > -    uint32_t transformNxN(TComDataCU* cu, int16_t* residual, uint32_t
> stride, coeff_t* coeff, uint32_t log2TrSize,
> > +    uint32_t transformNxN(TComDataCU* cu, pixel* fenc, uint32_t
> fencStride, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t
> log2TrSize,
> >                            TextType ttype, uint32_t absPartIdx, bool
> useTransformSkip = false, bool curUseRDOQ = true);
> >
> >      void invtransformNxN(bool transQuantBypass, int16_t* residual,
> uint32_t stride, coeff_t* coeff, uint32_t log2TrSize, TextType ttype, bool
> bIntra, bool useTransformSkip, uint32_t numSig);
> > @@ -207,7 +217,11 @@
> >      bool     m_useRDOQ;
> >      bool     m_scalingListEnabledFlag;
> >
> > -    int32_t* m_tmpCoeff;
> > +    double   m_psyRdoqScale;
> > +    int32_t* m_tmpCoeff; // tmp buffer which stores the dct of residual
> > +    int32_t  m_tmpfencCoeff[MAX_CU_SIZE * MAX_CU_SIZE]; // tmp buffer
> which stores the dct of orig pix buffer
> > +    int16_t m_tmpfencBuf[MAX_CU_SIZE * MAX_CU_SIZE]; // tmp buffer
> which stores short of orig pixel
>
> These buffers should have better names so they don't require trailing
> comments, and use MAX_TS_SIZE.
>
> coeff_t m_resiDctCoeff[MAX_TS_SIZE * MAX_TS_SIZE];
> coeff_t m_fencDctCoeff[MAX_TS_SIZE * MAX_TS_SIZE];
> int16_t m_fencShortBuf[MAX_TS_SIZE * MAX_TS_SIZE];
>
> >      int32_t*
> m_quantCoef[ScalingList::NUM_SIZES][ScalingList::NUM_LISTS][ScalingList::NUM_REM];
>     ///< array of quantization matrix coefficient 4x4
> >      int32_t*
> m_dequantCoef[ScalingList::NUM_SIZES][ScalingList::NUM_LISTS][ScalingList::NUM_REM];
>   ///< array of dequantization matrix coefficient 4x4
> >
> > @@ -221,11 +235,11 @@
> >      uint32_t xQuant(TComDataCU* cu, int32_t* src, coeff_t* dst,
> uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx);
> >
> >      // RDOQ functions
> > -    uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff,
> coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t
> absPartIdx);
> > +    uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* fencCoeff,
> int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype,
> uint32_t absPartIdx);
> >
> >      inline uint32_t xGetCodedLevel(double& codedCost, const double
> curCostSig, double& codedCostSig, int levelDouble,
> >                                     uint32_t maxAbsLevel, uint32_t
> baseLevel, const int *greaterOneBits, const int *levelAbsBits, uint32_t
> absGoRice,
> > -                                   uint32_t c1c2Idx, int qbits, double
> scale, bool bLast) const;
> > +                                   uint32_t c1c2Idx, int qbits, double
> scale, bool bLast, int sign_coef, int orig_coef) const;
> >
> >      inline double xGetICRateCost(uint32_t absLevel, int32_t  diffLevel,
> const int *greaterOneBits, const int *levelAbsBits, uint32_t absGoRice,
> uint32_t c1c2Idx) const;
> >
> > diff -r d303b4d860e9 -r 37e03dcd2e4f
> source/Lib/TLibEncoder/TEncSearch.cpp
> > --- a/source/Lib/TLibEncoder/TEncSearch.cpp   Mon Jul 21 22:43:38 2014
> -0500
> > +++ b/source/Lib/TLibEncoder/TEncSearch.cpp   Tue Jul 22 17:59:09 2014
> +0530
> > @@ -438,7 +438,7 @@
>
> <snip TEncSearch diffs>
>
> > diff -r d303b4d860e9 -r 37e03dcd2e4f source/common/param.cpp
> > --- a/source/common/param.cpp Mon Jul 21 22:43:38 2014 -0500
> > +++ b/source/common/param.cpp Tue Jul 22 17:59:09 2014 +0530
> > @@ -163,6 +163,7 @@
> >      param->crQpOffset = 0;
> >      param->rdPenalty = 0;
> >      param->psyRd = 0.0;
> > +    param->psyRdoq = 0.5;
>
> The patch which is pushed should have the default be 0.0 - enabling the
> psycho-visual optimizations by default should be a separate patch
>
> >      param->bIntraInBFrames = 1;
> >      param->bLossless = 0;
> >      param->bCULossless = 0;
> > @@ -413,11 +414,13 @@
> >          {
> >              param->rc.aqStrength = 0.0;
> >              param->psyRd = 0.0;
> > +            param->psyRdoq = 0.0;
> >          }
> >          else if (!strcmp(tune, "ssim"))
> >          {
> >              param->rc.aqMode = X265_AQ_AUTO_VARIANCE;
> >              param->psyRd = 0.0;
> > +            param->psyRdoq = 0.0;
> >          }
> >          else if (!strcmp(tune, "fastdecode") ||
> >                   !strcmp(tune, "fast-decode"))
>
> Later in this file are warnings that are emitted if psy-rd is enabled
> and SSIM or PSNR are being reported. Those checks need to include
> psy-rdoq.
>
> also, the psy-rdoq scale factor must be logged, if it is enabled.
>
> > diff -r d303b4d860e9 -r 37e03dcd2e4f source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp     Mon Jul 21 22:43:38 2014 -0500
> > +++ b/source/encoder/analysis.cpp     Tue Jul 22 17:59:09 2014 +0530
> > @@ -49,6 +49,7 @@
> >  {
> >      m_param = top->m_param;
> >      m_trQuant.init(top->m_bEnableRDOQ);
> > +    m_trQuant.setPsyRdoqScale(top->m_param->psyRdoq);
>
> nit: drop top->
>
> >      if (!top->m_scalingList.m_bEnabled)
> >      {
> > @@ -1874,7 +1875,7 @@
> >              primitives.chroma[m_param->internalCsp].sub_ps[part](dst,
> dststride, src1, src2, src1stride, src2stride);
> >
> >              // Residual encoding
> > -            residualTransformQuantInter(cu, 0, m_tmpResiYuv[depth],
> cu->getDepth(0), true);
> > +            residualTransformQuantInter(cu, 0, m_origYuv[0],
> m_tmpResiYuv[depth], cu->getDepth(0), true);
> >              checkDQP(cu);
> >
> >              if (lcu->getMergeFlag(absPartIdx) &&
> cu->getPartitionSize(0) == SIZE_2Nx2N && !cu->getQtRootCbf(0))
> > diff -r d303b4d860e9 -r 37e03dcd2e4f source/encoder/encoder.cpp
> > --- a/source/encoder/encoder.cpp      Mon Jul 21 22:43:38 2014 -0500
> > +++ b/source/encoder/encoder.cpp      Tue Jul 22 17:59:09 2014 +0530
> > @@ -1296,8 +1296,17 @@
> >          p->crQpOffset += 6;
> >      }
> >
> > -    // disable RDOQ if psy-rd is enabled; until we make it psy-aware
> > -    m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 4;
> > +    // disable RDOQ when rdlevel < 4
> > +    // disable RDOQ when psyrd is enabled and psyrdoq is disabled
> > +    if (p->rdLevel >= 4)
> > +    {
> > +        if (p->psyRd && !p->psyRdoq)
> > +            m_bEnableRDOQ = 0;
> > +        else
> > +            m_bEnableRDOQ = 1;
> > +    }
> > +    else
> > +        m_bEnableRDOQ = 0;
>
> conciseness is a virtue:
>
>     // disable RDOQ if rdlevel < 4 or if psy-rd is enabled without psy-rdoq
>     m_bEnableRDOQ = p->rdLevel >= 4 ? (p->psyRdoq || !p->psyRd) : 0;
>
> >      if (p->bLossless)
> >      {
> > diff -r d303b4d860e9 -r 37e03dcd2e4f source/x265.h
> > --- a/source/x265.h   Mon Jul 21 22:43:38 2014 -0500
> > +++ b/source/x265.h   Tue Jul 22 17:59:09 2014 +0530
> > @@ -613,6 +613,10 @@
> >       * energy of the source, at the cost of lost compression. Default
> 0.0 */
> >      double    psyRd;
> >
> > +    /* Psycho-visual rate-distortion opt quantize strength. Only has an
> effect in presets
> > +    * which use RDOQ. Default 0.0 */
>
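> white-space: the '*' on the comment's continuation line should be aligned
> with the '*' of the opening '/*', as in the existing comments above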
>
> > +    double    psyRdoq;
>
> And of course, the final patch must bump X265_BUILD, add the CLI option and
> param_parse support, and update doc/reST/cli.txt
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140723/13998047/attachment-0001.html>