[x265] [PATCH 1 of 1] search: change psy-rd energy and cost measurement

Deepthi Nandakumar deepthi at multicorewareinc.com
Mon Oct 20 19:37:50 CEST 2014


Thanks Steve - will clean it up before pushing.

I wanted some feedback on the visual quality - I couldn't see any
improvement, and sometimes felt the existing psy-rd implementation was
marginally better. It would be good if Sumalatha or anyone else could test
and see whether it improves psy-rd.

On Mon, Oct 20, 2014 at 10:54 PM, Steve Borho <steve at borho.org> wrote:

> On 10/20, deepthi at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Deepthi Nandakumar <deepthi at multicorewareinc.com>
> > # Date 1413805233 -19800
> > #      Mon Oct 20 17:10:33 2014 +0530
> > # Node ID c9b80a61687aea02108cb41f26f19f1408b00d2f
> > # Parent  7eab67ffff81a44cc67c388dc4fcae2468979fae
> > search: change psy-rd energy and cost measurement.
> >
> > Psy-rd now calculates distortion and psyenergy as a function of source
> and recon,
> > as opposed to original residual and reconstructed residual.
>
> the logic looks sound, but I have some implementation nits
>
> > diff -r 7eab67ffff81 -r c9b80a61687a source/encoder/search.cpp
> > --- a/source/encoder/search.cpp       Mon Oct 20 15:37:50 2014 +0530
> > +++ b/source/encoder/search.cpp       Mon Oct 20 17:10:33 2014 +0530
> > @@ -2719,6 +2719,7 @@
> >  {
> >      TComDataCU* cu = &mode.cu;
> >      const Yuv* fencYuv = mode.fencYuv;
> > +    const Yuv* predYuv = &mode.predYuv;
> >
> >      X265_CHECK(cu->m_depth[0] == cu->m_depth[absPartIdx], "depth not
> matching\n");
> >      const uint32_t trMode = depth - cu->m_depth[0];
> > @@ -2796,6 +2797,7 @@
> >              m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac,
> log2TrSize, true);
> >
> >          pixel *fenc =
> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> > +        pixel *pred =
> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
>
> predYuv isn't const like fencYuv, so you don't need the casts.
>
> >          int16_t *resi = resiYuv->getLumaAddr(absPartIdx);
> >          numSigY = m_quant.transformNxN(cu, fenc, fencYuv->m_size, resi,
> resiYuv->m_size, coeffCurY, log2TrSize, TEXT_LUMA, absPartIdx, false);
> >
> > @@ -2863,10 +2865,18 @@
> >          }
> >
> >          X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
> > -        uint32_t distY =
> primitives.ssd_s[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size);
> > +        uint32_t distY;
> >          uint32_t psyEnergyY = 0;
> > +        /* When psy-rd is enabled, distortion and psyEnergy are
> measured against source, recon */
> >          if (m_rdCost.m_psyRd)
> > -            psyEnergyY = m_rdCost.psyCost(partSize,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, (int16_t*)zeroShort, 0);
> > +        {
> > +            fenc = const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> > +            pred = const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
> > +            distY = primitives.sse_pp[partSize](fenc, fencYuv->m_size,
> pred, predYuv->m_size);
> > +            psyEnergyY = m_rdCost.psyCost(partSize, fenc,
> fencYuv->m_size, pred, predYuv->m_size);
> > +        }
> > +        else
> > +            distY =
> primitives.ssd_s[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size);
> >
> >          int16_t *curResiY =
> m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> >          X265_CHECK(m_qtTempShortYuv[qtLayer].m_size == MAX_CU_SIZE,
> "width not full CU\n");
> > @@ -2880,10 +2890,21 @@
> >          {
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdx], curResiY,
> strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> //this is for inter mode only
> >
> > -            const uint32_t nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size, curResiY, strideResiY);
> > +            uint32_t nonZeroDistY;
> >              uint32_t nonZeroPsyEnergyY = 0;
> >              if (m_rdCost.m_psyRd)
> > -                nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, curResiY, strideResiY);
> > +            {
> > +                ALIGN_VAR_32(pixel, tmpRecon[MAX_CU_SIZE *
> MAX_CU_SIZE]);
>
> tmpRecon probably wants to be a per-depth Yuv, but otherwise the logic
> looks sound.
>
> > +                uint32_t strideRecon = MAX_CU_SIZE;
> > +                //===== reconstruction =====
>
> I've been removing this style of comment, both for the = abuse and the
> not very helpful content. If it said something like "measure distortion
> and psy-energy with reconstructed pixels", I might be inclined to keep
> it.
>
> > +                fenc =
> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> > +                pred =
> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
> > +                primitives.luma_add_ps[partSize](tmpRecon, strideRecon,
> pred, curResiY, predYuv->m_size, strideResiY);
> > +                nonZeroDistY = primitives.sse_pp[partSize](fenc,
> fencYuv->m_size, tmpRecon, strideRecon);
> > +                nonZeroPsyEnergyY = m_rdCost.psyCost(partSize, fenc,
> fencYuv->m_size, tmpRecon, strideRecon);
> > +            }
> > +            else
> > +                nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size, curResiY, strideResiY);
> >
> >              if (cu->m_cuTransquantBypass[0])
> >              {
> > @@ -2956,19 +2977,41 @@
> >                  int16_t *curResiU =
> m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
> >                  int16_t *curResiV =
> m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
> >
> > -                distU =
> m_rdCost.scaleChromaDistCb(primitives.ssd_s[log2TrSizeC -
> 2](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_csize));
> > -                if (outZeroDist)
> > +                 if(m_rdCost.m_psyRd)
> > +                {
> > +                    fenc =
> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
> > +                    pred =
> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
> > +                    distU =
> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize,pred, predYuv->m_csize));
>
> a couple of whitespace nits here
>
> > +                    psyEnergyU = m_rdCost.psyCost(partSizeC, fenc,
> fencYuv->m_csize, pred, predYuv->m_csize);
> > +                }
> > +                else
> > +                    distU =
> m_rdCost.scaleChromaDistCb(primitives.ssd_s[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize));
> > +
> > +                 if (outZeroDist)
> >                      *outZeroDist += distU;
> >
> >                  if (numSigU[tuIterator.section])
> >                  {
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], curResiU,
> strideResiC, coeffCurU + subTUOffset,
> >                                              log2TrSizeC, TEXT_CHROMA_U,
> false, false, numSigU[tuIterator.section]);
> > -                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize, curResiU, strideResiC);
> > -                    const uint32_t nonZeroDistU =
> m_rdCost.scaleChromaDistCb(dist);
> > +                    uint32_t nonZeroDistU;
> >                      uint32_t nonZeroPsyEnergyU = 0;
> >                      if (m_rdCost.m_psyRd)
> > -                        nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC,
> resiYuv->getCbAddr(absPartIdxC), resiYuv->m_csize, curResiU, strideResiC);
> > +                    {
> > +                        ALIGN_VAR_32(pixel, tmpReconU[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > +                        uint32_t strideReconC = MAX_CU_SIZE;
> > +                        //===== reconstruction =====
> > +                        fenc =
> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
> > +                        pred =
> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
> > +                        primitives.luma_add_ps[partSizeC](tmpReconU,
> strideReconC, pred, curResiU, predYuv->m_csize, strideResiC);
> > +                        nonZeroDistU =
> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, tmpReconU, strideReconC));
> > +                        nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC,
> fenc, fencYuv->m_csize, tmpReconU, strideReconC);
> > +                    }
> > +                    else
> > +                    {
> > +                        uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize, curResiU, strideResiC);
> > +                        nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
> > +                    }
> >
> >                      if (cu->m_cuTransquantBypass[0])
> >                      {
> > @@ -3025,7 +3068,16 @@
> >                  if (!numSigU[tuIterator.section])
> >                      primitives.blockfill_s[partSizeC](curResiU,
> strideResiC, 0);
> >
> > -                distV =
> m_rdCost.scaleChromaDistCr(primitives.ssd_s[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize));
> > +                if(m_rdCost.m_psyRd)
> > +                {
> > +                    fenc =
> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
> > +                    pred =
> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
> > +                    distV =
> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, pred, predYuv->m_csize));
> > +                    psyEnergyV = m_rdCost.psyCost(partSizeC, fenc,
> fencYuv->m_csize, pred, predYuv->m_csize);
> > +                }
> > +                else
> > +                    distV =
> m_rdCost.scaleChromaDistCr(primitives.ssd_s[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize));
> > +
> >                  if (outZeroDist)
> >                      *outZeroDist += distV;
> >
> > @@ -3033,11 +3085,24 @@
> >                  {
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], curResiV,
> strideResiC, coeffCurV + subTUOffset,
> >                                              log2TrSizeC, TEXT_CHROMA_V,
> false, false, numSigV[tuIterator.section]);
> > -                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize, curResiV, strideResiC);
> > -                    const uint32_t nonZeroDistV =
> m_rdCost.scaleChromaDistCr(dist);
> > +                    uint32_t nonZeroDistV;
> >                      uint32_t nonZeroPsyEnergyV = 0;
> >                      if (m_rdCost.m_psyRd)
> > -                        nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC,
> resiYuv->getCrAddr(absPartIdxC), resiYuv->m_csize, curResiV, strideResiC);
> > +                    {
> > +                        ALIGN_VAR_32(pixel, tmpReconV[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > +                        uint32_t strideReconC = MAX_CU_SIZE;
> > +                        fenc =
> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
> > +                        pred =
> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
> > +                        //===== reconstruction =====
> > +                        primitives.luma_add_ps[partSizeC](tmpReconV,
> strideReconC, pred, curResiV, predYuv->m_csize, strideResiC);
> > +                        nonZeroDistV =
> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, tmpReconV, strideReconC));
> > +                        nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC,
> fenc, fencYuv->m_csize, tmpReconV, strideReconC);
> > +                    }
> > +                    else
> > +                    {
> > +                        uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize, curResiV, strideResiC);
> > +                        nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
> > +                    }
> >
> >                      if (cu->m_cuTransquantBypass[0])
> >                      {
> > @@ -3130,15 +3195,23 @@
> >
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdx], tsResiY,
> trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
> >
> > -                nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size, tsResiY, trSize);
> > -
> >                  if (m_rdCost.m_psyRd)
> >                  {
> > -                    nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, tsResiY, trSize);
> > +                    ALIGN_VAR_32(pixel, tmpRecon[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > +                    uint32_t strideRecon = MAX_CU_SIZE;
> > +                    //===== reconstruction =====
> > +                    fenc =
> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> > +                    pred =
> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
> > +                    primitives.luma_add_ps[partSize](tmpRecon,
> strideRecon, pred, tsResiY, predYuv->m_size, trSize);
> > +                    nonZeroDistY = primitives.sse_pp[partSize](fenc,
> fencYuv->m_size, tmpRecon, strideRecon);
> > +                    nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
> fenc, fencYuv->m_size, tmpRecon, strideRecon);
> >                      singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY,
> skipSingleBitsY, nonZeroPsyEnergyY);
> >                  }
> >                  else
> > +                {
> > +                    nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size, tsResiY, trSize);
> >                      singleCostY = m_rdCost.calcRdCost(nonZeroDistY,
> skipSingleBitsY);
> > +                }
> >              }
> >
> >              if (!numSigTSkipY || minCost[TEXT_LUMA][0] < singleCostY)
> > @@ -3208,15 +3281,24 @@
> >
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], tsResiU,
> trSizeC, tsCoeffU,
> >                                              log2TrSizeC, TEXT_CHROMA_U,
> false, true, numSigTSkipU);
> > -                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize, tsResiU, trSizeC);
> > -                    nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
> >                      if (m_rdCost.m_psyRd)
> >                      {
> > -                        nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC,
> resiYuv->getCbAddr(absPartIdxC), resiYuv->m_csize, tsResiU, trSizeC);
> > +                        ALIGN_VAR_32(pixel, tmpReconU[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > +                        uint32_t strideReconC = MAX_CU_SIZE;
> > +                        //===== reconstruction =====
> > +                        fenc =
> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
> > +                        pred =
> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
> > +                        primitives.luma_add_ps[partSizeC](tmpReconU,
> strideReconC, pred, tsResiU, predYuv->m_csize, trSizeC);
> > +                        nonZeroDistU =
> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, tmpReconU, strideReconC));
> > +                        nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC,
> fenc, fencYuv->m_csize, tmpReconU, strideReconC);
> >                          singleCostU =
> m_rdCost.calcPsyRdCost(nonZeroDistU,
> singleBitsComp[TEXT_CHROMA_U][tuIterator.section], nonZeroPsyEnergyU);
> >                      }
> >                      else
> > +                    {
> > +                        uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize, tsResiU, trSizeC);
> > +                        nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
> >                          singleCostU = m_rdCost.calcRdCost(nonZeroDistU,
> singleBitsComp[TEXT_CHROMA_U][tuIterator.section]);
> > +                    }
> >                  }
> >
> >                  if (!numSigTSkipU ||
> minCost[TEXT_CHROMA_U][tuIterator.section] < singleCostU)
> > @@ -3239,15 +3321,24 @@
> >
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], tsResiV,
> trSizeC, tsCoeffV,
> >                                              log2TrSizeC, TEXT_CHROMA_V,
> false, true, numSigTSkipV);
> > -                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize, tsResiV, trSizeC);
> > -                    nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
> >                      if (m_rdCost.m_psyRd)
> >                      {
> > -                        nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC,
> resiYuv->getCrAddr(absPartIdxC), resiYuv->m_csize, tsResiV, trSizeC);
> > +                        ALIGN_VAR_32(pixel, tmpReconV[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > +                        uint32_t strideReconC = MAX_CU_SIZE;
> > +                        //===== reconstruction =====
> > +                        fenc =
> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
> > +                        pred =
> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
> > +                        primitives.luma_add_ps[partSizeC](tmpReconV,
> strideReconC, pred, tsResiV, predYuv->m_csize, trSizeC);
> > +                        nonZeroDistV =
> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, tmpReconV, strideReconC));
> > +                        nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC,
> fenc, fencYuv->m_csize, tmpReconV, strideReconC);
> >                          singleCostV =
> m_rdCost.calcPsyRdCost(nonZeroDistV,
> singleBitsComp[TEXT_CHROMA_V][tuIterator.section], nonZeroPsyEnergyV);
> >                      }
> >                      else
> > +                    {
> > +                        uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize, tsResiV, trSizeC);
> > +                        nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
> >                          singleCostV = m_rdCost.calcRdCost(nonZeroDistV,
> singleBitsComp[TEXT_CHROMA_V][tuIterator.section]);
> > +                    }
> >                  }
> >
> >                  if (!numSigTSkipV ||
> minCost[TEXT_CHROMA_V][tuIterator.section] < singleCostV)
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141020/a54df2db/attachment-0001.html>


More information about the x265-devel mailing list