[x265] [PATCH 1 of 1] search: change psy-rd energy and cost measurement

Sumalatha Polureddy sumalatha at multicorewareinc.com
Tue Oct 21 13:18:14 CEST 2014


I tested this patch with few clips with below cli
I checked with differnt values of psy-rd 0.5, 1, 1.5, 2

x265.exe 720p50_parkrun_ter.y4m -o 720p50_parkrun_ter_medium_psy-rd.hevc
--bitrate 3000 --psy-rd 0.5

1. NebutaFestival -- I don't see any difference in visual quality with and
without this patch
2. Duckstakeoff -- the old psy-rd implementation is better than new
implementation
3. parkrun -- in some places the old implementation is visually better and
in some places new implementation is better

Regards
Sumalatha

On Mon, Oct 20, 2014 at 11:07 PM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:

> Thanks Steve - will clean it up before pushing.
>
> I wanted some feedback on the visual quality - I couldnt see any
> improvement, and sometimes felt the existing psy-rd implementation was
> marginally better. Would be good if Sumalatha/anyone else could test, and
> see if it improves psy-rd.
>
> On Mon, Oct 20, 2014 at 10:54 PM, Steve Borho <steve at borho.org> wrote:
>
>> On 10/20, deepthi at multicorewareinc.com wrote:
>> > # HG changeset patch
>> > # User Deepthi Nandakumar <deepthi at multicorewareinc.com>
>> > # Date 1413805233 -19800
>> > #      Mon Oct 20 17:10:33 2014 +0530
>> > # Node ID c9b80a61687aea02108cb41f26f19f1408b00d2f
>> > # Parent  7eab67ffff81a44cc67c388dc4fcae2468979fae
>> > search: change psy-rd energy and cost measurement.
>> >
>> > Psy-rd now calculates distortion and psyenergy as a function of source
>> and recon,
>> > as opposed to original residual and reconstructed residual.
>>
>> the logic looks sound, but I have some implementation nits
>>
>> > diff -r 7eab67ffff81 -r c9b80a61687a source/encoder/search.cpp
>> > --- a/source/encoder/search.cpp       Mon Oct 20 15:37:50 2014 +0530
>> > +++ b/source/encoder/search.cpp       Mon Oct 20 17:10:33 2014 +0530
>> > @@ -2719,6 +2719,7 @@
>> >  {
>> >      TComDataCU* cu = &mode.cu;
>> >      const Yuv* fencYuv = mode.fencYuv;
>> > +    const Yuv* predYuv = &mode.predYuv;
>> >
>> >      X265_CHECK(cu->m_depth[0] == cu->m_depth[absPartIdx], "depth not
>> matching\n");
>> >      const uint32_t trMode = depth - cu->m_depth[0];
>> > @@ -2796,6 +2797,7 @@
>> >              m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac,
>> log2TrSize, true);
>> >
>> >          pixel *fenc =
>> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
>> > +        pixel *pred =
>> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
>>
>> predYuv isn't const like fencYuv, so you don't need the casts.
>>
>> >          int16_t *resi = resiYuv->getLumaAddr(absPartIdx);
>> >          numSigY = m_quant.transformNxN(cu, fenc, fencYuv->m_size,
>> resi, resiYuv->m_size, coeffCurY, log2TrSize, TEXT_LUMA, absPartIdx, false);
>> >
>> > @@ -2863,10 +2865,18 @@
>> >          }
>> >
>> >          X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
>> > -        uint32_t distY =
>> primitives.ssd_s[partSize](resiYuv->getLumaAddr(absPartIdx),
>> resiYuv->m_size);
>> > +        uint32_t distY;
>> >          uint32_t psyEnergyY = 0;
>> > +        /* When psy-rd is enabled, distortion and psyEnergy are
>> measured against source, recon */
>> >          if (m_rdCost.m_psyRd)
>> > -            psyEnergyY = m_rdCost.psyCost(partSize,
>> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, (int16_t*)zeroShort, 0);
>> > +        {
>> > +            fenc =
>> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
>> > +            pred =
>> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
>> > +            distY = primitives.sse_pp[partSize](fenc, fencYuv->m_size,
>> pred, predYuv->m_size);
>> > +            psyEnergyY = m_rdCost.psyCost(partSize, fenc,
>> fencYuv->m_size, pred, predYuv->m_size);
>> > +        }
>> > +        else
>> > +            distY =
>> primitives.ssd_s[partSize](resiYuv->getLumaAddr(absPartIdx),
>> resiYuv->m_size);
>> >
>> >          int16_t *curResiY =
>> m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
>> >          X265_CHECK(m_qtTempShortYuv[qtLayer].m_size == MAX_CU_SIZE,
>> "width not full CU\n");
>> > @@ -2880,10 +2890,21 @@
>> >          {
>> >
>> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdx], curResiY,
>> strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
>> //this is for inter mode only
>> >
>> > -            const uint32_t nonZeroDistY =
>> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
>> resiYuv->m_size, curResiY, strideResiY);
>> > +            uint32_t nonZeroDistY;
>> >              uint32_t nonZeroPsyEnergyY = 0;
>> >              if (m_rdCost.m_psyRd)
>> > -                nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
>> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, curResiY, strideResiY);
>> > +            {
>> > +                ALIGN_VAR_32(pixel, tmpRecon[MAX_CU_SIZE *
>> MAX_CU_SIZE]);
>>
>> tmpRecon probably wants to be a per-depth Yuv, but otherwise the logic
>> looks sound.
>>
>> > +                uint32_t strideRecon = MAX_CU_SIZE;
>> > +                //===== reconstruction =====
>>
>> I've been removing this style of comment. both for the = abuse and the
>> not very helpful content. if it said something like "measure distortion
>> and psy-energy with reconstructed pixels", I might be inclined to keep
>> it
>>
>> > +                fenc =
>> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
>> > +                pred =
>> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
>> > +                primitives.luma_add_ps[partSize](tmpRecon,
>> strideRecon, pred, curResiY, predYuv->m_size, strideResiY);
>> > +                nonZeroDistY = primitives.sse_pp[partSize](fenc,
>> fencYuv->m_size, tmpRecon, strideRecon);
>> > +                nonZeroPsyEnergyY = m_rdCost.psyCost(partSize, fenc,
>> fencYuv->m_size, tmpRecon, strideRecon);
>> > +            }
>> > +            else
>> > +                nonZeroDistY =
>> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
>> resiYuv->m_size, curResiY, strideResiY);
>> >
>> >              if (cu->m_cuTransquantBypass[0])
>> >              {
>> > @@ -2956,19 +2977,41 @@
>> >                  int16_t *curResiU =
>> m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
>> >                  int16_t *curResiV =
>> m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
>> >
>> > -                distU =
>> m_rdCost.scaleChromaDistCb(primitives.ssd_s[log2TrSizeC -
>> 2](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_csize));
>> > -                if (outZeroDist)
>> > +                 if(m_rdCost.m_psyRd)
>> > +                {
>> > +                    fenc =
>> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
>> > +                    pred =
>> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
>> > +                    distU =
>> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
>> fencYuv->m_csize,pred, predYuv->m_csize));
>>
>> a couple of w-s nits here
>>
>> > +                    psyEnergyU = m_rdCost.psyCost(partSizeC, fenc,
>> fencYuv->m_csize, pred, predYuv->m_csize);
>> > +                }
>> > +                else
>> > +                    distU =
>> m_rdCost.scaleChromaDistCb(primitives.ssd_s[partSizeC](resiYuv->getCbAddr(absPartIdxC),
>> resiYuv->m_csize));
>> > +
>> > +                 if (outZeroDist)
>> >                      *outZeroDist += distU;
>> >
>> >                  if (numSigU[tuIterator.section])
>> >                  {
>> >
>> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], curResiU,
>> strideResiC, coeffCurU + subTUOffset,
>> >                                              log2TrSizeC,
>> TEXT_CHROMA_U, false, false, numSigU[tuIterator.section]);
>> > -                    uint32_t dist =
>> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
>> resiYuv->m_csize, curResiU, strideResiC);
>> > -                    const uint32_t nonZeroDistU =
>> m_rdCost.scaleChromaDistCb(dist);
>> > +                    uint32_t nonZeroDistU;
>> >                      uint32_t nonZeroPsyEnergyU = 0;
>> >                      if (m_rdCost.m_psyRd)
>> > -                        nonZeroPsyEnergyU =
>> m_rdCost.psyCost(partSizeC, resiYuv->getCbAddr(absPartIdxC),
>> resiYuv->m_csize, curResiU, strideResiC);
>> > +                    {
>> > +                        ALIGN_VAR_32(pixel, tmpReconU[MAX_CU_SIZE *
>> MAX_CU_SIZE]);
>> > +                        uint32_t strideReconC = MAX_CU_SIZE;
>> > +                        //===== reconstruction =====
>> > +                        fenc =
>> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
>> > +                        pred =
>> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
>> > +                        primitives.luma_add_ps[partSizeC](tmpReconU,
>> strideReconC, pred, curResiU, predYuv->m_csize, strideResiC);
>> > +                        nonZeroDistU =
>> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
>> fencYuv->m_csize, tmpReconU, strideReconC));
>> > +                        nonZeroPsyEnergyU =
>> m_rdCost.psyCost(partSizeC, fenc, fencYuv->m_csize, tmpReconU,
>> strideReconC);
>> > +                    }
>> > +                    else
>> > +                    {
>> > +                        uint32_t dist =
>> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
>> resiYuv->m_csize, curResiU, strideResiC);
>> > +                        nonZeroDistU =
>> m_rdCost.scaleChromaDistCb(dist);
>> > +                    }
>> >
>> >                      if (cu->m_cuTransquantBypass[0])
>> >                      {
>> > @@ -3025,7 +3068,16 @@
>> >                  if (!numSigU[tuIterator.section])
>> >                      primitives.blockfill_s[partSizeC](curResiU,
>> strideResiC, 0);
>> >
>> > -                distV =
>> m_rdCost.scaleChromaDistCr(primitives.ssd_s[partSizeC](resiYuv->getCrAddr(absPartIdxC),
>> resiYuv->m_csize));
>> > +                if(m_rdCost.m_psyRd)
>> > +                {
>> > +                    fenc =
>> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
>> > +                    pred =
>> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
>> > +                    distV =
>> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
>> fencYuv->m_csize, pred, predYuv->m_csize));
>> > +                    psyEnergyV = m_rdCost.psyCost(partSizeC, fenc,
>> fencYuv->m_csize, pred, predYuv->m_csize);
>> > +                }
>> > +                else
>> > +                    distV =
>> m_rdCost.scaleChromaDistCr(primitives.ssd_s[partSizeC](resiYuv->getCrAddr(absPartIdxC),
>> resiYuv->m_csize));
>> > +
>> >                  if (outZeroDist)
>> >                      *outZeroDist += distV;
>> >
>> > @@ -3033,11 +3085,24 @@
>> >                  {
>> >
>> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], curResiV,
>> strideResiC, coeffCurV + subTUOffset,
>> >                                              log2TrSizeC,
>> TEXT_CHROMA_V, false, false, numSigV[tuIterator.section]);
>> > -                    uint32_t dist =
>> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
>> resiYuv->m_csize, curResiV, strideResiC);
>> > -                    const uint32_t nonZeroDistV =
>> m_rdCost.scaleChromaDistCr(dist);
>> > +                    uint32_t nonZeroDistV;
>> >                      uint32_t nonZeroPsyEnergyV = 0;
>> >                      if (m_rdCost.m_psyRd)
>> > -                        nonZeroPsyEnergyV =
>> m_rdCost.psyCost(partSizeC, resiYuv->getCrAddr(absPartIdxC),
>> resiYuv->m_csize, curResiV, strideResiC);
>> > +                    {
>> > +                        ALIGN_VAR_32(pixel, tmpReconV[MAX_CU_SIZE *
>> MAX_CU_SIZE]);
>> > +                        uint32_t strideReconC = MAX_CU_SIZE;
>> > +                        fenc =
>> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
>> > +                        pred =
>> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
>> > +                        //===== reconstruction =====
>> > +                        primitives.luma_add_ps[partSizeC](tmpReconV,
>> strideReconC, pred, curResiV, predYuv->m_csize, strideResiC);
>> > +                        nonZeroDistV =
>> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
>> fencYuv->m_csize, tmpReconV, strideReconC));
>> > +                        nonZeroPsyEnergyV =
>> m_rdCost.psyCost(partSizeC, fenc, fencYuv->m_csize, tmpReconV,
>> strideReconC);
>> > +                    }
>> > +                    else
>> > +                    {
>> > +                        uint32_t dist =
>> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
>> resiYuv->m_csize, curResiV, strideResiC);
>> > +                        nonZeroDistV =
>> m_rdCost.scaleChromaDistCr(dist);
>> > +                    }
>> >
>> >                      if (cu->m_cuTransquantBypass[0])
>> >                      {
>> > @@ -3130,15 +3195,23 @@
>> >
>> >
>> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdx], tsResiY,
>> trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
>> >
>> > -                nonZeroDistY =
>> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
>> resiYuv->m_size, tsResiY, trSize);
>> > -
>> >                  if (m_rdCost.m_psyRd)
>> >                  {
>> > -                    nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
>> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, tsResiY, trSize);
>> > +                    ALIGN_VAR_32(pixel, tmpRecon[MAX_CU_SIZE *
>> MAX_CU_SIZE]);
>> > +                    uint32_t strideRecon = MAX_CU_SIZE;
>> > +                    //===== reconstruction =====
>> > +                    fenc =
>> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
>> > +                    pred =
>> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
>> > +                    primitives.luma_add_ps[partSize](tmpRecon,
>> strideRecon, pred, tsResiY, predYuv->m_size, trSize);
>> > +                    nonZeroDistY = primitives.sse_pp[partSize](fenc,
>> fencYuv->m_size, tmpRecon, strideRecon);
>> > +                    nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
>> fenc, fencYuv->m_size, tmpRecon, strideRecon);
>> >                      singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY,
>> skipSingleBitsY, nonZeroPsyEnergyY);
>> >                  }
>> >                  else
>> > +                {
>> > +                    nonZeroDistY =
>> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
>> resiYuv->m_size, tsResiY, trSize);
>> >                      singleCostY = m_rdCost.calcRdCost(nonZeroDistY,
>> skipSingleBitsY);
>> > +                }
>> >              }
>> >
>> >              if (!numSigTSkipY || minCost[TEXT_LUMA][0] < singleCostY)
>> > @@ -3208,15 +3281,24 @@
>> >
>> >
>> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], tsResiU,
>> trSizeC, tsCoeffU,
>> >                                              log2TrSizeC,
>> TEXT_CHROMA_U, false, true, numSigTSkipU);
>> > -                    uint32_t dist =
>> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
>> resiYuv->m_csize, tsResiU, trSizeC);
>> > -                    nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
>> >                      if (m_rdCost.m_psyRd)
>> >                      {
>> > -                        nonZeroPsyEnergyU =
>> m_rdCost.psyCost(partSizeC, resiYuv->getCbAddr(absPartIdxC),
>> resiYuv->m_csize, tsResiU, trSizeC);
>> > +                        ALIGN_VAR_32(pixel, tmpReconU[MAX_CU_SIZE *
>> MAX_CU_SIZE]);
>> > +                        uint32_t strideReconC = MAX_CU_SIZE;
>> > +                        //===== reconstruction =====
>> > +                        fenc =
>> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
>> > +                        pred =
>> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
>> > +                        primitives.luma_add_ps[partSizeC](tmpReconU,
>> strideReconC, pred, tsResiU, predYuv->m_csize, trSizeC);
>> > +                        nonZeroDistU =
>> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
>> fencYuv->m_csize, tmpReconU, strideReconC));
>> > +                        nonZeroPsyEnergyU =
>> m_rdCost.psyCost(partSizeC, fenc, fencYuv->m_csize, tmpReconU,
>> strideReconC);
>> >                          singleCostU =
>> m_rdCost.calcPsyRdCost(nonZeroDistU,
>> singleBitsComp[TEXT_CHROMA_U][tuIterator.section], nonZeroPsyEnergyU);
>> >                      }
>> >                      else
>> > +                    {
>> > +                        uint32_t dist =
>> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
>> resiYuv->m_csize, tsResiU, trSizeC);
>> > +                        nonZeroDistU =
>> m_rdCost.scaleChromaDistCb(dist);
>> >                          singleCostU =
>> m_rdCost.calcRdCost(nonZeroDistU,
>> singleBitsComp[TEXT_CHROMA_U][tuIterator.section]);
>> > +                    }
>> >                  }
>> >
>> >                  if (!numSigTSkipU ||
>> minCost[TEXT_CHROMA_U][tuIterator.section] < singleCostU)
>> > @@ -3239,15 +3321,24 @@
>> >
>> >
>> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], tsResiV,
>> trSizeC, tsCoeffV,
>> >                                              log2TrSizeC,
>> TEXT_CHROMA_V, false, true, numSigTSkipV);
>> > -                    uint32_t dist =
>> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
>> resiYuv->m_csize, tsResiV, trSizeC);
>> > -                    nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
>> >                      if (m_rdCost.m_psyRd)
>> >                      {
>> > -                        nonZeroPsyEnergyV =
>> m_rdCost.psyCost(partSizeC, resiYuv->getCrAddr(absPartIdxC),
>> resiYuv->m_csize, tsResiV, trSizeC);
>> > +                        ALIGN_VAR_32(pixel, tmpReconV[MAX_CU_SIZE *
>> MAX_CU_SIZE]);
>> > +                        uint32_t strideReconC = MAX_CU_SIZE;
>> > +                        //===== reconstruction =====
>> > +                        fenc =
>> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
>> > +                        pred =
>> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
>> > +                        primitives.luma_add_ps[partSizeC](tmpReconV,
>> strideReconC, pred, tsResiV, predYuv->m_csize, trSizeC);
>> > +                        nonZeroDistV =
>> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
>> fencYuv->m_csize, tmpReconV, strideReconC));
>> > +                        nonZeroPsyEnergyV =
>> m_rdCost.psyCost(partSizeC, fenc, fencYuv->m_csize, tmpReconV,
>> strideReconC);
>> >                          singleCostV =
>> m_rdCost.calcPsyRdCost(nonZeroDistV,
>> singleBitsComp[TEXT_CHROMA_V][tuIterator.section], nonZeroPsyEnergyV);
>> >                      }
>> >                      else
>> > +                    {
>> > +                        uint32_t dist =
>> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
>> resiYuv->m_csize, tsResiV, trSizeC);
>> > +                        nonZeroDistV =
>> m_rdCost.scaleChromaDistCr(dist);
>> >                          singleCostV =
>> m_rdCost.calcRdCost(nonZeroDistV,
>> singleBitsComp[TEXT_CHROMA_V][tuIterator.section]);
>> > +                    }
>> >                  }
>> >
>> >                  if (!numSigTSkipV ||
>> minCost[TEXT_CHROMA_V][tuIterator.section] < singleCostV)
>> > _______________________________________________
>> > x265-devel mailing list
>> > x265-devel at videolan.org
>> > https://mailman.videolan.org/listinfo/x265-devel
>>
>> --
>> Steve Borho
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141021/6a88f389/attachment-0001.html>


More information about the x265-devel mailing list