[x265] [PATCH 1 of 1] search: change psy-rd energy and cost measurement
Deepthi Nandakumar
deepthi at multicorewareinc.com
Mon Oct 20 19:37:50 CEST 2014
Thanks Steve - will clean it up before pushing.
I wanted some feedback on the visual quality - I couldn't see any
improvement, and sometimes felt the existing psy-rd implementation was
marginally better. It would be good if Sumalatha or anyone else could test
and see whether it improves psy-rd.
On Mon, Oct 20, 2014 at 10:54 PM, Steve Borho <steve at borho.org> wrote:
> On 10/20, deepthi at multicorewareinc.com wrote:
> > # HG changeset patch
> > # User Deepthi Nandakumar <deepthi at multicorewareinc.com>
> > # Date 1413805233 -19800
> > # Mon Oct 20 17:10:33 2014 +0530
> > # Node ID c9b80a61687aea02108cb41f26f19f1408b00d2f
> > # Parent 7eab67ffff81a44cc67c388dc4fcae2468979fae
> > search: change psy-rd energy and cost measurement.
> >
> > Psy-rd now calculates distortion and psyenergy as a function of source
> and recon,
> > as opposed to original residual and reconstructed residual.
>
> the logic looks sound, but I have some implementation nits
>
> > diff -r 7eab67ffff81 -r c9b80a61687a source/encoder/search.cpp
> > --- a/source/encoder/search.cpp Mon Oct 20 15:37:50 2014 +0530
> > +++ b/source/encoder/search.cpp Mon Oct 20 17:10:33 2014 +0530
> > @@ -2719,6 +2719,7 @@
> > {
> > TComDataCU* cu = &mode.cu;
> > const Yuv* fencYuv = mode.fencYuv;
> > + const Yuv* predYuv = &mode.predYuv;
> >
> > X265_CHECK(cu->m_depth[0] == cu->m_depth[absPartIdx], "depth not
> matching\n");
> > const uint32_t trMode = depth - cu->m_depth[0];
> > @@ -2796,6 +2797,7 @@
> > m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac,
> log2TrSize, true);
> >
> > pixel *fenc =
> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> > + pixel *pred =
> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
>
> predYuv isn't const like fencYuv, so you don't need the casts.
>
> > int16_t *resi = resiYuv->getLumaAddr(absPartIdx);
> > numSigY = m_quant.transformNxN(cu, fenc, fencYuv->m_size, resi,
> resiYuv->m_size, coeffCurY, log2TrSize, TEXT_LUMA, absPartIdx, false);
> >
> > @@ -2863,10 +2865,18 @@
> > }
> >
> > X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
> > - uint32_t distY =
> primitives.ssd_s[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size);
> > + uint32_t distY;
> > uint32_t psyEnergyY = 0;
> > + /* When psy-rd is enabled, distortion and psyEnergy are
> measured against source, recon */
> > if (m_rdCost.m_psyRd)
> > - psyEnergyY = m_rdCost.psyCost(partSize,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, (int16_t*)zeroShort, 0);
> > + {
> > + fenc = const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> > + pred = const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
> > + distY = primitives.sse_pp[partSize](fenc, fencYuv->m_size,
> pred, predYuv->m_size);
> > + psyEnergyY = m_rdCost.psyCost(partSize, fenc,
> fencYuv->m_size, pred, predYuv->m_size);
> > + }
> > + else
> > + distY =
> primitives.ssd_s[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size);
> >
> > int16_t *curResiY =
> m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> > X265_CHECK(m_qtTempShortYuv[qtLayer].m_size == MAX_CU_SIZE,
> "width not full CU\n");
> > @@ -2880,10 +2890,21 @@
> > {
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdx], curResiY,
> strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> //this is for inter mode only
> >
> > - const uint32_t nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size, curResiY, strideResiY);
> > + uint32_t nonZeroDistY;
> > uint32_t nonZeroPsyEnergyY = 0;
> > if (m_rdCost.m_psyRd)
> > - nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, curResiY, strideResiY);
> > + {
> > + ALIGN_VAR_32(pixel, tmpRecon[MAX_CU_SIZE *
> MAX_CU_SIZE]);
>
> tmpRecon probably wants to be a per-depth Yuv, but otherwise the logic
> looks sound.
>
> > + uint32_t strideRecon = MAX_CU_SIZE;
> > + //===== reconstruction =====
>
> I've been removing this style of comment, both for the = abuse and the
> not very helpful content. If it said something like "measure distortion
> and psy-energy with reconstructed pixels", I might be inclined to keep
> it.
>
> > + fenc =
> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> > + pred =
> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
> > + primitives.luma_add_ps[partSize](tmpRecon, strideRecon,
> pred, curResiY, predYuv->m_size, strideResiY);
> > + nonZeroDistY = primitives.sse_pp[partSize](fenc,
> fencYuv->m_size, tmpRecon, strideRecon);
> > + nonZeroPsyEnergyY = m_rdCost.psyCost(partSize, fenc,
> fencYuv->m_size, tmpRecon, strideRecon);
> > + }
> > + else
> > + nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size, curResiY, strideResiY);
> >
> > if (cu->m_cuTransquantBypass[0])
> > {
> > @@ -2956,19 +2977,41 @@
> > int16_t *curResiU =
> m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
> > int16_t *curResiV =
> m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
> >
> > - distU =
> m_rdCost.scaleChromaDistCb(primitives.ssd_s[log2TrSizeC -
> 2](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_csize));
> > - if (outZeroDist)
> > + if(m_rdCost.m_psyRd)
> > + {
> > + fenc =
> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
> > + pred =
> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
> > + distU =
> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize,pred, predYuv->m_csize));
>
> a couple of whitespace nits here
>
> > + psyEnergyU = m_rdCost.psyCost(partSizeC, fenc,
> fencYuv->m_csize, pred, predYuv->m_csize);
> > + }
> > + else
> > + distU =
> m_rdCost.scaleChromaDistCb(primitives.ssd_s[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize));
> > +
> > + if (outZeroDist)
> > *outZeroDist += distU;
> >
> > if (numSigU[tuIterator.section])
> > {
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], curResiU,
> strideResiC, coeffCurU + subTUOffset,
> > log2TrSizeC, TEXT_CHROMA_U,
> false, false, numSigU[tuIterator.section]);
> > - uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize, curResiU, strideResiC);
> > - const uint32_t nonZeroDistU =
> m_rdCost.scaleChromaDistCb(dist);
> > + uint32_t nonZeroDistU;
> > uint32_t nonZeroPsyEnergyU = 0;
> > if (m_rdCost.m_psyRd)
> > - nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC,
> resiYuv->getCbAddr(absPartIdxC), resiYuv->m_csize, curResiU, strideResiC);
> > + {
> > + ALIGN_VAR_32(pixel, tmpReconU[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > + uint32_t strideReconC = MAX_CU_SIZE;
> > + //===== reconstruction =====
> > + fenc =
> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
> > + pred =
> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
> > + primitives.luma_add_ps[partSizeC](tmpReconU,
> strideReconC, pred, curResiU, predYuv->m_csize, strideResiC);
> > + nonZeroDistU =
> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, tmpReconU, strideReconC));
> > + nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC,
> fenc, fencYuv->m_csize, tmpReconU, strideReconC);
> > + }
> > + else
> > + {
> > + uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize, curResiU, strideResiC);
> > + nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
> > + }
> >
> > if (cu->m_cuTransquantBypass[0])
> > {
> > @@ -3025,7 +3068,16 @@
> > if (!numSigU[tuIterator.section])
> > primitives.blockfill_s[partSizeC](curResiU,
> strideResiC, 0);
> >
> > - distV =
> m_rdCost.scaleChromaDistCr(primitives.ssd_s[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize));
> > + if(m_rdCost.m_psyRd)
> > + {
> > + fenc =
> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
> > + pred =
> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
> > + distV =
> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, pred, predYuv->m_csize));
> > + psyEnergyV = m_rdCost.psyCost(partSizeC, fenc,
> fencYuv->m_csize, pred, predYuv->m_csize);
> > + }
> > + else
> > + distV =
> m_rdCost.scaleChromaDistCr(primitives.ssd_s[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize));
> > +
> > if (outZeroDist)
> > *outZeroDist += distV;
> >
> > @@ -3033,11 +3085,24 @@
> > {
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], curResiV,
> strideResiC, coeffCurV + subTUOffset,
> > log2TrSizeC, TEXT_CHROMA_V,
> false, false, numSigV[tuIterator.section]);
> > - uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize, curResiV, strideResiC);
> > - const uint32_t nonZeroDistV =
> m_rdCost.scaleChromaDistCr(dist);
> > + uint32_t nonZeroDistV;
> > uint32_t nonZeroPsyEnergyV = 0;
> > if (m_rdCost.m_psyRd)
> > - nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC,
> resiYuv->getCrAddr(absPartIdxC), resiYuv->m_csize, curResiV, strideResiC);
> > + {
> > + ALIGN_VAR_32(pixel, tmpReconV[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > + uint32_t strideReconC = MAX_CU_SIZE;
> > + fenc =
> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
> > + pred =
> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
> > + //===== reconstruction =====
> > + primitives.luma_add_ps[partSizeC](tmpReconV,
> strideReconC, pred, curResiV, predYuv->m_csize, strideResiC);
> > + nonZeroDistV =
> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, tmpReconV, strideReconC));
> > + nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC,
> fenc, fencYuv->m_csize, tmpReconV, strideReconC);
> > + }
> > + else
> > + {
> > + uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize, curResiV, strideResiC);
> > + nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
> > + }
> >
> > if (cu->m_cuTransquantBypass[0])
> > {
> > @@ -3130,15 +3195,23 @@
> >
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdx], tsResiY,
> trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
> >
> > - nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size, tsResiY, trSize);
> > -
> > if (m_rdCost.m_psyRd)
> > {
> > - nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_size, tsResiY, trSize);
> > + ALIGN_VAR_32(pixel, tmpRecon[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > + uint32_t strideRecon = MAX_CU_SIZE;
> > + //===== reconstruction =====
> > + fenc =
> const_cast<pixel*>(fencYuv->getLumaAddr(absPartIdx));
> > + pred =
> const_cast<pixel*>(predYuv->getLumaAddr(absPartIdx));
> > + primitives.luma_add_ps[partSize](tmpRecon,
> strideRecon, pred, tsResiY, predYuv->m_size, trSize);
> > + nonZeroDistY = primitives.sse_pp[partSize](fenc,
> fencYuv->m_size, tmpRecon, strideRecon);
> > + nonZeroPsyEnergyY = m_rdCost.psyCost(partSize,
> fenc, fencYuv->m_size, tmpRecon, strideRecon);
> > singleCostY = m_rdCost.calcPsyRdCost(nonZeroDistY,
> skipSingleBitsY, nonZeroPsyEnergyY);
> > }
> > else
> > + {
> > + nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_size, tsResiY, trSize);
> > singleCostY = m_rdCost.calcRdCost(nonZeroDistY,
> skipSingleBitsY);
> > + }
> > }
> >
> > if (!numSigTSkipY || minCost[TEXT_LUMA][0] < singleCostY)
> > @@ -3208,15 +3281,24 @@
> >
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], tsResiU,
> trSizeC, tsCoeffU,
> > log2TrSizeC, TEXT_CHROMA_U,
> false, true, numSigTSkipU);
> > - uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize, tsResiU, trSizeC);
> > - nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
> > if (m_rdCost.m_psyRd)
> > {
> > - nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC,
> resiYuv->getCbAddr(absPartIdxC), resiYuv->m_csize, tsResiU, trSizeC);
> > + ALIGN_VAR_32(pixel, tmpReconU[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > + uint32_t strideReconC = MAX_CU_SIZE;
> > + //===== reconstruction =====
> > + fenc =
> const_cast<pixel*>(fencYuv->getCbAddr(absPartIdxC));
> > + pred =
> const_cast<pixel*>(predYuv->getCbAddr(absPartIdxC));
> > + primitives.luma_add_ps[partSizeC](tmpReconU,
> strideReconC, pred, tsResiU, predYuv->m_csize, trSizeC);
> > + nonZeroDistU =
> m_rdCost.scaleChromaDistCb(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, tmpReconU, strideReconC));
> > + nonZeroPsyEnergyU = m_rdCost.psyCost(partSizeC,
> fenc, fencYuv->m_csize, tmpReconU, strideReconC);
> > singleCostU =
> m_rdCost.calcPsyRdCost(nonZeroDistU,
> singleBitsComp[TEXT_CHROMA_U][tuIterator.section], nonZeroPsyEnergyU);
> > }
> > else
> > + {
> > + uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_csize, tsResiU, trSizeC);
> > + nonZeroDistU = m_rdCost.scaleChromaDistCb(dist);
> > singleCostU = m_rdCost.calcRdCost(nonZeroDistU,
> singleBitsComp[TEXT_CHROMA_U][tuIterator.section]);
> > + }
> > }
> >
> > if (!numSigTSkipU ||
> minCost[TEXT_CHROMA_U][tuIterator.section] < singleCostU)
> > @@ -3239,15 +3321,24 @@
> >
> >
> m_quant.invtransformNxN(cu->m_cuTransquantBypass[absPartIdxC], tsResiV,
> trSizeC, tsCoeffV,
> > log2TrSizeC, TEXT_CHROMA_V,
> false, true, numSigTSkipV);
> > - uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize, tsResiV, trSizeC);
> > - nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
> > if (m_rdCost.m_psyRd)
> > {
> > - nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC,
> resiYuv->getCrAddr(absPartIdxC), resiYuv->m_csize, tsResiV, trSizeC);
> > + ALIGN_VAR_32(pixel, tmpReconV[MAX_CU_SIZE *
> MAX_CU_SIZE]);
> > + uint32_t strideReconC = MAX_CU_SIZE;
> > + //===== reconstruction =====
> > + fenc =
> const_cast<pixel*>(fencYuv->getCrAddr(absPartIdxC));
> > + pred =
> const_cast<pixel*>(predYuv->getCrAddr(absPartIdxC));
> > + primitives.luma_add_ps[partSizeC](tmpReconV,
> strideReconC, pred, tsResiV, predYuv->m_csize, trSizeC);
> > + nonZeroDistV =
> m_rdCost.scaleChromaDistCr(primitives.sse_pp[partSizeC](fenc,
> fencYuv->m_csize, tmpReconV, strideReconC));
> > + nonZeroPsyEnergyV = m_rdCost.psyCost(partSizeC,
> fenc, fencYuv->m_csize, tmpReconV, strideReconC);
> > singleCostV =
> m_rdCost.calcPsyRdCost(nonZeroDistV,
> singleBitsComp[TEXT_CHROMA_V][tuIterator.section], nonZeroPsyEnergyV);
> > }
> > else
> > + {
> > + uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_csize, tsResiV, trSizeC);
> > + nonZeroDistV = m_rdCost.scaleChromaDistCr(dist);
> > singleCostV = m_rdCost.calcRdCost(nonZeroDistV,
> singleBitsComp[TEXT_CHROMA_V][tuIterator.section]);
> > + }
> > }
> >
> > if (!numSigTSkipV ||
> minCost[TEXT_CHROMA_V][tuIterator.section] < singleCostV)
> > _______________________________________________
> > x265-devel mailing list
> > x265-devel at videolan.org
> > https://mailman.videolan.org/listinfo/x265-devel
>
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141020/a54df2db/attachment-0001.html>
More information about the x265-devel
mailing list