[x265] [PATCH] no-rdo: refactor enodeResandCalcRDInterCU function
Deepthi Devaki Akkoorath
deepthidevaki at multicorewareinc.com
Fri Nov 8 10:44:48 CET 2013
The bitrate/PSNR changes after this patch is applied because the
patch uses a full pseudo-encode to estimate the bits for both the residual
and zero-residue modes.
Before BasketballPass_416x240 318.48 kb/s, Global PSNR: 35.468
After BasketballPass_416x240 316.62 kb/s, Global PSNR: 35.463
Before big_buck_bunny_360p24 50.14 kb/s, Global PSNR: 43.662
After big_buck_bunny_360p24 49.92 kb/s, Global PSNR: 43.657
Before FourPeople_1280x720_60 501.80 kb/s, Global PSNR: 39.633
After FourPeople_1280x720_60 500.74 kb/s, Global PSNR: 39.627
Before sintel_trailer_2k_720 89.05 kb/s, Global PSNR: 55.233
After sintel_trailer_2k_720 89.61 kb/s, Global PSNR: 55.189
Before Johnny_1280x720_60 289.92 kb/s, Global PSNR: 40.470
After Johnny_1280x720_60 289.00 kb/s, Global PSNR: 40.473
Before Kimono1_1920x1080_24 1783.15 kb/s, Global PSNR: 38.594
After Kimono1_1920x1080_24 1778.44 kb/s, Global PSNR: 38.592
Before BBDrive_1920x1080_50 3799.89 kb/s, Global PSNR: 37.109
After BBDrive_1920x1080_50 3802.98 kb/s, Global PSNR: 37.107
-
Deepthi
On Fri, Nov 8, 2013 at 2:50 PM, <deepthidevaki at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Deepthi Devaki <deepthidevaki at multicorewareinc.com>
> # Date 1383894227 -19800
> # Node ID a73bc98e632c668c9ebd5a1a9ed40557cb44d00c
> # Parent fef74c2e329dc24d9e93624de217babc2d6fa77f
> no-rdo: refactor enodeResandCalcRDInterCU function
>
> Divide estimateBits and modeDecision inside the function. EstimateBits
> uses a pseudo encode. Bitstream changes with this patch for --rd 1.
>
> diff -r fef74c2e329d -r a73bc98e632c source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Nov 08 02:57:47 2013
> -0600
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Nov 08 12:33:47 2013
> +0530
> @@ -2941,6 +2941,144 @@
> cu->setQPSubParts(qpBest, 0, cu->getDepth(0));
> }
>
> +void TEncSearch::estimateRDInterCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, TShortYUV* outResiYuv,
> + TShortYUV* outBestResiYuv, TComYuv*
> outReconYuv, bool /*bSkipRes*/, bool curUseRDOQ)
> +{
> + uint32_t width = cu->getWidth(0);
> + uint32_t height = cu->getHeight(0);
> +
> + outResiYuv->subtract(fencYuv, predYuv, 0, width);
> +
> + uint32_t zerobits = estimateZerobits(cu);
> + uint32_t zerodistortion = estimateZeroDist(cu, fencYuv, predYuv);
> + uint64_t zerocost = m_rdCost->calcRdCost(zerodistortion, zerobits);
> +
> + uint32_t distortion = 0;
> + uint32_t bits = 0;
> + estimateBitsDist(cu, outResiYuv, bits, distortion, curUseRDOQ);
> + uint64_t cost = m_rdCost->calcRdCost(distortion, bits);
> +
> + if (cu->isLosslessCoded(0))
> + {
> + zerocost = cost + 1;
> + }
> +
> + if (zerocost < cost)
> + {
> + const uint32_t qpartnum = cu->getPic()->getNumPartInCU() >>
> (cu->getDepth(0) << 1);
> + ::memset(cu->getTransformIdx(), 0, qpartnum * sizeof(UChar));
> + ::memset(cu->getCbf(TEXT_LUMA), 0, qpartnum * sizeof(UChar));
> + ::memset(cu->getCbf(TEXT_CHROMA_U), 0, qpartnum * sizeof(UChar));
> + ::memset(cu->getCbf(TEXT_CHROMA_V), 0, qpartnum * sizeof(UChar));
> + ::memset(cu->getCoeffY(), 0, width * height * sizeof(TCoeff));
> + ::memset(cu->getCoeffCb(), 0, width * height * sizeof(TCoeff) >>
> 2);
> + ::memset(cu->getCoeffCr(), 0, width * height * sizeof(TCoeff) >>
> 2);
> + cu->setTransformSkipSubParts(0, 0, 0, 0, cu->getDepth(0));
> + if (cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)
> + {
> + cu->setSkipFlagSubParts(true, 0, cu->getDepth(0));
> + }
> + bits = zerobits;
> + outBestResiYuv->clear();
> + generateRecon(cu, predYuv, outBestResiYuv, outReconYuv, true);
> + }
> + else
> + {
> + xSetResidualQTData(cu, 0, 0, outBestResiYuv, cu->getDepth(0),
> true);
> + generateRecon(cu, predYuv, outBestResiYuv, outReconYuv, false);
> + }
> +
> + int part = partitionFromSizes(width, height);
> + distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(),
> fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
> + part = partitionFromSizes(width >> 1, height >> 1);
> + distortion +=
> m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(),
> fencYuv->getCStride(), outReconYuv->getCbAddr(),
> outReconYuv->getCStride()));
> + distortion +=
> m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(),
> fencYuv->getCStride(), outReconYuv->getCrAddr(),
> outReconYuv->getCStride()));
> +
> + cu->m_totalBits = bits;
> + cu->m_totalDistortion = distortion;
> + cu->m_totalCost = m_rdCost->calcRdCost(distortion, bits);
> +}
> +
> +uint32_t TEncSearch::estimateZerobits(TComDataCU* cu)
> +{
> + if (cu->isIntra(0))
> + {
> + return 0;
> + }
> +
> + uint32_t zeroResiBits = 0;
> +
> + uint32_t width = cu->getWidth(0);
> + uint32_t height = cu->getHeight(0);
> +
> + const uint32_t qpartnum = cu->getPic()->getNumPartInCU() >>
> (cu->getDepth(0) << 1);
> + ::memset(cu->getTransformIdx(), 0, qpartnum * sizeof(UChar));
> + ::memset(cu->getCbf(TEXT_LUMA), 0, qpartnum * sizeof(UChar));
> + ::memset(cu->getCbf(TEXT_CHROMA_U), 0, qpartnum * sizeof(UChar));
> + ::memset(cu->getCbf(TEXT_CHROMA_V), 0, qpartnum * sizeof(UChar));
> + ::memset(cu->getCoeffY(), 0, width * height * sizeof(TCoeff));
> + ::memset(cu->getCoeffCb(), 0, width * height * sizeof(TCoeff) >> 2);
> + ::memset(cu->getCoeffCr(), 0, width * height * sizeof(TCoeff) >> 2);
> + cu->setTransformSkipSubParts(0, 0, 0, 0, cu->getDepth(0));
> +
> +
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);
> + zeroResiBits = xSymbolBitsInter(cu);
> + // Reset skipflags to false which would have set to true by
> xSymbolBitsInter if merge-skip
> + cu->setSkipFlagSubParts(false, 0, cu->getDepth(0));
> + return zeroResiBits;
> +}
> +
> +uint32_t TEncSearch::estimateZeroDist(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv)
> +{
> + uint32_t distortion = 0;
> +
> + uint32_t width = cu->getWidth(0);
> + uint32_t height = cu->getHeight(0);
> +
> + int part = partitionFromSizes(width, height);
> +
> + distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(),
> fencYuv->getStride(), predYuv->getLumaAddr(), predYuv->getStride());
> + part = partitionFromSizes(width >> 1, height >> 1);
> + distortion +=
> m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(),
> fencYuv->getCStride(), predYuv->getCbAddr(), predYuv->getCStride()));
> + distortion +=
> m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(),
> fencYuv->getCStride(), predYuv->getCrAddr(), predYuv->getCStride()));
> + return distortion;
> +}
> +
> +void TEncSearch::generateRecon(TComDataCU* cu, TComYuv* predYuv,
> TShortYUV* resiYuv, TComYuv* reconYuv, bool skipRes)
> +{
> + if (skipRes)
> + {
> + predYuv->copyToPartYuv(reconYuv, 0);
> + return;
> + }
> + else
> + {
> + uint32_t width = cu->getWidth(0);
> + xSetResidualQTData(cu, 0, 0, resiYuv, cu->getDepth(0), true);
> + reconYuv->addClip(predYuv, resiYuv, 0, width);
> + }
> +}
> +
> +void TEncSearch::estimateBitsDist(TComDataCU* cu, TShortYUV* resiYuv,
> uint32_t& bits, uint32_t& distortion, bool curUseRDOQ)
> +{
> + if (cu->isIntra(0))
> + {
> + return;
> + }
> +
> + bits = 0;
> + distortion = 0;
> + uint64_t cost = 0;
> + uint32_t zeroDistortion = 0;
> +
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);
> + xEstimateResidualQT(cu, 0, 0, resiYuv, cu->getDepth(0), cost, bits,
> distortion, &zeroDistortion, curUseRDOQ);
> +
> + xSetResidualQTData(cu, 0, 0, NULL, cu->getDepth(0), false);
> +
> m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);
> + bits = xSymbolBitsInter(cu);
> +
> m_rdGoOnSbacCoder->store(m_rdSbacCoders[cu->getDepth(0)][CI_TEMP_BEST]);
> +}
> +
> #if _MSC_VER
> #pragma warning(disable: 4701) // potentially uninitialized local variable
> #endif
> diff -r fef74c2e329d -r a73bc98e632c source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h Fri Nov 08 02:57:47 2013
> -0600
> +++ b/source/Lib/TLibEncoder/TEncSearch.h Fri Nov 08 12:33:47 2013
> +0530
> @@ -153,6 +153,17 @@
> void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, TShortYUV* resiYuv, TShortYUV* bestResiYuv,
> TComYuv* reconYuv, bool bSkipRes, bool
> curUseRDOQ = true);
>
> + void estimateRDInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv, TShortYUV* resiYuv, TShortYUV* bestResiYuv,
> + TComYuv* reconYuv, bool bSkipRes, bool
> curUseRDOQ = true);
> +
> + uint32_t estimateZerobits(TComDataCU* cu);
> +
> + uint32_t estimateZeroDist(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv);
> +
> + void generateRecon(TComDataCU* cu, TComYuv* predYuv, TShortYUV*
> resiYuv, TComYuv* reconYuv, bool skipRes);
> +
> + void estimateBitsDist(TComDataCU* cu, TShortYUV* resiYuv, uint32_t&
> bits, uint32_t& distortion, bool curUseRDOQ);
> +
> /// set ME search range
> void setAdaptiveSearchRange(int dir, int refIdx, int merange) {
> m_adaptiveRange[dir][refIdx] = merange; }
>
> diff -r fef74c2e329d -r a73bc98e632c source/encoder/compress.cpp
> --- a/source/encoder/compress.cpp Fri Nov 08 02:57:47 2013 -0600
> +++ b/source/encoder/compress.cpp Fri Nov 08 12:33:47 2013 +0530
> @@ -319,7 +319,7 @@
> m_tmpRecoYuv[depth] = yuv;
>
> //Encode with residue
> - m_search->encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth], false);
> + m_search->estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv,
> m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false);
>
> if (outTempCU->m_totalCost < outBestCU->m_totalCost) //Choose best
> from no-residue mode and residue mode
> {
> @@ -476,8 +476,9 @@
> m_search->motionCompensation(outBestCU,
> m_bestPredYuv[depth], REF_PIC_LIST_X, partIdx, false, true);
> }
>
> - m_search->encodeResAndCalcRdInterCU(outBestCU,
> m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
> - m_bestResiYuv[depth],
> m_bestRecoYuv[depth], false);
> + m_search->estimateRDInterCU(outBestCU, m_origYuv[depth],
> m_bestPredYuv[depth], m_tmpResiYuv[depth],
> + m_bestResiYuv[depth],
> m_bestRecoYuv[depth], false);
> +
> #if CU_STAT_LOGFILE
> fprintf(fp1, "\n N : %d , Best Inter : %d , ",
> outBestCU->getWidth(0) / 2, outBestCU->m_totalCost);
> #endif
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131108/53dcbe24/attachment-0001.html>
More information about the x265-devel
mailing list