<div dir="ltr"><br><div><div>The bitrate and PSNR change after this patch is applied because the patch now uses a full pseudo-encode to estimate the bits for both the residual and zero-residue modes.</div><div><br></div><div><div>
Before<span class="" style="white-space:pre"> </span> BasketballPass_416x240<span class="" style="white-space:pre"> </span> 318.48 kb/s, Global PSNR: 35.468</div><div>After<span class="" style="white-space:pre"> </span> BasketballPass_416x240<span class="" style="white-space:pre"> </span> 316.62 kb/s, Global PSNR: 35.463</div>
<div><br></div><div>Before<span class="" style="white-space:pre"> </span> big_buck_bunny_360p24<span class="" style="white-space:pre"> </span> 50.14 kb/s, Global PSNR: 43.662</div><div>After<span class="" style="white-space:pre"> </span> big_buck_bunny_360p24<span class="" style="white-space:pre"> </span> 49.92 kb/s, Global PSNR: 43.657</div>
<div><br></div><div>Before<span class="" style="white-space:pre"> </span> FourPeople_1280x720_60<span class="" style="white-space:pre"> </span> 501.80 kb/s, Global PSNR: 39.633</div><div>After<span class="" style="white-space:pre"> </span> FourPeople_1280x720_60<span class="" style="white-space:pre"> </span> 500.74 kb/s, Global PSNR: 39.627</div>
<div><br></div><div>Before<span class="" style="white-space:pre"> </span> sintel_trailer_2k_720<span class="" style="white-space:pre"> </span> 89.05 kb/s, Global PSNR: 55.233</div><div>After<span class="" style="white-space:pre"> </span> sintel_trailer_2k_720<span class="" style="white-space:pre"> </span> 89.61 kb/s, Global PSNR: 55.189</div>
<div><br></div><div>Before<span class="" style="white-space:pre"> </span> Johnny_1280x720_60<span class="" style="white-space:pre"> </span> 289.92 kb/s, Global PSNR: 40.470</div><div>After<span class="" style="white-space:pre"> </span> Johnny_1280x720_60<span class="" style="white-space:pre"> </span> 289.00 kb/s, Global PSNR: 40.473</div>
<div><br></div><div>Before<span class="" style="white-space:pre"> </span> Kimono1_1920x1080_24<span class="" style="white-space:pre"> </span> 1783.15 kb/s, Global PSNR: 38.594</div><div>After<span class="" style="white-space:pre"> </span> Kimono1_1920x1080_24<span class="" style="white-space:pre"> </span> 1778.44 kb/s, Global PSNR: 38.592</div>
<div><br></div><div>Before<span class="" style="white-space:pre"> </span> BBDrive_1920x1080_50<span class="" style="white-space:pre"> </span> 3799.89 kb/s, Global PSNR: 37.109</div><div>After<span class="" style="white-space:pre"> </span> BBDrive_1920x1080_50<span class="" style="white-space:pre"> </span> 3802.98 kb/s, Global PSNR: 37.107</div>
</div></div><div><br></div><div><br></div><div>-</div><div>Deepthi</div><div><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Fri, Nov 8, 2013 at 2:50 PM, <span dir="ltr">&lt;<a href="mailto:deepthidevaki@multicorewareinc.com" target="_blank">deepthidevaki@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Deepthi Devaki &lt;<a href="mailto:deepthidevaki@multicorewareinc.com">deepthidevaki@multicorewareinc.com</a>&gt;<br>
# Date 1383894227 -19800<br>
# Node ID a73bc98e632c668c9ebd5a1a9ed40557cb44d00c<br>
# Parent fef74c2e329dc24d9e93624de217babc2d6fa77f<br>
no-rdo: refactor enodeResandCalcRDInterCU function<br>
<br>
Divide estimateBits and modeDecision inside the function. EstimateBits uses a pseudo encode. Bitstream changes with this patch for --rd 1.<br>
<br>
diff -r fef74c2e329d -r a73bc98e632c source/Lib/TLibEncoder/TEncSearch.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Nov 08 02:57:47 2013 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Nov 08 12:33:47 2013 +0530<br>
@@ -2941,6 +2941,144 @@<br>
cu->setQPSubParts(qpBest, 0, cu->getDepth(0));<br>
}<br>
<br>
+void TEncSearch::estimateRDInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TShortYUV* outResiYuv,<br>
+ TShortYUV* outBestResiYuv, TComYuv* outReconYuv, bool /*bSkipRes*/, bool curUseRDOQ)<br>
+{<br>
+ uint32_t width = cu->getWidth(0);<br>
+ uint32_t height = cu->getHeight(0);<br>
+<br>
+ outResiYuv->subtract(fencYuv, predYuv, 0, width);<br>
+<br>
+ uint32_t zerobits = estimateZerobits(cu);<br>
+ uint32_t zerodistortion = estimateZeroDist(cu, fencYuv, predYuv);<br>
+ uint64_t zerocost = m_rdCost->calcRdCost(zerodistortion, zerobits);<br>
+<br>
+ uint32_t distortion = 0;<br>
+ uint32_t bits = 0;<br>
+ estimateBitsDist(cu, outResiYuv, bits, distortion, curUseRDOQ);<br>
+ uint64_t cost = m_rdCost->calcRdCost(distortion, bits);<br>
+<br>
+ if (cu->isLosslessCoded(0))<br>
+ {<br>
+ zerocost = cost + 1;<br>
+ }<br>
+<br>
+ if (zerocost < cost)<br>
+ {<br>
+ const uint32_t qpartnum = cu->getPic()->getNumPartInCU() >> (cu->getDepth(0) << 1);<br>
+ ::memset(cu->getTransformIdx(), 0, qpartnum * sizeof(UChar));<br>
+ ::memset(cu->getCbf(TEXT_LUMA), 0, qpartnum * sizeof(UChar));<br>
+ ::memset(cu->getCbf(TEXT_CHROMA_U), 0, qpartnum * sizeof(UChar));<br>
+ ::memset(cu->getCbf(TEXT_CHROMA_V), 0, qpartnum * sizeof(UChar));<br>
+ ::memset(cu->getCoeffY(), 0, width * height * sizeof(TCoeff));<br>
+ ::memset(cu->getCoeffCb(), 0, width * height * sizeof(TCoeff) >> 2);<br>
+ ::memset(cu->getCoeffCr(), 0, width * height * sizeof(TCoeff) >> 2);<br>
+ cu->setTransformSkipSubParts(0, 0, 0, 0, cu->getDepth(0));<br>
+ if (cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)<br>
+ {<br>
+ cu->setSkipFlagSubParts(true, 0, cu->getDepth(0));<br>
+ }<br>
+ bits = zerobits;<br>
+ outBestResiYuv->clear();<br>
+ generateRecon(cu, predYuv, outBestResiYuv, outReconYuv, true);<br>
+ }<br>
+ else<br>
+ {<br>
+ xSetResidualQTData(cu, 0, 0, outBestResiYuv, cu->getDepth(0), true);<br>
+ generateRecon(cu, predYuv, outBestResiYuv, outReconYuv, false);<br>
+ }<br>
+<br>
+ int part = partitionFromSizes(width, height);<br>
+ distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());<br>
+ part = partitionFromSizes(width >> 1, height >> 1);<br>
+ distortion += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));<br>
+ distortion += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));<br>
+<br>
+ cu->m_totalBits = bits;<br>
+ cu->m_totalDistortion = distortion;<br>
+ cu->m_totalCost = m_rdCost->calcRdCost(distortion, bits);<br>
+}<br>
+<br>
+uint32_t TEncSearch::estimateZerobits(TComDataCU* cu)<br>
+{<br>
+ if (cu->isIntra(0))<br>
+ {<br>
+ return 0;<br>
+ }<br>
+<br>
+ uint32_t zeroResiBits = 0;<br>
+<br>
+ uint32_t width = cu->getWidth(0);<br>
+ uint32_t height = cu->getHeight(0);<br>
+<br>
+ const uint32_t qpartnum = cu->getPic()->getNumPartInCU() >> (cu->getDepth(0) << 1);<br>
+ ::memset(cu->getTransformIdx(), 0, qpartnum * sizeof(UChar));<br>
+ ::memset(cu->getCbf(TEXT_LUMA), 0, qpartnum * sizeof(UChar));<br>
+ ::memset(cu->getCbf(TEXT_CHROMA_U), 0, qpartnum * sizeof(UChar));<br>
+ ::memset(cu->getCbf(TEXT_CHROMA_V), 0, qpartnum * sizeof(UChar));<br>
+ ::memset(cu->getCoeffY(), 0, width * height * sizeof(TCoeff));<br>
+ ::memset(cu->getCoeffCb(), 0, width * height * sizeof(TCoeff) >> 2);<br>
+ ::memset(cu->getCoeffCr(), 0, width * height * sizeof(TCoeff) >> 2);<br>
+ cu->setTransformSkipSubParts(0, 0, 0, 0, cu->getDepth(0));<br>
+<br>
+ m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);<br>
+ zeroResiBits = xSymbolBitsInter(cu);<br>
+ // Reset skipflags to false which would have set to true by xSymbolBitsInter if merge-skip<br>
+ cu->setSkipFlagSubParts(false, 0, cu->getDepth(0));<br>
+ return zeroResiBits;<br>
+}<br>
+<br>
+uint32_t TEncSearch::estimateZeroDist(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv)<br>
+{<br>
+ uint32_t distortion = 0;<br>
+<br>
+ uint32_t width = cu->getWidth(0);<br>
+ uint32_t height = cu->getHeight(0);<br>
+<br>
+ int part = partitionFromSizes(width, height);<br>
+<br>
+ distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), predYuv->getLumaAddr(), predYuv->getStride());<br>
+ part = partitionFromSizes(width >> 1, height >> 1);<br>
+ distortion += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), predYuv->getCbAddr(), predYuv->getCStride()));<br>
+ distortion += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), predYuv->getCrAddr(), predYuv->getCStride()));<br>
+ return distortion;<br>
+}<br>
+<br>
+void TEncSearch::generateRecon(TComDataCU* cu, TComYuv* predYuv, TShortYUV* resiYuv, TComYuv* reconYuv, bool skipRes)<br>
+{<br>
+ if (skipRes)<br>
+ {<br>
+ predYuv->copyToPartYuv(reconYuv, 0);<br>
+ return;<br>
+ }<br>
+ else<br>
+ {<br>
+ uint32_t width = cu->getWidth(0);<br>
+ xSetResidualQTData(cu, 0, 0, resiYuv, cu->getDepth(0), true);<br>
+ reconYuv->addClip(predYuv, resiYuv, 0, width);<br>
+ }<br>
+}<br>
+<br>
+void TEncSearch::estimateBitsDist(TComDataCU* cu, TShortYUV* resiYuv, uint32_t& bits, uint32_t& distortion, bool curUseRDOQ)<br>
+{<br>
+ if (cu->isIntra(0))<br>
+ {<br>
+ return;<br>
+ }<br>
+<br>
+ bits = 0;<br>
+ distortion = 0;<br>
+ uint64_t cost = 0;<br>
+ uint32_t zeroDistortion = 0;<br>
+ m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);<br>
+ xEstimateResidualQT(cu, 0, 0, resiYuv, cu->getDepth(0), cost, bits, distortion, &zeroDistortion, curUseRDOQ);<br>
+<br>
+ xSetResidualQTData(cu, 0, 0, NULL, cu->getDepth(0), false);<br>
+ m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);<br>
+ bits = xSymbolBitsInter(cu);<br>
+ m_rdGoOnSbacCoder->store(m_rdSbacCoders[cu->getDepth(0)][CI_TEMP_BEST]);<br>
+}<br>
+<br>
#if _MSC_VER<br>
#pragma warning(disable: 4701) // potentially uninitialized local variable<br>
#endif<br>
diff -r fef74c2e329d -r a73bc98e632c source/Lib/TLibEncoder/TEncSearch.h<br>
--- a/source/Lib/TLibEncoder/TEncSearch.h Fri Nov 08 02:57:47 2013 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.h Fri Nov 08 12:33:47 2013 +0530<br>
@@ -153,6 +153,17 @@<br>
void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TShortYUV* resiYuv, TShortYUV* bestResiYuv,<br>
TComYuv* reconYuv, bool bSkipRes, bool curUseRDOQ = true);<br>
<br>
+ void estimateRDInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TShortYUV* resiYuv, TShortYUV* bestResiYuv,<br>
+ TComYuv* reconYuv, bool bSkipRes, bool curUseRDOQ = true);<br>
+<br>
+ uint32_t estimateZerobits(TComDataCU* cu);<br>
+<br>
+ uint32_t estimateZeroDist(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv);<br>
+<br>
+ void generateRecon(TComDataCU* cu, TComYuv* predYuv, TShortYUV* resiYuv, TComYuv* reconYuv, bool skipRes);<br>
+<br>
+ void estimateBitsDist(TComDataCU* cu, TShortYUV* resiYuv, uint32_t& bits, uint32_t& distortion, bool curUseRDOQ);<br>
+<br>
/// set ME search range<br>
void setAdaptiveSearchRange(int dir, int refIdx, int merange) { m_adaptiveRange[dir][refIdx] = merange; }<br>
<br>
diff -r fef74c2e329d -r a73bc98e632c source/encoder/compress.cpp<br>
--- a/source/encoder/compress.cpp Fri Nov 08 02:57:47 2013 -0600<br>
+++ b/source/encoder/compress.cpp Fri Nov 08 12:33:47 2013 +0530<br>
@@ -319,7 +319,7 @@<br>
m_tmpRecoYuv[depth] = yuv;<br>
<br>
//Encode with residue<br>
- m_search->encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false);<br>
+ m_search->estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false);<br>
<br>
if (outTempCU->m_totalCost < outBestCU->m_totalCost) //Choose best from no-residue mode and residue mode<br>
{<br>
@@ -476,8 +476,9 @@<br>
m_search->motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, partIdx, false, true);<br>
}<br>
<br>
- m_search->encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],<br>
- m_bestResiYuv[depth], m_bestRecoYuv[depth], false);<br>
+ m_search->estimateRDInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],<br>
+ m_bestResiYuv[depth], m_bestRecoYuv[depth], false);<br>
+<br>
#if CU_STAT_LOGFILE<br>
fprintf(fp1, "\n N : %d , Best Inter : %d , ", outBestCU->getWidth(0) / 2, outBestCU->m_totalCost);<br>
#endif<br>
</blockquote></div><br></div>