<div dir="ltr"><br><div><div>The bitrate/PSNR changes seen after applying this patch are because it uses a full pseudo-encode to estimate the bits for both the residual and zero-residue modes.</div><div><br></div><div><div>
Before<span class="" style="white-space:pre"> </span> BasketballPass_416x240<span class="" style="white-space:pre">   </span>  318.48 kb/s, Global PSNR: 35.468</div><div>After<span class="" style="white-space:pre">     </span>        BasketballPass_416x240<span class="" style="white-space:pre">   </span>  316.62 kb/s, Global PSNR: 35.463</div>
<div><br></div><div>Before<span class="" style="white-space:pre">     </span> big_buck_bunny_360p24<span class="" style="white-space:pre">    </span>  50.14 kb/s, Global PSNR: 43.662</div><div>After<span class="" style="white-space:pre">     </span>         big_buck_bunny_360p24<span class="" style="white-space:pre">    </span>  49.92 kb/s, Global PSNR: 43.657</div>
<div><br></div><div>Before<span class="" style="white-space:pre">     </span> FourPeople_1280x720_60<span class="" style="white-space:pre">   </span>  501.80 kb/s, Global PSNR: 39.633</div><div>After<span class="" style="white-space:pre">      </span>       FourPeople_1280x720_60<span class="" style="white-space:pre">   </span>  500.74 kb/s, Global PSNR: 39.627</div>
<div><br></div><div>Before<span class="" style="white-space:pre">     </span> sintel_trailer_2k_720<span class="" style="white-space:pre">    </span>  89.05 kb/s, Global PSNR: 55.233</div><div>After<span class="" style="white-space:pre">     </span>         sintel_trailer_2k_720<span class="" style="white-space:pre">    </span>  89.61 kb/s, Global PSNR: 55.189</div>
<div><br></div><div>Before<span class="" style="white-space:pre">     </span> Johnny_1280x720_60<span class="" style="white-space:pre">       </span>  289.92 kb/s, Global PSNR: 40.470</div><div>After<span class="" style="white-space:pre">    </span>         Johnny_1280x720_60<span class="" style="white-space:pre">       </span>  289.00 kb/s, Global PSNR: 40.473</div>
<div><br></div><div>Before<span class="" style="white-space:pre">     </span> Kimono1_1920x1080_24<span class="" style="white-space:pre">     </span>  1783.15 kb/s, Global PSNR: 38.594</div><div>After<span class="" style="white-space:pre">   </span>         Kimono1_1920x1080_24<span class="" style="white-space:pre">     </span>  1778.44 kb/s, Global PSNR: 38.592</div>
<div><br></div><div>Before<span class="" style="white-space:pre">     </span> BBDrive_1920x1080_50<span class="" style="white-space:pre">     </span>  3799.89 kb/s, Global PSNR: 37.109</div><div>After<span class="" style="white-space:pre">   </span>         BBDrive_1920x1080_50<span class="" style="white-space:pre">     </span>  3802.98 kb/s, Global PSNR: 37.107</div>
</div></div><div><br></div><div><br></div><div>-</div><div>Deepthi</div><div><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Fri, Nov 8, 2013 at 2:50 PM,  <span dir="ltr"><<a href="mailto:deepthidevaki@multicorewareinc.com" target="_blank">deepthidevaki@multicorewareinc.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Deepthi Devaki <<a href="mailto:deepthidevaki@multicorewareinc.com">deepthidevaki@multicorewareinc.com</a>><br>
# Date 1383894227 -19800<br>
# Node ID a73bc98e632c668c9ebd5a1a9ed40557cb44d00c<br>
# Parent  fef74c2e329dc24d9e93624de217babc2d6fa77f<br>
no-rdo: refactor encodeResAndCalcRdInterCU function<br>
<br>
Separate bit estimation and mode decision inside the function. The bit estimation uses a pseudo-encode. The bitstream changes with this patch for --rd 1.<br>
<br>
diff -r fef74c2e329d -r a73bc98e632c source/Lib/TLibEncoder/TEncSearch.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSearch.cpp     Fri Nov 08 02:57:47 2013 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Fri Nov 08 12:33:47 2013 +0530<br>
@@ -2941,6 +2941,144 @@<br>
     cu->setQPSubParts(qpBest, 0, cu->getDepth(0));<br>
 }<br>
<br>
+void TEncSearch::estimateRDInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TShortYUV* outResiYuv,<br>
+                                   TShortYUV* outBestResiYuv, TComYuv* outReconYuv, bool /*bSkipRes*/, bool curUseRDOQ)<br>
+{<br>
+    uint32_t width  = cu->getWidth(0);<br>
+    uint32_t height = cu->getHeight(0);<br>
+<br>
+    outResiYuv->subtract(fencYuv, predYuv, 0, width);<br>
+<br>
+    uint32_t zerobits = estimateZerobits(cu);<br>
+    uint32_t zerodistortion = estimateZeroDist(cu, fencYuv, predYuv);<br>
+    uint64_t zerocost = m_rdCost->calcRdCost(zerodistortion, zerobits);<br>
+<br>
+    uint32_t distortion = 0;<br>
+    uint32_t bits = 0;<br>
+    estimateBitsDist(cu, outResiYuv, bits, distortion, curUseRDOQ);<br>
+    uint64_t cost = m_rdCost->calcRdCost(distortion, bits);<br>
+<br>
+    if (cu->isLosslessCoded(0))<br>
+    {<br>
+        zerocost = cost + 1;<br>
+    }<br>
+<br>
+    if (zerocost < cost)<br>
+    {<br>
+        const uint32_t qpartnum = cu->getPic()->getNumPartInCU() >> (cu->getDepth(0) << 1);<br>
+        ::memset(cu->getTransformIdx(), 0, qpartnum * sizeof(UChar));<br>
+        ::memset(cu->getCbf(TEXT_LUMA), 0, qpartnum * sizeof(UChar));<br>
+        ::memset(cu->getCbf(TEXT_CHROMA_U), 0, qpartnum * sizeof(UChar));<br>
+        ::memset(cu->getCbf(TEXT_CHROMA_V), 0, qpartnum * sizeof(UChar));<br>
+        ::memset(cu->getCoeffY(), 0, width * height * sizeof(TCoeff));<br>
+        ::memset(cu->getCoeffCb(), 0, width * height * sizeof(TCoeff) >> 2);<br>
+        ::memset(cu->getCoeffCr(), 0, width * height * sizeof(TCoeff) >> 2);<br>
+        cu->setTransformSkipSubParts(0, 0, 0, 0, cu->getDepth(0));<br>
+        if (cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)<br>
+        {<br>
+            cu->setSkipFlagSubParts(true, 0, cu->getDepth(0));<br>
+        }<br>
+        bits = zerobits;<br>
+        outBestResiYuv->clear();<br>
+        generateRecon(cu, predYuv, outBestResiYuv, outReconYuv, true);<br>
+    }<br>
+    else<br>
+    {<br>
+        xSetResidualQTData(cu, 0, 0, outBestResiYuv, cu->getDepth(0), true);<br>
+        generateRecon(cu, predYuv, outBestResiYuv, outReconYuv, false);<br>
+    }<br>
+<br>
+    int part = partitionFromSizes(width, height);<br>
+    distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());<br>
+    part = partitionFromSizes(width >> 1, height >> 1);<br>
+    distortion += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));<br>
+    distortion += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));<br>
+<br>
+    cu->m_totalBits       = bits;<br>
+    cu->m_totalDistortion = distortion;<br>
+    cu->m_totalCost       = m_rdCost->calcRdCost(distortion, bits);<br>
+}<br>
+<br>
+uint32_t TEncSearch::estimateZerobits(TComDataCU* cu)<br>
+{<br>
+    if (cu->isIntra(0))<br>
+    {<br>
+        return 0;<br>
+    }<br>
+<br>
+    uint32_t zeroResiBits = 0;<br>
+<br>
+    uint32_t width  = cu->getWidth(0);<br>
+    uint32_t height = cu->getHeight(0);<br>
+<br>
+    const uint32_t qpartnum = cu->getPic()->getNumPartInCU() >> (cu->getDepth(0) << 1);<br>
+    ::memset(cu->getTransformIdx(), 0, qpartnum * sizeof(UChar));<br>
+    ::memset(cu->getCbf(TEXT_LUMA), 0, qpartnum * sizeof(UChar));<br>
+    ::memset(cu->getCbf(TEXT_CHROMA_U), 0, qpartnum * sizeof(UChar));<br>
+    ::memset(cu->getCbf(TEXT_CHROMA_V), 0, qpartnum * sizeof(UChar));<br>
+    ::memset(cu->getCoeffY(), 0, width * height * sizeof(TCoeff));<br>
+    ::memset(cu->getCoeffCb(), 0, width * height * sizeof(TCoeff) >> 2);<br>
+    ::memset(cu->getCoeffCr(), 0, width * height * sizeof(TCoeff) >> 2);<br>
+    cu->setTransformSkipSubParts(0, 0, 0, 0, cu->getDepth(0));<br>
+<br>
+    m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);<br>
+    zeroResiBits = xSymbolBitsInter(cu);<br>
+    // Reset skip flags to false; xSymbolBitsInter would have set them to true for a merge-skip CU<br>
+    cu->setSkipFlagSubParts(false, 0, cu->getDepth(0));<br>
+    return zeroResiBits;<br>
+}<br>
+<br>
+uint32_t TEncSearch::estimateZeroDist(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv)<br>
+{<br>
+    uint32_t distortion = 0;<br>
+<br>
+    uint32_t width  = cu->getWidth(0);<br>
+    uint32_t height = cu->getHeight(0);<br>
+<br>
+    int part = partitionFromSizes(width, height);<br>
+<br>
+    distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), predYuv->getLumaAddr(), predYuv->getStride());<br>
+    part = partitionFromSizes(width >> 1, height >> 1);<br>
+    distortion += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), predYuv->getCbAddr(), predYuv->getCStride()));<br>
+    distortion += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), predYuv->getCrAddr(), predYuv->getCStride()));<br>
+    return distortion;<br>
+}<br>
+<br>
+void TEncSearch::generateRecon(TComDataCU* cu, TComYuv* predYuv, TShortYUV* resiYuv, TComYuv* reconYuv, bool skipRes)<br>
+{<br>
+    if (skipRes)<br>
+    {<br>
+        predYuv->copyToPartYuv(reconYuv, 0);<br>
+        return;<br>
+    }<br>
+    else<br>
+    {<br>
+        uint32_t width  = cu->getWidth(0);<br>
+        xSetResidualQTData(cu, 0, 0, resiYuv, cu->getDepth(0), true);<br>
+        reconYuv->addClip(predYuv, resiYuv, 0, width);<br>
+    }<br>
+}<br>
+<br>
+void TEncSearch::estimateBitsDist(TComDataCU* cu, TShortYUV* resiYuv, uint32_t& bits, uint32_t& distortion, bool curUseRDOQ)<br>
+{<br>
+    if (cu->isIntra(0))<br>
+    {<br>
+        return;<br>
+    }<br>
+<br>
+    bits = 0;<br>
+    distortion = 0;<br>
+    uint64_t cost = 0;<br>
+    uint32_t zeroDistortion = 0;<br>
+    m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);<br>
+    xEstimateResidualQT(cu, 0, 0, resiYuv, cu->getDepth(0), cost, bits, distortion, &zeroDistortion, curUseRDOQ);<br>
+<br>
+    xSetResidualQTData(cu, 0, 0, NULL, cu->getDepth(0), false);<br>
+    m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);<br>
+    bits = xSymbolBitsInter(cu);<br>
+    m_rdGoOnSbacCoder->store(m_rdSbacCoders[cu->getDepth(0)][CI_TEMP_BEST]);<br>
+}<br>
+<br>
 #if _MSC_VER<br>
 #pragma warning(disable: 4701) // potentially uninitialized local variable<br>
 #endif<br>
diff -r fef74c2e329d -r a73bc98e632c source/Lib/TLibEncoder/TEncSearch.h<br>
--- a/source/Lib/TLibEncoder/TEncSearch.h       Fri Nov 08 02:57:47 2013 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.h       Fri Nov 08 12:33:47 2013 +0530<br>
@@ -153,6 +153,17 @@<br>
     void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TShortYUV* resiYuv, TShortYUV* bestResiYuv,<br>
                                    TComYuv* reconYuv, bool bSkipRes, bool curUseRDOQ = true);<br>
<br>
+    void estimateRDInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TShortYUV* resiYuv, TShortYUV* bestResiYuv,<br>
+                           TComYuv* reconYuv, bool bSkipRes, bool curUseRDOQ = true);<br>
+<br>
+    uint32_t estimateZerobits(TComDataCU* cu);<br>
+<br>
+    uint32_t estimateZeroDist(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv);<br>
+<br>
+    void generateRecon(TComDataCU* cu, TComYuv* predYuv, TShortYUV* resiYuv, TComYuv* reconYuv, bool skipRes);<br>
+<br>
+    void estimateBitsDist(TComDataCU* cu, TShortYUV* resiYuv, uint32_t& bits, uint32_t& distortion, bool curUseRDOQ);<br>
+<br>
     /// set ME search range<br>
     void setAdaptiveSearchRange(int dir, int refIdx, int merange) { m_adaptiveRange[dir][refIdx] = merange; }<br>
<br>
diff -r fef74c2e329d -r a73bc98e632c source/encoder/compress.cpp<br>
--- a/source/encoder/compress.cpp       Fri Nov 08 02:57:47 2013 -0600<br>
+++ b/source/encoder/compress.cpp       Fri Nov 08 12:33:47 2013 +0530<br>
@@ -319,7 +319,7 @@<br>
     m_tmpRecoYuv[depth] = yuv;<br>
<br>
     //Encode with residue<br>
-    m_search->encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false);<br>
+    m_search->estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false);<br>
<br>
     if (outTempCU->m_totalCost < outBestCU->m_totalCost)    //Choose best from no-residue mode and residue mode<br>
     {<br>
@@ -476,8 +476,9 @@<br>
                 m_search->motionCompensation(outBestCU, m_bestPredYuv[depth], REF_PIC_LIST_X, partIdx, false, true);<br>
             }<br>
<br>
-            m_search->encodeResAndCalcRdInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],<br>
-                                                m_bestResiYuv[depth], m_bestRecoYuv[depth], false);<br>
+            m_search->estimateRDInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],<br>
+                                        m_bestResiYuv[depth], m_bestRecoYuv[depth], false);<br>
+<br>
 #if CU_STAT_LOGFILE<br>
             fprintf(fp1, "\n N : %d ,  Best Inter : %d , ", outBestCU->getWidth(0) / 2, outBestCU->m_totalCost);<br>
 #endif<br>
</blockquote></div><br></div>