<div dir="ltr">Sorry, the output mismatch was due to asm. Pushed.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Sun, Sep 14, 2014 at 4:35 PM, Deepthi Nandakumar <span dir="ltr">&lt;<a href="mailto:deepthi@multicorewareinc.com" target="_blank">deepthi@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">This significantly changes outputs for P and B frames. Higher bitrates and higher SSIM. Let's do full regression testing on this - and compare the bitrate/SSIM for all combinations to be reasonably sure there are no bugs. <br></div><div class="HOEnZb"><div class="h5"><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Sep 12, 2014 at 7:47 PM,  <span dir="ltr">&lt;<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Ashok Kumar Mishra&lt;<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>&gt;<br>
# Date 1410341620 -19800<br>
#      Wed Sep 10 15:03:40 2014 +0530<br>
# Node ID d8be3c38915d4a628b804522da8946a152041203<br>
# Parent  cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f<br>
Search: remove redundant encode coefficients in intra for performance<br>
<br>
diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp<br>
--- a/source/encoder/analysis.cpp       Thu Sep 11 17:25:40 2014 -0700<br>
+++ b/source/encoder/analysis.cpp       Wed Sep 10 15:03:40 2014 +0530<br>
@@ -1840,6 +1840,7 @@<br>
 void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv,  ShortYuv* outResiYuv, TComYuv* outReconYuv)<br>
 {<br>
     uint64_t puCost = 0;<br>
+    uint32_t puBits = 0;<br>
     uint32_t depth = cu->getDepth(0);<br>
     uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;<br>
<br>
@@ -1851,7 +1852,7 @@<br>
     uint32_t tuDepthRange[2];<br>
     cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);<br>
<br>
-    uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, tuDepthRange);<br>
+    uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, puBits, tuDepthRange);<br>
     xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);<br>
<br>
     //=== update PU data ====<br>
diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp<br>
--- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700<br>
+++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530<br>
@@ -111,47 +111,6 @@<br>
     return false;<br>
 }<br>
<br>
-void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2])<br>
-{<br>
-    uint32_t fullDepth  = cu->getDepth(0) + trDepth;<br>
-    uint32_t trMode     = cu->getTransformIdx(absPartIdx);<br>
-    uint32_t subdiv     = (trMode > trDepth ? 1 : 0);<br>
-    uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;<br>
-<br>
-    if (cu->getPredictionMode(0) == MODE_INTRA && cu->getPartitionSize(0) == SIZE_NxN && trDepth == 0)<br>
-    {<br>
-        X265_CHECK(subdiv, "subdivision not present\n");<br>
-    }<br>
-    else if (log2TrSize > *(depthRange + 1))<br>
-    {<br>
-        X265_CHECK(subdiv, "subdivision not present\n");<br>
-    }<br>
-    else if (log2TrSize == cu->m_slice->m_sps->quadtreeTULog2MinSize)<br>
-    {<br>
-        X265_CHECK(!subdiv, "subdivision present\n");<br>
-    }<br>
-    else if (log2TrSize == *depthRange)<br>
-    {<br>
-        X265_CHECK(!subdiv, "subdivision present\n");<br>
-    }<br>
-    else<br>
-    {<br>
-        X265_CHECK(log2TrSize > *depthRange, "transform size too small\n");<br>
-        m_entropyCoder->codeTransformSubdivFlag(subdiv, 5 - log2TrSize);<br>
-    }<br>
-<br>
-    if (subdiv)<br>
-    {<br>
-        uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth + 1) << 1);<br>
-        for (uint32_t part = 0; part < 4; part++)<br>
-            xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum, depthRange);<br>
-<br>
-        return;<br>
-    }<br>
-<br>
-    m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);<br>
-}<br>
-<br>
 void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height)<br>
 {<br>
     uint32_t fullDepth  = cu->getDepth(0) + trDepth;<br>
@@ -183,32 +142,6 @@<br>
     }<br>
 }<br>
<br>
-void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)<br>
-{<br>
-    const TextType ttype = TEXT_LUMA;<br>
-<br>
-    if (!cu->getCbf(absPartIdx, ttype, trDepth))<br>
-        return;<br>
-<br>
-    uint32_t fullDepth = cu->getDepth(0) + trDepth;<br>
-    uint32_t trMode    = cu->getTransformIdx(absPartIdx);<br>
-<br>
-    if (trMode > trDepth)<br>
-    {<br>
-        uint32_t qtPartNum = cu->m_pic->getNumPartInCU() >> ((fullDepth + 1) << 1);<br>
-        for (uint32_t part = 0; part < 4; part++)<br>
-            xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum);<br>
-<br>
-        return;<br>
-    }<br>
-<br>
-    uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;<br>
-    uint32_t qtLayer    = log2TrSize - 2;<br>
-    uint32_t coeffOffset = absPartIdx << LOG2_UNIT_SIZE * 2;<br>
-    coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;<br>
-    m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype);<br>
-}<br>
-<br>
 void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype)<br>
 {<br>
     if (!cu->getCbf(absPartIdx, ttype, trDepth))<br>
@@ -316,15 +249,6 @@<br>
     }<br>
 }<br>
<br>
-uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2])<br>
-{<br>
-    m_entropyCoder->resetBits();<br>
-    xEncIntraHeaderLuma(cu, trDepth, absPartIdx);<br>
-    xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange);<br>
-    xEncCoeffQTLuma(cu, trDepth, absPartIdx);<br>
-    return m_entropyCoder->getNumberOfWrittenBits();<br>
-}<br>
-<br>
 uint32_t Search::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep)<br>
 {<br>
     int cuSize = 1 << cu->getLog2CUSize(absPartIdx);<br>
@@ -340,7 +264,14 @@<br>
 {<br>
     m_entropyCoder->resetBits();<br>
     xEncIntraHeaderLuma(cu, trDepth, absPartIdx);<br>
-    xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange);<br>
+<br>
+    //Transform subdiv flag<br>
+    if (log2TrSize != *depthRange)<br>
+        m_entropyCoder->codeTransformSubdivFlag(0, 5 - log2TrSize);<br>
+<br>
+    //===== Cbfs =====<br>
+    uint32_t trMode = cu->getTransformIdx(absPartIdx);<br>
+    m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);<br>
<br>
     if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth))<br>
         m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, TEXT_LUMA);<br>
@@ -463,7 +394,7 @@<br>
<br>
 /* returns distortion. TODO reorder params */<br>
 uint32_t Search::xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,<br>
-                                     ShortYuv* resiYuv, bool bAllowRQTSplit, uint64_t& rdCost, uint32_t depthRange[2])<br>
+                                     ShortYuv* resiYuv, bool bAllowRQTSplit, uint64_t& rdCost, uint32_t& rdBits, uint32_t depthRange[2])<br>
 {<br>
     uint32_t fullDepth   = cu->getDepth(0) + trDepth;<br>
     uint32_t log2TrSize  = g_maxLog2CUSize - fullDepth;<br>
@@ -490,8 +421,9 @@<br>
     if (!bAllowRQTSplit && noSplitIntraMaxTuSize)<br>
         bCheckSplit = false;<br>
<br>
-    uint64_t singleCost   = MAX_INT64;<br>
-    uint32_t singleDistY  = 0;<br>
+    uint64_t singleCost  = MAX_INT64;<br>
+    uint32_t singleDistY = 0;<br>
+    uint32_t singleBits  = 0;<br>
     uint32_t singlePsyEnergyY = 0;<br>
     uint32_t singleCbfY   = 0;<br>
     int      bestModeId   = 0;<br>
@@ -580,7 +512,7 @@<br>
                     break;<br>
                 else<br>
                 {<br>
-                    uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeff, depthRange);<br>
+                    singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeff, depthRange);<br>
                     if (m_rdCost.m_psyRd)<br>
                         singleCostTmp = m_rdCost.calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp);<br>
                     else<br>
@@ -634,7 +566,7 @@<br>
             }<br>
             cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);<br>
<br>
-            uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY, depthRange);<br>
+            singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY, depthRange);<br>
             if (m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice)<br>
                 singleBits *= 4;<br>
<br>
@@ -663,23 +595,30 @@<br>
         uint32_t qPartsDiv     = cu->m_pic->getNumPartInCU() >> ((fullDepth + 1) << 1);<br>
         uint32_t absPartIdxSub = absPartIdx;<br>
         uint32_t splitCbfY     = 0;<br>
+        uint32_t splitBits     = 0;<br>
<br>
         for (uint32_t part = 0; part < 4; part++, absPartIdxSub += qPartsDiv)<br>
         {<br>
             cu->m_psyEnergy = 0;<br>
-            splitDistY += xRecurIntraCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost, depthRange);<br>
+            splitDistY += xRecurIntraCodingQT(cu, trDepth + 1, absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost, splitBits, depthRange);<br>
             splitPsyEnergyY += cu->m_psyEnergy;<br>
             splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth + 1);<br>
         }<br>
+<br>
+        if (bCheckFull)<br>
+        {<br>
+            m_entropyCoder->resetBits();<br>
+<br>
+            //subdiv<br>
+            if (log2TrSize != *depthRange)<br>
+                m_entropyCoder->codeTransformSubdivFlag(1, 5 - log2TrSize);<br>
+<br>
+             splitBits += m_entropyCoder->getNumberOfWrittenBits();<br>
+        }<br>
<br>
         for (uint32_t offs = 0; offs < 4 * qPartsDiv; offs++)<br>
             cu->getCbf(TEXT_LUMA)[absPartIdx + offs] |= (splitCbfY << trDepth);<br>
<br>
-        // restore context states<br>
-        m_entropyCoder->load(m_rdEntropyCoders[fullDepth][CI_QT_TRAFO_ROOT]);<br>
-<br>
-        // determine rate and r-d cost<br>
-        uint32_t splitBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx, depthRange);<br>
         if (m_rdCost.m_psyRd)<br>
             splitCost = m_rdCost.calcPsyRdCost(splitDistY, splitBits, splitPsyEnergyY);<br>
         else<br>
@@ -689,6 +628,7 @@<br>
         {<br>
             outDist  += splitDistY;<br>
             rdCost   += splitCost;<br>
+            rdBits   += splitBits;<br>
             cu->m_psyEnergy = splitPsyEnergyY;<br>
             return outDist;<br>
         }<br>
@@ -717,6 +657,7 @@<br>
     }<br>
<br>
     rdCost += singleCost;<br>
+    rdBits += singleBits;<br>
     cu->m_psyEnergy = singlePsyEnergyY;<br>
     return outDist + singleDistY;<br>
 }<br>
@@ -1416,6 +1357,7 @@<br>
         uint32_t bestPUDistY = 0;<br>
         uint64_t bestPUCost  = MAX_INT64;<br>
         uint32_t puDistY;<br>
+        uint32_t puBits;<br>
         uint64_t puCost;<br>
         for (int mode = 0; mode < numModesForFullRD; mode++)<br>
         {<br>
@@ -1427,7 +1369,8 @@<br>
<br>
             // determine residual for partition<br>
             puCost = 0;<br>
-            puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, false, puCost, depthRange);<br>
+            puBits = 0;<br>
+            puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, false, puCost, puBits, depthRange);<br>
<br>
             // check r-d cost<br>
             if (puCost < bestPUCost)<br>
@@ -1446,7 +1389,8 @@<br>
<br>
         // determine residual for partition<br>
         puCost = 0;<br>
-        puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, true, puCost, depthRange);<br>
+        puBits = 0;<br>
+        puDistY = xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, true, puCost, puBits, depthRange);<br>
<br>
         overallDistY += (puCost >= bestPUCost) ? bestPUDistY : puDistY;<br>
<br>
diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.h<br>
--- a/source/encoder/search.h   Thu Sep 11 17:25:40 2014 -0700<br>
+++ b/source/encoder/search.h   Wed Sep 10 15:03:40 2014 +0530<br>
@@ -129,14 +129,11 @@<br>
     void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, ShortYuv* resiYuv, uint32_t depth, bool bSpatial);<br>
     void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* reconYuv);<br>
<br>
-    void xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]);<br>
     void xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx,  uint32_t absPartIdxStep, uint32_t width, uint32_t height);<br>
-<br>
-    void xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);<br>
     void xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);<br>
     void xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);<br>
     void xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx);<br>
-    uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]);<br>
+<br>
     uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep);<br>
     uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2]);<br>
     uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);<br>
@@ -147,7 +144,7 @@<br>
                                  uint64_t &rdCost, uint32_t &outBits, uint32_t *zeroDist, uint32_t tuDepthRange[2]);<br>
<br>
     uint32_t xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,<br>
-                                 ShortYuv* resiYuv, bool bAllowRQTSplit, uint64_t& dRDCost, uint32_t depthRange[2]);<br>
+                                 ShortYuv* resiYuv, bool bAllowRQTSplit, uint64_t& dRDCost, uint32_t& puBits, uint32_t depthRange[2]);<br>
<br>
     uint32_t xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv);<br>
<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>
</div></div></blockquote></div><br></div>