<div dir="ltr">OK, pushing this series in. After the additional patch, it's pretty much a win; the efficiency improvements in 10-bit are especially solid. <br></div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, May 20, 2015 at 4:09 PM, Deepthi Nandakumar <span dir="ltr"><<a href="mailto:deepthi@multicorewareinc.com" target="_blank">deepthi@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div>Thanks.<br><br></div>With the smoke tests, about two-thirds of the tests
 show positive/neutral encode efficiency gains, while a third show 
marginally lower encode efficiency, with a couple of command lines 
showing a surprising drop. <div class="gmail_extra"><br><div class="gmail_quote"><div><div class="h5">On Tue, May 19, 2015 at 6:45 PM,  <span dir="ltr"><<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Ashok Kumar Mishra<<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>><br>
# Date 1431933378 -19800<br>
#      Mon May 18 12:46:18 2015 +0530<br>
# Node ID 1e2e70f90e4484b32217c7579bca98180929cf72<br>
# Parent  d7b100e51e828833eee006f1da93e499ac161d28<br>
analysis: re-order RD 0/4 analysis to do splits before ME or intra<br>
<br>
diff -r d7b100e51e82 -r 1e2e70f90e44 source/encoder/analysis.cpp<br>
--- a/source/encoder/analysis.cpp       Mon May 18 18:24:08 2015 -0500<br>
+++ b/source/encoder/analysis.cpp       Mon May 18 12:46:18 2015 +0530<br>
@@ -756,19 +756,79 @@<br>
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);<br>
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);<br>
     uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);<br>
-<br>
+    bool earlyskip = false;<br>
     if (mightNotSplit && depth >= minDepth)<br>
     {<br>
-        bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;<br>
-<br>
         /* Compute Merge Cost */<br>
         md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);<br>
         md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);<br>
         checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);<br>
-<br>
-        bool earlyskip = false;<br>
         if (m_param->rdLevel)<br>
             earlyskip = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth<br>
+    }<br>
+<br>
+    bool bNoSplit = false;<br>
+    if (md.bestMode)<br>
+    {<br>
+        bNoSplit = md.bestMode->cu.isSkipped(0);<br>
+        if (mightSplit && depth && depth >= minDepth && !bNoSplit)<br>
+            bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);<br>
+    }<br>
+<br>
+    if (mightSplit && !bNoSplit)<br>
+    {<br>
+        Mode* splitPred = &md.pred[PRED_SPLIT];<br>
+        splitPred->initCosts();<br>
+        CUData* splitCU = &splitPred->cu;<br>
+        splitCU->initSubCU(parentCTU, cuGeom, qp);<br>
+<br>
+        uint32_t nextDepth = depth + 1;<br>
+        ModeDepth& nd = m_modeDepth[nextDepth];<br>
+        invalidateContexts(nextDepth);<br>
+        Entropy* nextContext = &m_rqt[depth].cur;<br>
+        int nextQP = qp;<br>
+<br>
+        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)<br>
+        {<br>
+            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);<br>
+            if (childGeom.flags & CUGeom::PRESENT)<br>
+            {<br>
+                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);<br>
+                m_rqt[nextDepth].cur.load(*nextContext);<br>
+<br>
+                if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)<br>
+                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));<br>
+<br>
+                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);<br>
+<br>
+                // Save best CU and pred data for this sub CU<br>
+                splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);<br>
+                splitPred->addSubCosts(*nd.bestMode);<br>
+<br>
+                if (m_param->rdLevel)<br>
+                    nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);<br>
+                else<br>
+                    nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childGeom.numPartitions * subPartIdx);<br>
+                if (m_param->rdLevel > 1)<br>
+                    nextContext = &nd.bestMode->contexts;<br>
+            }<br>
+            else<br>
+                splitCU->setEmptyPart(childGeom, subPartIdx);<br>
+        }<br>
+        nextContext->store(splitPred->contexts);<br>
+<br>
+        if (mightNotSplit)<br>
+            addSplitFlagCost(*splitPred, cuGeom.depth);<br>
+        else if (m_param->rdLevel > 1)<br>
+            updateModeCost(*splitPred);<br>
+        else<br>
+            splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);<br>
+    }<br>
+<br>
+    if (mightNotSplit && depth >= minDepth)<br>
+    {<br>
+        if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth && m_slice->m_pps->maxCuDQPDepth != 0)<br>
+            setLambdaFromQP(parentCTU, qp);<br>
<br>
         if (!earlyskip)<br>
         {<br>
@@ -834,7 +894,7 @@<br>
                         bestInter = &md.pred[PRED_nRx2N];<br>
                 }<br>
             }<br>
-<br>
+            bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;<br>
             if (m_param->rdLevel >= 3)<br>
             {<br>
                 /* Calculate RD cost of best inter option */<br>
@@ -950,63 +1010,19 @@<br>
             addSplitFlagCost(*md.bestMode, cuGeom.depth);<br>
     }<br>
<br>
-    bool bNoSplit = false;<br>
-    if (md.bestMode)<br>
+    if (mightNotSplit && md.bestMode)<br>
     {<br>
-        bNoSplit = md.bestMode->cu.isSkipped(0);<br>
-        if (mightSplit && depth && depth >= minDepth && !bNoSplit)<br>
-            bNoSplit = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);<br>
+        /* early-out statistics */<br>
+        FrameData& curEncData = *m_frame->m_encData;<br>
+        FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];<br>
+        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];<br>
+        cuStat.count[depth] += 1;<br>
+        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];<br>
     }<br></blockquote><div><br></div></div></div><div>The stats accumulation above should be moved further down, so that in the case where only split costs are available, the early-out stats do not change significantly. I suspect this is what caused the drop in encode efficiency. <br></div><div><div class="h5"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
     if (mightSplit && !bNoSplit)<br>
     {<br>
         Mode* splitPred = &md.pred[PRED_SPLIT];<br>
-        splitPred->initCosts();<br>
-        CUData* splitCU = &splitPred->cu;<br>
-        splitCU->initSubCU(parentCTU, cuGeom, qp);<br>
-<br>
-        uint32_t nextDepth = depth + 1;<br>
-        ModeDepth& nd = m_modeDepth[nextDepth];<br>
-        invalidateContexts(nextDepth);<br>
-        Entropy* nextContext = &m_rqt[depth].cur;<br>
-        int nextQP = qp;<br>
-<br>
-        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)<br>
-        {<br>
-            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);<br>
-            if (childGeom.flags & CUGeom::PRESENT)<br>
-            {<br>
-                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);<br>
-                m_rqt[nextDepth].cur.load(*nextContext);<br>
-<br>
-                if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)<br>
-                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));<br>
-<br>
-                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);<br>
-<br>
-                // Save best CU and pred data for this sub CU<br>
-                splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);<br>
-                splitPred->addSubCosts(*nd.bestMode);<br>
-<br>
-                if (m_param->rdLevel)<br>
-                    nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);<br>
-                else<br>
-                    nd.bestMode->predYuv.copyToPartYuv(splitPred->predYuv, childGeom.numPartitions * subPartIdx);<br>
-                if (m_param->rdLevel > 1)<br>
-                    nextContext = &nd.bestMode->contexts;<br>
-            }<br>
-            else<br>
-                splitCU->setEmptyPart(childGeom, subPartIdx);<br>
-        }<br>
-        nextContext->store(splitPred->contexts);<br>
-<br>
-        if (mightNotSplit)<br>
-            addSplitFlagCost(*splitPred, cuGeom.depth);<br>
-        else if (m_param->rdLevel > 1)<br>
-            updateModeCost(*splitPred);<br>
-        else<br>
-            splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);<br>
-<br>
         if (!md.bestMode)<br>
             md.bestMode = splitPred;<br>
         else if (m_param->rdLevel > 1)<br>
@@ -1016,21 +1032,11 @@<br>
<br>
         checkDQPForSplitPred(*md.bestMode, cuGeom);<br>
     }<br>
-    if (mightNotSplit)<br>
-    {<br>
-        /* early-out statistics */<br>
-        FrameData& curEncData = *m_frame->m_encData;<br>
-        FrameData::RCStatCU& cuStat = curEncData.m_cuStat[parentCTU.m_cuAddr];<br>
-        uint64_t temp = cuStat.avgCost[depth] * cuStat.count[depth];<br>
-        cuStat.count[depth] += 1;<br>
-        cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];<br>
-    }<br>
<br>
     /* Copy best data to encData CTU and recon */<br>
     X265_CHECK(md.bestMode->ok(), "best mode is not ok");<br>
     md.bestMode->cu.copyToPic(depth);<br>
-    if (md.bestMode != &md.pred[PRED_SPLIT] && m_param->rdLevel)<br>
-        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);<br>
+    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);<br>
 }<br>
<br>
 void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div></div></div><br></div></div>
</blockquote></div><br></div>