<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Feb 2, 2015 at 10:23 AM,  <span dir="ltr"><<a href="mailto:gopu@multicorewareinc.com" target="_blank">gopu@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Gopu Govindaswamy <<a href="mailto:gopu@multicorewareinc.com">gopu@multicorewareinc.com</a>><br>
# Date 1422852790 -19800<br>
#      Mon Feb 02 10:23:10 2015 +0530<br>
# Node ID db56dc779466c5b54a55b5dadbcd04e882011729<br>
# Parent  6c5156500d6d4fa655acaf7a8b77f2ba3a0f794b<br>
analysis: dump and reuse the bestmergeCand for skip and merge mode<br>
<br>
diff -r 6c5156500d6d -r db56dc779466 source/common/common.h<br>
--- a/source/common/common.h    Fri Jan 30 11:54:22 2015 -0600<br>
+++ b/source/common/common.h    Mon Feb 02 10:23:10 2015 +0530<br>
@@ -376,6 +376,7 @@<br>
     int32_t*    ref;<br>
     uint8_t*    depth;<br>
     uint8_t*    modes;<br>
+    uint32_t*   bestMergeCand;<br>
 };<br>
<br>
 /* Stores intra analysis data for a single frame. This struct needs better packing */<br>
diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.cpp<br>
--- a/source/encoder/analysis.cpp       Fri Jan 30 11:54:22 2015 -0600<br>
+++ b/source/encoder/analysis.cpp       Mon Feb 02 10:23:10 2015 +0530<br>
@@ -140,6 +140,7 @@<br>
             int numPredDir = m_slice->isInterP() ? 1 : 2;<br>
             m_reuseInterDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData;<br>
             reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];<br>
+            reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];<br>
         }<br>
     }<br>
<br>
@@ -1066,21 +1067,6 @@<br>
             md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);<br>
             checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);<br>
<br>
-            if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&<br>
-                (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))<br>
-            {<br>
-                md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);<br>
-                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);<br>
-                checkBestMode(md.pred[PRED_INTRA], depth);<br>
-<br>
-                if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)<br>
-                {<br>
-                    md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);<br>
-                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder]);<br>
-                    checkBestMode(md.pred[PRED_INTRA_NxN], depth);<br>
-                }<br>
-            }<br>
-<br>
             if (m_bTryLossless)<br>
                 tryLossless(cuGeom);<br>
<br>
@@ -1388,29 +1374,10 @@<br>
     bool foundCbf0Merge = false;<br>
     bool triedPZero = false, triedBZero = false;<br>
     bestPred->rdCost = MAX_INT64;<br>
-    for (uint32_t i = 0; i < maxNumMergeCand; i++)<br>
+<br>
+    if (m_param->analysisMode == X265_ANALYSIS_LOAD)<br>
     {<br>
-        if (m_bFrameParallel &&<br>
-            (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||<br>
-             mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))<br>
-            continue;<br>
-<br>
-        /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */<br>
-        if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)<br>
-        {<br>
-            if (triedPZero)<br>
-                continue;<br>
-            triedPZero = true;<br>
-        }<br>
-        else if (interDirNeighbours[i] == 3 &&<br>
-                 !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&<br>
-                 !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)<br>
-        {<br>
-            if (triedBZero)<br>
-                continue;<br>
-            triedBZero = true;<br>
-        }<br>
-<br>
+        uint32_t i = *reuseBestMergeCand;<br>
         tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;    /* merge candidate ID is stored in L0 MVP idx */<br>
         tempPred->cu.m_interDir[0] = interDirNeighbours[i];<br>
         tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;<br>
@@ -1424,24 +1391,20 @@<br>
<br>
         uint8_t hasCbf = true;<br>
         bool swapped = false;<br>
-        if (!foundCbf0Merge)<br>
+<br>
+        /* if the best prediction has CBF (not a skip) then try merge with residual */<br>
+        encodeResAndCalcRdInterCU(*tempPred, cuGeom);<br>
+        hasCbf = tempPred->cu.getQtRootCbf(0);<br>
+        foundCbf0Merge = !hasCbf;<br>
+<br>
+        if (tempPred->rdCost < bestPred->rdCost)<br>
         {<br>
-            /* if the best prediction has CBF (not a skip) then try merge with residual */<br>
-<br>
-            encodeResAndCalcRdInterCU(*tempPred, cuGeom);<br>
-            hasCbf = tempPred->cu.getQtRootCbf(0);<br>
-            foundCbf0Merge = !hasCbf;<br>
-<br>
-            if (tempPred->rdCost < bestPred->rdCost)<br>
-            {<br>
-                std::swap(tempPred, bestPred);<br>
-                swapped = true;<br>
-            }<br>
+            std::swap(tempPred, bestPred);<br>
+            swapped = true;<br>
         }<br>
         if (!m_param->bLossless && hasCbf)<br>
         {<br>
             /* try merge without residual (skip), if not lossless coding */<br>
-<br>
             if (swapped)<br>
             {<br>
                 tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;<br>
@@ -1453,12 +1416,88 @@<br>
                 tempPred->cu.setPredModeSubParts(MODE_INTER);<br>
                 tempPred->predYuv.copyFromYuv(bestPred->predYuv);<br>
             }<br>
-<br>
+<br>
             encodeResAndCalcRdSkipCU(*tempPred);<br>
<br>
             if (tempPred->rdCost < bestPred->rdCost)<br>
                 std::swap(tempPred, bestPred);<br>
         }<br>
+        reuseBestMergeCand++;<br>
+    }<br></blockquote><div><br></div><div>This is way too much code duplication. Let's fold this in by changing maxNumMergeCand based on analysis-mode. <br> <br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+    else<br>
+    {<br>
+        for (uint32_t i = 0; i < maxNumMergeCand; i++)<br>
+        {<br>
+            if (m_bFrameParallel &&<br>
+                (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||<br>
+                mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))<br>
+                continue;<br>
+<br>
+            /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */<br>
+            if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)<br>
+            {<br>
+                if (triedPZero)<br>
+                    continue;<br>
+                triedPZero = true;<br>
+            }<br>
+            else if (interDirNeighbours[i] == 3 &&<br>
+                !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&<br>
+                !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)<br>
+            {<br>
+                if (triedBZero)<br>
+                    continue;<br>
+                triedBZero = true;<br>
+            }<br>
+<br>
+            tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;    /* merge candidate ID is stored in L0 MVP idx */<br>
+            tempPred->cu.m_interDir[0] = interDirNeighbours[i];<br>
+            tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;<br>
+            tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;<br>
+            tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;<br>
+            tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;<br>
+            tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */<br>
+<br>
+            prepMotionCompensation(tempPred->cu, cuGeom, 0);<br>
+            motionCompensation(tempPred->predYuv, true, true);<br>
+<br>
+            uint8_t hasCbf = true;<br>
+            bool swapped = false;<br>
+            if (!foundCbf0Merge)<br>
+            {<br>
+                /* if the best prediction has CBF (not a skip) then try merge with residual */<br>
+<br>
+                encodeResAndCalcRdInterCU(*tempPred, cuGeom);<br>
+                hasCbf = tempPred->cu.getQtRootCbf(0);<br>
+                foundCbf0Merge = !hasCbf;<br>
+<br>
+                if (tempPred->rdCost < bestPred->rdCost)<br>
+                {<br>
+                    std::swap(tempPred, bestPred);<br>
+                    swapped = true;<br>
+                }<br>
+            }<br>
+            if (!m_param->bLossless && hasCbf)<br>
+            {<br>
+                /* try merge without residual (skip), if not lossless coding */<br>
+<br>
+                if (swapped)<br>
+                {<br>
+                    tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;<br>
+                    tempPred->cu.m_interDir[0] = interDirNeighbours[i];<br>
+                    tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;<br>
+                    tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;<br>
+                    tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;<br>
+                    tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;<br>
+                    tempPred->cu.setPredModeSubParts(MODE_INTER);<br>
+                    tempPred->predYuv.copyFromYuv(bestPred->predYuv);<br>
+                }<br>
+<br>
+                encodeResAndCalcRdSkipCU(*tempPred);<br>
+<br>
+                if (tempPred->rdCost < bestPred->rdCost)<br>
+                    std::swap(tempPred, bestPred);<br>
+            }<br>
+        }<br>
     }<br>
<br>
     if (bestPred->rdCost < MAX_INT64)<br>
@@ -1473,6 +1512,12 @@<br>
         bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);<br>
         bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);<br>
     }<br>
+<br>
+    if (m_param->analysisMode == X265_ANALYSIS_SAVE)<br>
+    {<br>
+        *reuseBestMergeCand = bestPred->cu.m_mvpIdx[0][0];<br>
+        reuseBestMergeCand++;<br>
+    }<br>
 }<br>
<br>
 void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)<br>
diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.h<br>
--- a/source/encoder/analysis.h Fri Jan 30 11:54:22 2015 -0600<br>
+++ b/source/encoder/analysis.h Mon Feb 02 10:23:10 2015 +0530<br>
@@ -78,6 +78,7 @@<br>
     analysis_intra_data* m_reuseIntraDataCTU;<br>
     analysis_inter_data* m_reuseInterDataCTU;<br>
     int32_t* reuseRef;<br>
+    uint32_t* reuseBestMergeCand;<br>
     Analysis();<br>
     bool create(ThreadLocalData* tld);<br>
     void destroy();<br>
diff -r 6c5156500d6d -r db56dc779466 source/encoder/encoder.cpp<br>
--- a/source/encoder/encoder.cpp        Fri Jan 30 11:54:22 2015 -0600<br>
+++ b/source/encoder/encoder.cpp        Mon Feb 02 10:23:10 2015 +0530<br>
@@ -1628,6 +1628,7 @@<br>
         CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);<br>
         CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>
         CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>
+        CHECKED_MALLOC_ZERO(interData->bestMergeCand, uint32_t, analysis->numCUsInFrame * CUGeom::MAX_GEOMS);<br>
         analysis->interData = interData;<br>
     }<br>
     return;<br>
@@ -1651,6 +1652,7 @@<br>
         X265_FREE(((analysis_inter_data*)analysis->interData)->ref);<br>
         X265_FREE(((analysis_inter_data*)analysis->interData)->depth);<br>
         X265_FREE(((analysis_inter_data*)analysis->interData)->modes);<br>
+        X265_FREE(((analysis_inter_data*)analysis->interData)->bestMergeCand);<br>
         X265_FREE(analysis->interData);<br>
     }<br>
 }<br>
@@ -1716,6 +1718,7 @@<br>
         X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);<br>
         X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
         X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);<br>
         consumedBytes += frameRecordSize;<br>
         totalConsumedBytes = consumedBytes;<br>
     }<br>
@@ -1724,6 +1727,7 @@<br>
         X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);<br>
         X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
         X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);<br>
         consumedBytes += frameRecordSize;<br>
     }<br>
 #undef X265_FREAD<br>
@@ -1750,11 +1754,13 @@<br>
     {<br>
         analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;<br>
         analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;<br>
+        analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * CUGeom::MAX_GEOMS;<br>
     }<br>
     else<br>
     {<br>
         analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;<br>
         analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;<br>
+        analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * CUGeom::MAX_GEOMS;<br>
     }<br>
<br>
     X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);<br>
@@ -1774,12 +1780,14 @@<br>
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);<br>
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);<br>
     }<br>
     else<br>
     {<br>
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);<br>
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);<br>
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);<br>
     }<br>
 #undef X265_FWRITE<br>
 }<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div></div>