[x265] [PATCH] analysis: skip rect/amp in analysis load mode

sagar at multicorewareinc.com sagar at multicorewareinc.com
Mon Mar 28 05:51:49 CEST 2016


# HG changeset patch
# User Sagar Kotecha<sagar at multicorewareinc.com>
# Date 1458817615 -19800
#      Thu Mar 24 16:36:55 2016 +0530
# Node ID 5bccf2596d8a1d66a6a9d460e65b1b9b93c2d112
# Parent  2de6cb99313a03c3577934ac5e2e116f7ba6cd10
analysis: skip rect/amp in analysis load mode

Skip rect/AMP analysis in load mode when the save-mode run did not choose a rect/AMP partition as the best partition for that CU.

diff -r 2de6cb99313a -r 5bccf2596d8a source/common/framedata.h
--- a/source/common/framedata.h	Mon Mar 21 13:50:14 2016 +0530
+++ b/source/common/framedata.h	Thu Mar 24 16:36:55 2016 +0530
@@ -172,6 +172,8 @@
     int32_t*    ref;
     uint8_t*    depth;
     uint8_t*    modes;
+    uint8_t*    partSize;
+    uint8_t*    mergeFlag;
 };
 }
 #endif // ifndef X265_FRAMEDATA_H
diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Mar 21 13:50:14 2016 +0530
+++ b/source/encoder/analysis.cpp	Thu Mar 24 16:36:55 2016 +0530
@@ -149,6 +149,8 @@
         m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
         m_reuseDepth = &m_reuseInterDataCTU->depth[ctu.m_cuAddr * ctu.m_numPartitions];
         m_reuseModes = &m_reuseInterDataCTU->modes[ctu.m_cuAddr * ctu.m_numPartitions];
+        m_reusePartSize = &m_reuseInterDataCTU->partSize[ctu.m_cuAddr * ctu.m_numPartitions];
+        m_reuseMergeFlag = &m_reuseInterDataCTU->mergeFlag[ctu.m_cuAddr * ctu.m_numPartitions];
         if (m_param->analysisMode == X265_ANALYSIS_SAVE)
             for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir; i++)
                 m_reuseRef[i] = -1;
@@ -885,6 +887,8 @@
     uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
     bool earlyskip = false;
     bool splitIntra = true;
+    bool skipRectAmp = false;
+    bool chooseMerge = false;
 
     SplitData splitData[4];
     splitData[0].initSplitCUData();
@@ -903,15 +907,26 @@
     bool foundSkip = false;
     if (m_param->analysisMode == X265_ANALYSIS_LOAD)
     {
-        if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx] && m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
+        if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
         {
-            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
-            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
-            checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+            if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
+            {
+                md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+                md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+                checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
 
-            foundSkip = true;
-            if (m_param->rdLevel)
-                earlyskip = md.bestMode && m_param->bEnableEarlySkip;
+                foundSkip = true;
+                if (m_param->rdLevel)
+                    earlyskip = md.bestMode && m_param->bEnableEarlySkip;
+            }
+            if (m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
+            {
+                if (m_reuseModes[cuGeom.absPartIdx] != MODE_INTRA  && m_reuseModes[cuGeom.absPartIdx] != 4)
+                {
+                    skipRectAmp = true && !!md.bestMode;
+                    chooseMerge = !!m_reuseMergeFlag[cuGeom.absPartIdx] && !!md.bestMode;
+                }
+            }
         }
     }
 
@@ -1017,158 +1032,161 @@
             }
 
             Mode *bestInter = &md.pred[PRED_2Nx2N];
-            if (m_param->bEnableRectInter)
+            if (!skipRectAmp)
             {
-                uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
-                uint32_t threshold_2NxN, threshold_Nx2N;
+                if (m_param->bEnableRectInter)
+                {
+                    uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
+                    uint32_t threshold_2NxN, threshold_Nx2N;
 
-                if (m_slice->m_sliceType == P_SLICE)
-                {
-                    threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0];
-                    threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
-                }
-                else
-                {
-                    threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0] 
-                                    + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
-                    threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] 
-                                    + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
+                    if (m_slice->m_sliceType == P_SLICE)
+                    {
+                        threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0];
+                        threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
+                    }
+                    else
+                    {
+                        threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
+                                       + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
+                        threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
+                                       + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
+                    }
+
+                    int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
+                    if (try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
+                    {
+                        refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
+                        refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
+                        md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+                        checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
+                        if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
+                            bestInter = &md.pred[PRED_2NxN];
+                    }
+
+                    if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_Nx2N)
+                    {
+                        refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
+                        refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
+                        md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                        checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
+                        if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
+                            bestInter = &md.pred[PRED_Nx2N];
+                    }
+
+                    if (!try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
+                    {
+                        refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
+                        refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
+                        md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+                        checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
+                        if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
+                            bestInter = &md.pred[PRED_2NxN];
+                    }
                 }
 
-                int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
-                if (try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
+                if (m_slice->m_sps->maxAMPDepth > depth)
                 {
-                    refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
-                    refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
-                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
-                    if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
-                        bestInter = &md.pred[PRED_2NxN];
-                }
+                    uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
+                    uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N;
 
-                if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_Nx2N)
-                {
-                    refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
-                    refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
-                    md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
-                    if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
-                        bestInter = &md.pred[PRED_Nx2N];
-                }
+                    if (m_slice->m_sliceType == P_SLICE)
+                    {
+                        threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0];
+                        threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0];
 
-                if (!try_2NxN_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
-                {
-                    refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
-                    refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
-                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
-                    if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
-                        bestInter = &md.pred[PRED_2NxN];
-                }
-            }
+                        threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
+                        threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0];
+                    }
+                    else
+                    {
+                        threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
+                                         + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
+                        threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0]
+                                         + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
 
-            if (m_slice->m_sps->maxAMPDepth > depth)
-            {
-                uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
-                uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N;
-
-                if (m_slice->m_sliceType == P_SLICE)
-                {
-                    threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0];
-                    threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0];
-
-                    threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
-                    threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0];
-                }
-                else
-                {
-                    threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0] 
-                                       + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
-                    threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0] 
-                                       + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
-
-                    threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] 
-                                       + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
-                    threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0] 
-                                       + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
-                }
-
-                bool bHor = false, bVer = false;
-                if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
-                    bHor = true;
-                else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
-                    bVer = true;
-                else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
-                         md.bestMode && md.bestMode->cu.getQtRootCbf(0))
-                {
-                    bHor = true;
-                    bVer = true;
-                }
-
-                if (bHor)
-                {
-                    int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU;
-                    if (try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
-                    {
-                        refMasks[0] = allSplitRefs;                                    /* 75% top */
-                        refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
-                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
-                        if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
-                            bestInter = &md.pred[PRED_2NxnD];
+                        threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
+                                        + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
+                        threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0]
+                                        + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
                     }
 
-                    if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnU)
+                    bool bHor = false, bVer = false;
+                    if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
+                        bHor = true;
+                    else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
+                        bVer = true;
+                    else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
+                        md.bestMode && md.bestMode->cu.getQtRootCbf(0))
                     {
-                        refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
-                        refMasks[1] = allSplitRefs;                                    /* 75% bot */
-                        md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
-                        if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
-                            bestInter = &md.pred[PRED_2NxnU];
+                        bHor = true;
+                        bVer = true;
                     }
 
-                    if (!try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
+                    if (bHor)
                     {
-                        refMasks[0] = allSplitRefs;                                    /* 75% top */
-                        refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
-                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
-                        if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
-                            bestInter = &md.pred[PRED_2NxnD];
+                        int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU;
+                        if (try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
+                        {
+                            refMasks[0] = allSplitRefs;                                    /* 75% top */
+                            refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
+                            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
+                            if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
+                                bestInter = &md.pred[PRED_2NxnD];
+                        }
+
+                        if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnU)
+                        {
+                            refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
+                            refMasks[1] = allSplitRefs;                                    /* 75% bot */
+                            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
+                            if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
+                                bestInter = &md.pred[PRED_2NxnU];
+                        }
+
+                        if (!try_2NxnD_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
+                        {
+                            refMasks[0] = allSplitRefs;                                    /* 75% top */
+                            refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
+                            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
+                            if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
+                                bestInter = &md.pred[PRED_2NxnD];
+                        }
                     }
-                }
-                if (bVer)
-                {
-                    int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N;
-                    if (try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
+                    if (bVer)
                     {
-                        refMasks[0] = allSplitRefs;                                    /* 75% left  */
-                        refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
-                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
-                        if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
-                            bestInter = &md.pred[PRED_nRx2N];
-                    }
+                        int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N;
+                        if (try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
+                        {
+                            refMasks[0] = allSplitRefs;                                    /* 75% left  */
+                            refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
+                            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
+                            if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
+                                bestInter = &md.pred[PRED_nRx2N];
+                        }
 
-                    if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nLx2N)
-                    {
-                        refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left  */
-                        refMasks[1] = allSplitRefs;                                    /* 75% right */
-                        md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
-                        if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
-                            bestInter = &md.pred[PRED_nLx2N];
-                    }
+                        if (splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nLx2N)
+                        {
+                            refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left  */
+                            refMasks[1] = allSplitRefs;                                    /* 75% right */
+                            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
+                            if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
+                                bestInter = &md.pred[PRED_nLx2N];
+                        }
 
-                    if (!try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
-                    {
-                        refMasks[0] = allSplitRefs;                                    /* 75% left  */
-                        refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
-                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
-                        if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
-                            bestInter = &md.pred[PRED_nRx2N];
+                        if (!try_nRx2N_first && splitCost < md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
+                        {
+                            refMasks[0] = allSplitRefs;                                    /* 75% left  */
+                            refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
+                            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
+                            if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
+                                bestInter = &md.pred[PRED_nRx2N];
+                        }
                     }
                 }
             }
@@ -1185,15 +1203,19 @@
                         motionCompensation(bestInter->cu, pu, bestInter->predYuv, false, true);
                     }
                 }
-                encodeResAndCalcRdInterCU(*bestInter, cuGeom);
-                checkBestMode(*bestInter, depth);
 
-                /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
-                if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
-                    md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+                if (!chooseMerge)
                 {
-                    encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
-                    checkBestMode(md.pred[PRED_BIDIR], depth);
+                    encodeResAndCalcRdInterCU(*bestInter, cuGeom);
+                    checkBestMode(*bestInter, depth);
+
+                    /* If BIDIR is available and within 17/16 of best inter option, choose by RDO */
+                    if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
+                        md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17)
+                    {
+                        encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom);
+                        checkBestMode(md.pred[PRED_BIDIR], depth);
+                    }
                 }
 
                 if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
@@ -1378,6 +1400,7 @@
     bool foundSkip = false;
     bool earlyskip = false;
     bool splitIntra = true;
+    bool skipRectAmp = false;
 
     // avoid uninitialize value in below reference
     if (m_param->limitModes)
@@ -1389,14 +1412,19 @@
 
     if (m_param->analysisMode == X265_ANALYSIS_LOAD)
     {
-        if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx] && m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
+        if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
         {
-            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
-            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
-            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
+            if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
+            {
+                md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+                md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+                checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
 
-            foundSkip = true;
-            earlyskip = !!m_param->bEnableEarlySkip;
+                foundSkip = true;
+                earlyskip = !!m_param->bEnableEarlySkip;
+            }
+            if (m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
+                skipRectAmp = true && !!md.bestMode;
         }
     }
 
@@ -1502,150 +1530,153 @@
                 }
             }
 
-            if (m_param->bEnableRectInter)
+            if (!skipRectAmp)
             {
-                uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
-                uint32_t threshold_2NxN, threshold_Nx2N;
+                if (m_param->bEnableRectInter)
+                {
+                    uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
+                    uint32_t threshold_2NxN, threshold_Nx2N;
 
-                if (m_slice->m_sliceType == P_SLICE)
-                {
-                    threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0];
-                    threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
-                }
-                else
-                {
-                    threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0] 
-                                    + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
-                    threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] 
-                                    + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
-                }
-
-                int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
-                if (try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN)
-                {
-                    refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
-                    refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
-                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
-                    checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
-                }
-
-                if (splitCost < md.bestMode->rdCost + threshold_Nx2N)
-                {
-                    refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
-                    refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
-                    md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
-                    checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
-                }
-
-                if (!try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN)
-                {
-                    refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
-                    refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
-                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
-                    checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
-                    checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
-                }
-            }
-
-            // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
-            if (m_slice->m_sps->maxAMPDepth > depth)
-            {
-                uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
-                uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N;
-
-                if (m_slice->m_sliceType == P_SLICE)
-                {
-                    threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0];
-                    threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0];
-
-                    threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
-                    threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0];
-                }
-                else
-                {
-                    threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0] 
+                    if (m_slice->m_sliceType == P_SLICE)
+                    {
+                        threshold_2NxN = splitData[0].mvCost[0] + splitData[1].mvCost[0];
+                        threshold_Nx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
+                    }
+                    else
+                    {
+                        threshold_2NxN = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
                                        + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
-                    threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0] 
-                                       + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
-
-                    threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0] 
+                        threshold_Nx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
                                        + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
-                    threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0] 
-                                       + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
-                }
-
-                bool bHor = false, bVer = false;
-                if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
-                    bHor = true;
-                else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
-                    bVer = true;
-                else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0])
-                {
-                    bHor = true;
-                    bVer = true;
-                }
-
-                if (bHor)
-                {
-                    int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU;
-                    if (try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD)
-                    {
-                        refMasks[0] = allSplitRefs;                                    /* 75% top */
-                        refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
-                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
-                        checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
                     }
 
-                    if (splitCost < md.bestMode->rdCost + threshold_2NxnU)
+                    int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
+                    if (try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN)
                     {
-                        refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
-                        refMasks[1] = allSplitRefs;                                    /* 75% bot */
-                        md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
-                        checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
+                        refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
+                        refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
+                        md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+                        checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
+                        checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
                     }
 
-                    if (!try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD)
+                    if (splitCost < md.bestMode->rdCost + threshold_Nx2N)
                     {
-                        refMasks[0] = allSplitRefs;                                    /* 75% top */
-                        refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
-                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
-                        checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
+                        refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
+                        refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
+                        md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                        checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
+                        checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
+                    }
+
+                    if (!try_2NxN_first && splitCost < md.bestMode->rdCost + threshold_2NxN)
+                    {
+                        refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
+                        refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
+                        md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+                        checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
+                        checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
                     }
                 }
 
-                if (bVer)
+                // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
+                if (m_slice->m_sps->maxAMPDepth > depth)
                 {
-                    int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N;
-                    if (try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N)
+                    uint64_t splitCost = splitData[0].sa8dCost + splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
+                    uint32_t threshold_2NxnU, threshold_2NxnD, threshold_nLx2N, threshold_nRx2N;
+
+                    if (m_slice->m_sliceType == P_SLICE)
                     {
-                        refMasks[0] = allSplitRefs;                                    /* 75% left  */
-                        refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
-                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
-                        checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
+                        threshold_2NxnU = splitData[0].mvCost[0] + splitData[1].mvCost[0];
+                        threshold_2NxnD = splitData[2].mvCost[0] + splitData[3].mvCost[0];
+
+                        threshold_nLx2N = splitData[0].mvCost[0] + splitData[2].mvCost[0];
+                        threshold_nRx2N = splitData[1].mvCost[0] + splitData[3].mvCost[0];
+                    }
+                    else
+                    {
+                        threshold_2NxnU = (splitData[0].mvCost[0] + splitData[1].mvCost[0]
+                                        + splitData[0].mvCost[1] + splitData[1].mvCost[1] + 1) >> 1;
+                        threshold_2NxnD = (splitData[2].mvCost[0] + splitData[3].mvCost[0]
+                                        + splitData[2].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
+
+                        threshold_nLx2N = (splitData[0].mvCost[0] + splitData[2].mvCost[0]
+                                        + splitData[0].mvCost[1] + splitData[2].mvCost[1] + 1) >> 1;
+                        threshold_nRx2N = (splitData[1].mvCost[0] + splitData[3].mvCost[0]
+                                        + splitData[1].mvCost[1] + splitData[3].mvCost[1] + 1) >> 1;
                     }
 
-                    if (splitCost < md.bestMode->rdCost + threshold_nLx2N)
+                    bool bHor = false, bVer = false;
+                    if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
+                        bHor = true;
+                    else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
+                        bVer = true;
+                    else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N && !md.bestMode->cu.m_mergeFlag[0])
                     {
-                        refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left  */
-                        refMasks[1] = allSplitRefs;                                    /* 75% right */
-                        md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
-                        checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
+                        bHor = true;
+                        bVer = true;
                     }
 
-                    if (!try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N)
+                    if (bHor)
                     {
-                        refMasks[0] = allSplitRefs;                                    /* 75% left  */
-                        refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
-                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
-                        checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
-                        checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
+                        int try_2NxnD_first = threshold_2NxnD < threshold_2NxnU;
+                        if (try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD)
+                        {
+                            refMasks[0] = allSplitRefs;                                    /* 75% top */
+                            refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
+                            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
+                            checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
+                        }
+
+                        if (splitCost < md.bestMode->rdCost + threshold_2NxnU)
+                        {
+                            refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
+                            refMasks[1] = allSplitRefs;                                    /* 75% bot */
+                            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
+                            checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
+                        }
+
+                        if (!try_2NxnD_first && splitCost < md.bestMode->rdCost + threshold_2NxnD)
+                        {
+                            refMasks[0] = allSplitRefs;                                    /* 75% top */
+                            refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
+                            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
+                            checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
+                        }
+                    }
+
+                    if (bVer)
+                    {
+                        int try_nRx2N_first = threshold_nRx2N < threshold_nLx2N;
+                        if (try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N)
+                        {
+                            refMasks[0] = allSplitRefs;                                    /* 75% left  */
+                            refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
+                            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
+                            checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
+                        }
+
+                        if (splitCost < md.bestMode->rdCost + threshold_nLx2N)
+                        {
+                            refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left  */
+                            refMasks[1] = allSplitRefs;                                    /* 75% right */
+                            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
+                            checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
+                        }
+
+                        if (!try_nRx2N_first && splitCost < md.bestMode->rdCost + threshold_nRx2N)
+                        {
+                            refMasks[0] = allSplitRefs;                                    /* 75% left  */
+                            refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
+                            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+                            checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
+                            checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
+                        }
                     }
                 }
             }
diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/analysis.h
--- a/source/encoder/analysis.h	Mon Mar 21 13:50:14 2016 +0530
+++ b/source/encoder/analysis.h	Thu Mar 24 16:36:55 2016 +0530
@@ -122,6 +122,8 @@
     int32_t*             m_reuseRef;
     uint8_t*             m_reuseDepth;
     uint8_t*             m_reuseModes;
+    uint8_t*             m_reusePartSize;
+    uint8_t*             m_reuseMergeFlag;
 
     uint32_t m_splitRefIdx[4];
     uint64_t* cacheCost;
diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Mar 21 13:50:14 2016 +0530
+++ b/source/encoder/encoder.cpp	Thu Mar 24 16:36:55 2016 +0530
@@ -1918,6 +1918,8 @@
         CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir);
         CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
+        CHECKED_MALLOC(interData->partSize, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
+        CHECKED_MALLOC(interData->mergeFlag, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC_ZERO(interData->wt, WeightParam, 3 * numDir);
         analysis->interData = interData;
     }
@@ -1943,6 +1945,8 @@
         X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
         X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
         X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
+        X265_FREE(((analysis_inter_data*)analysis->interData)->mergeFlag);
+        X265_FREE(((analysis_inter_data*)analysis->interData)->partSize);
         X265_FREE(((analysis_inter_data*)analysis->interData)->wt);
         X265_FREE(analysis->interData);
     }
@@ -2029,13 +2033,15 @@
 
     else
     {
-        uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL;
+        uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSize = NULL, *mergeFlag = NULL;
 
-        tempBuf = X265_MALLOC(uint8_t, depthBytes * 2);
-        X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes * 2, m_analysisFile);
+        tempBuf = X265_MALLOC(uint8_t, depthBytes * 4);
+        X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes * 4, m_analysisFile);
 
         depthBuf = tempBuf;
-        modeBuf  = tempBuf + depthBytes;
+        modeBuf = tempBuf + depthBytes;
+        partSize = modeBuf + depthBytes;
+        mergeFlag = partSize + depthBytes;
 
         size_t count = 0;
         for (uint32_t d = 0; d < depthBytes; d++)
@@ -2043,13 +2049,15 @@
             int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
             memset(&((analysis_inter_data *)analysis->interData)->depth[count], depthBuf[d], bytes);
             memset(&((analysis_inter_data *)analysis->interData)->modes[count], modeBuf[d], bytes);
+            memset(&((analysis_inter_data *)analysis->interData)->partSize[count], partSize[d], bytes);
+            memset(&((analysis_inter_data *)analysis->interData)->mergeFlag[count], mergeFlag[d], bytes);
             count += bytes;
         }
-        
+
         X265_FREE(tempBuf);
-        
+
         int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
-        X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFile);      
+        X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFile);
         uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 : 3;
         X265_FREAD(((analysis_inter_data *)analysis->interData)->wt, sizeof(WeightParam), numPlanes * numDir, m_analysisFile);
         consumedBytes += frameRecordSize;
@@ -2105,6 +2113,8 @@
         {
             uint8_t depth = 0;
             uint8_t predMode = 0;
+            uint8_t partSize = 0;
+            uint8_t mergeFlag = 0;
 
             CUData* ctu = curEncData.getPicCTU(cuAddr);
             analysis_inter_data* interDataCTU = (analysis_inter_data*)analysis->interData;
@@ -2115,8 +2125,17 @@
                 interDataCTU->depth[depthBytes] = depth;
 
                 predMode = ctu->m_predMode[absPartIdx];
+                if (ctu->m_refIdx[1][absPartIdx] != -1)
+                    predMode = 4; // used as an indicator that the block is coded as bidir
+
                 interDataCTU->modes[depthBytes] = predMode;
 
+                partSize = ctu->m_partSize[absPartIdx];
+                interDataCTU->partSize[depthBytes] = partSize;
+
+                mergeFlag = ctu->m_mergeFlag[absPartIdx];
+                interDataCTU->mergeFlag[depthBytes] = mergeFlag;
+
                 absPartIdx += ctu->m_numPartitions >> (depth * 2);
             }
         }
@@ -2130,9 +2149,9 @@
     else
     {
         int numDir = (analysis->sliceType == X265_TYPE_P) ? 1 : 2;
-        analysis->frameRecordSize += depthBytes * 2;
-        analysis->frameRecordSize += sizeof(MV) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir;
-        analysis->frameRecordSize += sizeof(WeightParam) * 3 * numDir;
+        analysis->frameRecordSize += depthBytes * 4;
+        analysis->frameRecordSize += sizeof(int32_t)* analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir;
+        analysis->frameRecordSize += sizeof(WeightParam)* 3 * numDir;
     }
     X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);
     X265_FWRITE(&depthBytes, sizeof(uint32_t), 1, m_analysisFile);
@@ -2155,6 +2174,8 @@
         int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), depthBytes, m_analysisFile);
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), depthBytes, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->partSize, sizeof(uint8_t), depthBytes, m_analysisFile);
+        X265_FWRITE(((analysis_inter_data*)analysis->interData)->mergeFlag, sizeof(uint8_t), depthBytes, m_analysisFile);
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFile);
         uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 : 3;
         X265_FWRITE(((analysis_inter_data*)analysis->interData)->wt, sizeof(WeightParam), numPlanes * numDir, m_analysisFile);


More information about the x265-devel mailing list