[x265] [PATCH] analysis: skip rect/amp in analysis load mode

Deepthi Nandakumar deepthi at multicorewareinc.com
Fri Apr 8 16:51:48 CEST 2016


Can you please regenerate this patch at the current tip?

On Mon, Mar 28, 2016 at 9:21 AM, <sagar at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Sagar Kotecha<sagar at multicorewareinc.com>
> # Date 1458817615 -19800
> #      Thu Mar 24 16:36:55 2016 +0530
> # Node ID 5bccf2596d8a1d66a6a9d460e65b1b9b93c2d112
> # Parent  2de6cb99313a03c3577934ac5e2e116f7ba6cd10
> analysis: skip rect/amp in analysis load mode
>
> Skip rect/AMP analysis in analysis load mode when the analysis save run
> did not choose a rect/AMP partition as the best partition
>
> diff -r 2de6cb99313a -r 5bccf2596d8a source/common/framedata.h
> --- a/source/common/framedata.h Mon Mar 21 13:50:14 2016 +0530
> +++ b/source/common/framedata.h Thu Mar 24 16:36:55 2016 +0530
> @@ -172,6 +172,8 @@
>      int32_t*    ref;
>      uint8_t*    depth;
>      uint8_t*    modes;
> +    uint8_t*    partSize;
> +    uint8_t*    mergeFlag;
>  };
>  }
>  #endif // ifndef X265_FRAMEDATA_H
> diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Mon Mar 21 13:50:14 2016 +0530
> +++ b/source/encoder/analysis.cpp       Thu Mar 24 16:36:55 2016 +0530
> @@ -149,6 +149,8 @@
>          m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr *
> X265_MAX_PRED_MODE_PER_CTU * numPredDir];
>          m_reuseDepth = &m_reuseInterDataCTU->depth[ctu.m_cuAddr *
> ctu.m_numPartitions];
>          m_reuseModes = &m_reuseInterDataCTU->modes[ctu.m_cuAddr *
> ctu.m_numPartitions];
> +        m_reusePartSize = &m_reuseInterDataCTU->partSize[ctu.m_cuAddr *
> ctu.m_numPartitions];
> +        m_reuseMergeFlag = &m_reuseInterDataCTU->mergeFlag[ctu.m_cuAddr *
> ctu.m_numPartitions];
>          if (m_param->analysisMode == X265_ANALYSIS_SAVE)
>              for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir;
> i++)
>                  m_reuseRef[i] = -1;
> @@ -885,6 +887,8 @@
>      uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
>      bool earlyskip = false;
>      bool splitIntra = true;
> +    bool skipRectAmp = false;
> +    bool chooseMerge = false;
>
>      SplitData splitData[4];
>      splitData[0].initSplitCUData();
> @@ -903,15 +907,26 @@
>      bool foundSkip = false;
>      if (m_param->analysisMode == X265_ANALYSIS_LOAD)
>      {
> -        if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx] &&
> m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
> +        if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
>          {
> -            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> -            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> -            checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
> +            if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
> +            {
> +                md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> +                md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> +                checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
>
> -            foundSkip = true;
> -            if (m_param->rdLevel)
> -                earlyskip = md.bestMode && m_param->bEnableEarlySkip;
> +                foundSkip = true;
> +                if (m_param->rdLevel)
> +                    earlyskip = md.bestMode && m_param->bEnableEarlySkip;
> +            }
> +            if (m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
> +            {
> +                if (m_reuseModes[cuGeom.absPartIdx] != MODE_INTRA  &&
> m_reuseModes[cuGeom.absPartIdx] != 4)
> +                {
> +                    skipRectAmp = true && !!md.bestMode;
> +                    chooseMerge = !!m_reuseMergeFlag[cuGeom.absPartIdx]
> && !!md.bestMode;
> +                }
> +            }
>          }
>      }
>
> @@ -1017,158 +1032,161 @@
>              }
>
>              Mode *bestInter = &md.pred[PRED_2Nx2N];
> -            if (m_param->bEnableRectInter)
> +            if (!skipRectAmp)
>              {
> -                uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> -                uint32_t threshold_2NxN, threshold_Nx2N;
> +                if (m_param->bEnableRectInter)
> +                {
> +                    uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> +                    uint32_t threshold_2NxN, threshold_Nx2N;
>
> -                if (m_slice->m_sliceType == P_SLICE)
> -                {
> -                    threshold_2NxN = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> -                    threshold_Nx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> -                }
> -                else
> -                {
> -                    threshold_2NxN = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> -                                    + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> -                    threshold_Nx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> -                                    + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> +                    if (m_slice->m_sliceType == P_SLICE)
> +                    {
> +                        threshold_2NxN = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> +                        threshold_Nx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> +                    }
> +                    else
> +                    {
> +                        threshold_2NxN = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> +                                       + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> +                        threshold_Nx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> +                                       + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> +                    }
> +
> +                    int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
> +                    if (try_2NxN_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
> +                    {
> +                        refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> +                        refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> +                        md.pred[PRED_2NxN].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                        checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> +                        if (md.pred[PRED_2NxN].sa8dCost <
> bestInter->sa8dCost)
> +                            bestInter = &md.pred[PRED_2NxN];
> +                    }
> +
> +                    if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_Nx2N)
> +                    {
> +                        refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* left */
> +                        refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* right */
> +                        md.pred[PRED_Nx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                        checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom,
> SIZE_Nx2N, refMasks);
> +                        if (md.pred[PRED_Nx2N].sa8dCost <
> bestInter->sa8dCost)
> +                            bestInter = &md.pred[PRED_Nx2N];
> +                    }
> +
> +                    if (!try_2NxN_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
> +                    {
> +                        refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> +                        refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> +                        md.pred[PRED_2NxN].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                        checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> +                        if (md.pred[PRED_2NxN].sa8dCost <
> bestInter->sa8dCost)
> +                            bestInter = &md.pred[PRED_2NxN];
> +                    }
>                  }
>
> -                int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
> -                if (try_2NxN_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
> +                if (m_slice->m_sps->maxAMPDepth > depth)
>                  {
> -                    refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> -                    refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> -                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom,
> qp);
> -                    checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> -                    if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
> -                        bestInter = &md.pred[PRED_2NxN];
> -                }
> +                    uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> +                    uint32_t threshold_2NxnU, threshold_2NxnD,
> threshold_nLx2N, threshold_nRx2N;
>
> -                if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_Nx2N)
> -                {
> -                    refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* left */
> -                    refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* right */
> -                    md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom,
> qp);
> -                    checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom,
> SIZE_Nx2N, refMasks);
> -                    if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
> -                        bestInter = &md.pred[PRED_Nx2N];
> -                }
> +                    if (m_slice->m_sliceType == P_SLICE)
> +                    {
> +                        threshold_2NxnU = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> +                        threshold_2NxnD = splitData[2].mvCost[0] +
> splitData[3].mvCost[0];
>
> -                if (!try_2NxN_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
> -                {
> -                    refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> -                    refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> -                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom,
> qp);
> -                    checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> -                    if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
> -                        bestInter = &md.pred[PRED_2NxN];
> -                }
> -            }
> +                        threshold_nLx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> +                        threshold_nRx2N = splitData[1].mvCost[0] +
> splitData[3].mvCost[0];
> +                    }
> +                    else
> +                    {
> +                        threshold_2NxnU = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> +                                         + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> +                        threshold_2NxnD = (splitData[2].mvCost[0] +
> splitData[3].mvCost[0]
> +                                         + splitData[2].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
>
> -            if (m_slice->m_sps->maxAMPDepth > depth)
> -            {
> -                uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> -                uint32_t threshold_2NxnU, threshold_2NxnD,
> threshold_nLx2N, threshold_nRx2N;
> -
> -                if (m_slice->m_sliceType == P_SLICE)
> -                {
> -                    threshold_2NxnU = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> -                    threshold_2NxnD = splitData[2].mvCost[0] +
> splitData[3].mvCost[0];
> -
> -                    threshold_nLx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> -                    threshold_nRx2N = splitData[1].mvCost[0] +
> splitData[3].mvCost[0];
> -                }
> -                else
> -                {
> -                    threshold_2NxnU = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> -                                       + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> -                    threshold_2NxnD = (splitData[2].mvCost[0] +
> splitData[3].mvCost[0]
> -                                       + splitData[2].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> -
> -                    threshold_nLx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> -                                       + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> -                    threshold_nRx2N = (splitData[1].mvCost[0] +
> splitData[3].mvCost[0]
> -                                       + splitData[1].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> -                }
> -
> -                bool bHor = false, bVer = false;
> -                if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
> -                    bHor = true;
> -                else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
> -                    bVer = true;
> -                else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
> -                         md.bestMode && md.bestMode->cu.getQtRootCbf(0))
> -                {
> -                    bHor = true;
> -                    bVer = true;
> -                }
> -
> -                if (bHor)
> -                {
> -                    int try_2NxnD_first = threshold_2NxnD <
> threshold_2NxnU;
> -                    if (try_2NxnD_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
> -                    {
> -                        refMasks[0] = allSplitRefs;
>               /* 75% top */
> -                        refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> -                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> -                        if (md.pred[PRED_2NxnD].sa8dCost <
> bestInter->sa8dCost)
> -                            bestInter = &md.pred[PRED_2NxnD];
> +                        threshold_nLx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> +                                        + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> +                        threshold_nRx2N = (splitData[1].mvCost[0] +
> splitData[3].mvCost[0]
> +                                        + splitData[1].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
>                      }
>
> -                    if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_2NxnU)
> +                    bool bHor = false, bVer = false;
> +                    if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
> +                        bHor = true;
> +                    else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
> +                        bVer = true;
> +                    else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
> +                        md.bestMode && md.bestMode->cu.getQtRootCbf(0))
>                      {
> -                        refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* 25% top */
> -                        refMasks[1] = allSplitRefs;
>               /* 75% bot */
> -                        md.pred[PRED_2NxnU].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom,
> SIZE_2NxnU, refMasks);
> -                        if (md.pred[PRED_2NxnU].sa8dCost <
> bestInter->sa8dCost)
> -                            bestInter = &md.pred[PRED_2NxnU];
> +                        bHor = true;
> +                        bVer = true;
>                      }
>
> -                    if (!try_2NxnD_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
> +                    if (bHor)
>                      {
> -                        refMasks[0] = allSplitRefs;
>               /* 75% top */
> -                        refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> -                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> -                        if (md.pred[PRED_2NxnD].sa8dCost <
> bestInter->sa8dCost)
> -                            bestInter = &md.pred[PRED_2NxnD];
> +                        int try_2NxnD_first = threshold_2NxnD <
> threshold_2NxnU;
> +                        if (try_2NxnD_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
> +                        {
> +                            refMasks[0] = allSplitRefs;
>                   /* 75% top */
> +                            refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> +                            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> +                            if (md.pred[PRED_2NxnD].sa8dCost <
> bestInter->sa8dCost)
> +                                bestInter = &md.pred[PRED_2NxnD];
> +                        }
> +
> +                        if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_2NxnU)
> +                        {
> +                            refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* 25% top */
> +                            refMasks[1] = allSplitRefs;
>                   /* 75% bot */
> +                            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom,
> SIZE_2NxnU, refMasks);
> +                            if (md.pred[PRED_2NxnU].sa8dCost <
> bestInter->sa8dCost)
> +                                bestInter = &md.pred[PRED_2NxnU];
> +                        }
> +
> +                        if (!try_2NxnD_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
> +                        {
> +                            refMasks[0] = allSplitRefs;
>                   /* 75% top */
> +                            refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> +                            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> +                            if (md.pred[PRED_2NxnD].sa8dCost <
> bestInter->sa8dCost)
> +                                bestInter = &md.pred[PRED_2NxnD];
> +                        }
>                      }
> -                }
> -                if (bVer)
> -                {
> -                    int try_nRx2N_first = threshold_nRx2N <
> threshold_nLx2N;
> -                    if (try_nRx2N_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
> +                    if (bVer)
>                      {
> -                        refMasks[0] = allSplitRefs;
>               /* 75% left  */
> -                        refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> -                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> -                        if (md.pred[PRED_nRx2N].sa8dCost <
> bestInter->sa8dCost)
> -                            bestInter = &md.pred[PRED_nRx2N];
> -                    }
> +                        int try_nRx2N_first = threshold_nRx2N <
> threshold_nLx2N;
> +                        if (try_nRx2N_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
> +                        {
> +                            refMasks[0] = allSplitRefs;
>                   /* 75% left  */
> +                            refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> +                            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> +                            if (md.pred[PRED_nRx2N].sa8dCost <
> bestInter->sa8dCost)
> +                                bestInter = &md.pred[PRED_nRx2N];
> +                        }
>
> -                    if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_nLx2N)
> -                    {
> -                        refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* 25% left  */
> -                        refMasks[1] = allSplitRefs;
>               /* 75% right */
> -                        md.pred[PRED_nLx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom,
> SIZE_nLx2N, refMasks);
> -                        if (md.pred[PRED_nLx2N].sa8dCost <
> bestInter->sa8dCost)
> -                            bestInter = &md.pred[PRED_nLx2N];
> -                    }
> +                        if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_nLx2N)
> +                        {
> +                            refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* 25% left  */
> +                            refMasks[1] = allSplitRefs;
>                   /* 75% right */
> +                            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom,
> SIZE_nLx2N, refMasks);
> +                            if (md.pred[PRED_nLx2N].sa8dCost <
> bestInter->sa8dCost)
> +                                bestInter = &md.pred[PRED_nLx2N];
> +                        }
>
> -                    if (!try_nRx2N_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
> -                    {
> -                        refMasks[0] = allSplitRefs;
>               /* 75% left  */
> -                        refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> -                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> -                        if (md.pred[PRED_nRx2N].sa8dCost <
> bestInter->sa8dCost)
> -                            bestInter = &md.pred[PRED_nRx2N];
> +                        if (!try_nRx2N_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
> +                        {
> +                            refMasks[0] = allSplitRefs;
>                   /* 75% left  */
> +                            refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> +                            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> +                            if (md.pred[PRED_nRx2N].sa8dCost <
> bestInter->sa8dCost)
> +                                bestInter = &md.pred[PRED_nRx2N];
> +                        }
>                      }
>                  }
>              }
> @@ -1185,15 +1203,19 @@
>                          motionCompensation(bestInter->cu, pu,
> bestInter->predYuv, false, true);
>                      }
>                  }
> -                encodeResAndCalcRdInterCU(*bestInter, cuGeom);
> -                checkBestMode(*bestInter, depth);
>
> -                /* If BIDIR is available and within 17/16 of best inter
> option, choose by RDO */
> -                if (m_slice->m_sliceType == B_SLICE &&
> md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
> -                    md.pred[PRED_BIDIR].sa8dCost * 16 <=
> bestInter->sa8dCost * 17)
> +                if (!chooseMerge)
>                  {
> -                    encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR],
> cuGeom);
> -                    checkBestMode(md.pred[PRED_BIDIR], depth);
> +                    encodeResAndCalcRdInterCU(*bestInter, cuGeom);
> +                    checkBestMode(*bestInter, depth);
> +
> +                    /* If BIDIR is available and within 17/16 of best
> inter option, choose by RDO */
> +                    if (m_slice->m_sliceType == B_SLICE &&
> md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
> +                        md.pred[PRED_BIDIR].sa8dCost * 16 <=
> bestInter->sa8dCost * 17)
> +                    {
> +                        encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR],
> cuGeom);
> +                        checkBestMode(md.pred[PRED_BIDIR], depth);
> +                    }
>                  }
>
>                  if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
> @@ -1378,6 +1400,7 @@
>      bool foundSkip = false;
>      bool earlyskip = false;
>      bool splitIntra = true;
> +    bool skipRectAmp = false;
>
>      // avoid uninitialize value in below reference
>      if (m_param->limitModes)
> @@ -1389,14 +1412,19 @@
>
>      if (m_param->analysisMode == X265_ANALYSIS_LOAD)
>      {
> -        if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx] &&
> m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
> +        if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
>          {
> -            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> -            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> -            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
> +            if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
> +            {
> +                md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> +                md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> +                checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
>
> -            foundSkip = true;
> -            earlyskip = !!m_param->bEnableEarlySkip;
> +                foundSkip = true;
> +                earlyskip = !!m_param->bEnableEarlySkip;
> +            }
> +            if (m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
> +                skipRectAmp = true && !!md.bestMode;
>          }
>      }
>
> @@ -1502,150 +1530,153 @@
>                  }
>              }
>
> -            if (m_param->bEnableRectInter)
> +            if (!skipRectAmp)
>              {
> -                uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> -                uint32_t threshold_2NxN, threshold_Nx2N;
> +                if (m_param->bEnableRectInter)
> +                {
> +                    uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> +                    uint32_t threshold_2NxN, threshold_Nx2N;
>
> -                if (m_slice->m_sliceType == P_SLICE)
> -                {
> -                    threshold_2NxN = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> -                    threshold_Nx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> -                }
> -                else
> -                {
> -                    threshold_2NxN = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> -                                    + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> -                    threshold_Nx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> -                                    + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> -                }
> -
> -                int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
> -                if (try_2NxN_first && splitCost < md.bestMode->rdCost +
> threshold_2NxN)
> -                {
> -                    refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> -                    refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> -                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom,
> qp);
> -                    checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> -                    checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
> -                }
> -
> -                if (splitCost < md.bestMode->rdCost + threshold_Nx2N)
> -                {
> -                    refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* left */
> -                    refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* right */
> -                    md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom,
> qp);
> -                    checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom,
> SIZE_Nx2N, refMasks);
> -                    checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
> -                }
> -
> -                if (!try_2NxN_first && splitCost < md.bestMode->rdCost +
> threshold_2NxN)
> -                {
> -                    refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> -                    refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> -                    md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom,
> qp);
> -                    checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> -                    checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
> -                }
> -            }
> -
> -            // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
> -            if (m_slice->m_sps->maxAMPDepth > depth)
> -            {
> -                uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> -                uint32_t threshold_2NxnU, threshold_2NxnD,
> threshold_nLx2N, threshold_nRx2N;
> -
> -                if (m_slice->m_sliceType == P_SLICE)
> -                {
> -                    threshold_2NxnU = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> -                    threshold_2NxnD = splitData[2].mvCost[0] +
> splitData[3].mvCost[0];
> -
> -                    threshold_nLx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> -                    threshold_nRx2N = splitData[1].mvCost[0] +
> splitData[3].mvCost[0];
> -                }
> -                else
> -                {
> -                    threshold_2NxnU = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> +                    if (m_slice->m_sliceType == P_SLICE)
> +                    {
> +                        threshold_2NxN = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> +                        threshold_Nx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> +                    }
> +                    else
> +                    {
> +                        threshold_2NxN = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
>                                         + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> -                    threshold_2NxnD = (splitData[2].mvCost[0] +
> splitData[3].mvCost[0]
> -                                       + splitData[2].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> -
> -                    threshold_nLx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> +                        threshold_Nx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
>                                         + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> -                    threshold_nRx2N = (splitData[1].mvCost[0] +
> splitData[3].mvCost[0]
> -                                       + splitData[1].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> -                }
> -
> -                bool bHor = false, bVer = false;
> -                if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
> -                    bHor = true;
> -                else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
> -                    bVer = true;
> -                else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N &&
> !md.bestMode->cu.m_mergeFlag[0])
> -                {
> -                    bHor = true;
> -                    bVer = true;
> -                }
> -
> -                if (bHor)
> -                {
> -                    int try_2NxnD_first = threshold_2NxnD <
> threshold_2NxnU;
> -                    if (try_2NxnD_first && splitCost <
> md.bestMode->rdCost + threshold_2NxnD)
> -                    {
> -                        refMasks[0] = allSplitRefs;
>               /* 75% top */
> -                        refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> -                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> -                        checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
>                      }
>
> -                    if (splitCost < md.bestMode->rdCost + threshold_2NxnU)
> +                    int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
> +                    if (try_2NxN_first && splitCost < md.bestMode->rdCost
> + threshold_2NxN)
>                      {
> -                        refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* 25% top */
> -                        refMasks[1] = allSplitRefs;
>               /* 75% bot */
> -                        md.pred[PRED_2NxnU].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom,
> SIZE_2NxnU, refMasks);
> -                        checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
> +                        refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> +                        refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> +                        md.pred[PRED_2NxN].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                        checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> +                        checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
>                      }
>
> -                    if (!try_2NxnD_first && splitCost <
> md.bestMode->rdCost + threshold_2NxnD)
> +                    if (splitCost < md.bestMode->rdCost + threshold_Nx2N)
>                      {
> -                        refMasks[0] = allSplitRefs;
>               /* 75% top */
> -                        refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> -                        md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> -                        checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
> +                        refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* left */
> +                        refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* right */
> +                        md.pred[PRED_Nx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                        checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom,
> SIZE_Nx2N, refMasks);
> +                        checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
> +                    }
> +
> +                    if (!try_2NxN_first && splitCost <
> md.bestMode->rdCost + threshold_2NxN)
> +                    {
> +                        refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> +                        refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> +                        md.pred[PRED_2NxN].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                        checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> +                        checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
>                      }
>                  }
>
> -                if (bVer)
> +                // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N,
> SIZE_nRx2N)
> +                if (m_slice->m_sps->maxAMPDepth > depth)
>                  {
> -                    int try_nRx2N_first = threshold_nRx2N <
> threshold_nLx2N;
> -                    if (try_nRx2N_first && splitCost <
> md.bestMode->rdCost + threshold_nRx2N)
> +                    uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> +                    uint32_t threshold_2NxnU, threshold_2NxnD,
> threshold_nLx2N, threshold_nRx2N;
> +
> +                    if (m_slice->m_sliceType == P_SLICE)
>                      {
> -                        refMasks[0] = allSplitRefs;
>               /* 75% left  */
> -                        refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> -                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> -                        checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
> +                        threshold_2NxnU = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> +                        threshold_2NxnD = splitData[2].mvCost[0] +
> splitData[3].mvCost[0];
> +
> +                        threshold_nLx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> +                        threshold_nRx2N = splitData[1].mvCost[0] +
> splitData[3].mvCost[0];
> +                    }
> +                    else
> +                    {
> +                        threshold_2NxnU = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> +                                        + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> +                        threshold_2NxnD = (splitData[2].mvCost[0] +
> splitData[3].mvCost[0]
> +                                        + splitData[2].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> +
> +                        threshold_nLx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> +                                        + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> +                        threshold_nRx2N = (splitData[1].mvCost[0] +
> splitData[3].mvCost[0]
> +                                        + splitData[1].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
>                      }
>
> -                    if (splitCost < md.bestMode->rdCost + threshold_nLx2N)
> +                    bool bHor = false, bVer = false;
> +                    if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
> +                        bHor = true;
> +                    else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
> +                        bVer = true;
> +                    else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N
> && !md.bestMode->cu.m_mergeFlag[0])
>                      {
> -                        refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* 25% left  */
> -                        refMasks[1] = allSplitRefs;
>               /* 75% right */
> -                        md.pred[PRED_nLx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom,
> SIZE_nLx2N, refMasks);
> -                        checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
> +                        bHor = true;
> +                        bVer = true;
>                      }
>
> -                    if (!try_nRx2N_first && splitCost <
> md.bestMode->rdCost + threshold_nRx2N)
> +                    if (bHor)
>                      {
> -                        refMasks[0] = allSplitRefs;
>               /* 75% left  */
> -                        refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> -                        md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> -                        checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> -                        checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
> +                        int try_2NxnD_first = threshold_2NxnD <
> threshold_2NxnU;
> +                        if (try_2NxnD_first && splitCost <
> md.bestMode->rdCost + threshold_2NxnD)
> +                        {
> +                            refMasks[0] = allSplitRefs;
>                   /* 75% top */
> +                            refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> +                            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> +                            checkBestMode(md.pred[PRED_2NxnD],
> cuGeom.depth);
> +                        }
> +
> +                        if (splitCost < md.bestMode->rdCost +
> threshold_2NxnU)
> +                        {
> +                            refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* 25% top */
> +                            refMasks[1] = allSplitRefs;
>                   /* 75% bot */
> +                            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom,
> SIZE_2NxnU, refMasks);
> +                            checkBestMode(md.pred[PRED_2NxnU],
> cuGeom.depth);
> +                        }
> +
> +                        if (!try_2NxnD_first && splitCost <
> md.bestMode->rdCost + threshold_2NxnD)
> +                        {
> +                            refMasks[0] = allSplitRefs;
>                   /* 75% top */
> +                            refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> +                            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> +                            checkBestMode(md.pred[PRED_2NxnD],
> cuGeom.depth);
> +                        }
> +                    }
> +
> +                    if (bVer)
> +                    {
> +                        int try_nRx2N_first = threshold_nRx2N <
> threshold_nLx2N;
> +                        if (try_nRx2N_first && splitCost <
> md.bestMode->rdCost + threshold_nRx2N)
> +                        {
> +                            refMasks[0] = allSplitRefs;
>                   /* 75% left  */
> +                            refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> +                            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> +                            checkBestMode(md.pred[PRED_nRx2N],
> cuGeom.depth);
> +                        }
> +
> +                        if (splitCost < md.bestMode->rdCost +
> threshold_nLx2N)
> +                        {
> +                            refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* 25% left  */
> +                            refMasks[1] = allSplitRefs;
>                   /* 75% right */
> +                            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom,
> SIZE_nLx2N, refMasks);
> +                            checkBestMode(md.pred[PRED_nLx2N],
> cuGeom.depth);
> +                        }
> +
> +                        if (!try_nRx2N_first && splitCost <
> md.bestMode->rdCost + threshold_nRx2N)
> +                        {
> +                            refMasks[0] = allSplitRefs;
>                   /* 75% left  */
> +                            refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> +                            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> +                            checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> +                            checkBestMode(md.pred[PRED_nRx2N],
> cuGeom.depth);
> +                        }
>                      }
>                  }
>              }
> diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/analysis.h
> --- a/source/encoder/analysis.h Mon Mar 21 13:50:14 2016 +0530
> +++ b/source/encoder/analysis.h Thu Mar 24 16:36:55 2016 +0530
> @@ -122,6 +122,8 @@
>      int32_t*             m_reuseRef;
>      uint8_t*             m_reuseDepth;
>      uint8_t*             m_reuseModes;
> +    uint8_t*             m_reusePartSize;
> +    uint8_t*             m_reuseMergeFlag;
>
>      uint32_t m_splitRefIdx[4];
>      uint64_t* cacheCost;
> diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Mon Mar 21 13:50:14 2016 +0530
> +++ b/source/encoder/encoder.cpp        Thu Mar 24 16:36:55 2016 +0530
> @@ -1918,6 +1918,8 @@
>          CHECKED_MALLOC_ZERO(interData->ref, int32_t,
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir);
>          CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions
> * analysis->numCUsInFrame);
>          CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions
> * analysis->numCUsInFrame);
> +        CHECKED_MALLOC(interData->partSize, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +        CHECKED_MALLOC(interData->mergeFlag, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>          CHECKED_MALLOC_ZERO(interData->wt, WeightParam, 3 * numDir);
>          analysis->interData = interData;
>      }
> @@ -1943,6 +1945,8 @@
>          X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
>          X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
>          X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
> +        X265_FREE(((analysis_inter_data*)analysis->interData)->mergeFlag);
> +        X265_FREE(((analysis_inter_data*)analysis->interData)->partSize);
>          X265_FREE(((analysis_inter_data*)analysis->interData)->wt);
>          X265_FREE(analysis->interData);
>      }
> @@ -2029,13 +2033,15 @@
>
>      else
>      {
> -        uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL;
> +        uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSize = NULL, *mergeFlag = NULL;
>
> -        tempBuf = X265_MALLOC(uint8_t, depthBytes * 2);
> -        X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes * 2,
> m_analysisFile);
> +        tempBuf = X265_MALLOC(uint8_t, depthBytes * 4);
> +        X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes * 4,
> m_analysisFile);
>
>          depthBuf = tempBuf;
> -        modeBuf  = tempBuf + depthBytes;
> +        modeBuf = tempBuf + depthBytes;
> +        partSize = modeBuf + depthBytes;
> +        mergeFlag = partSize + depthBytes;
>
>          size_t count = 0;
>          for (uint32_t d = 0; d < depthBytes; d++)
> @@ -2043,13 +2049,15 @@
>              int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
>              memset(&((analysis_inter_data
> *)analysis->interData)->depth[count], depthBuf[d], bytes);
>              memset(&((analysis_inter_data
> *)analysis->interData)->modes[count], modeBuf[d], bytes);
> +            memset(&((analysis_inter_data
> *)analysis->interData)->partSize[count], partSize[d], bytes);
> +            memset(&((analysis_inter_data
> *)analysis->interData)->mergeFlag[count], mergeFlag[d], bytes);
>              count += bytes;
>          }
> -
> +
>          X265_FREE(tempBuf);
> -
> +
>          int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
> -        X265_FREAD(((analysis_inter_data *)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU *
> numDir, m_analysisFile);
> +        X265_FREAD(((analysis_inter_data *)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU *
> numDir, m_analysisFile);
>          uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 :
> 3;
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->wt,
> sizeof(WeightParam), numPlanes * numDir, m_analysisFile);
>          consumedBytes += frameRecordSize;
> @@ -2105,6 +2113,8 @@
>          {
>              uint8_t depth = 0;
>              uint8_t predMode = 0;
> +            uint8_t partSize = 0;
> +            uint8_t mergeFlag = 0;
>
>              CUData* ctu = curEncData.getPicCTU(cuAddr);
>              analysis_inter_data* interDataCTU =
> (analysis_inter_data*)analysis->interData;
> @@ -2115,8 +2125,17 @@
>                  interDataCTU->depth[depthBytes] = depth;
>
>                  predMode = ctu->m_predMode[absPartIdx];
> +                if (ctu->m_refIdx[1][absPartIdx] != -1)
> +                    predMode = 4; // used as indicator if the block is
> coded as bidir
> +
>                  interDataCTU->modes[depthBytes] = predMode;
>
> +                partSize = ctu->m_partSize[absPartIdx];
> +                interDataCTU->partSize[depthBytes] = partSize;
> +
> +                mergeFlag = ctu->m_mergeFlag[absPartIdx];
> +                interDataCTU->mergeFlag[depthBytes] = mergeFlag;
> +
>                  absPartIdx += ctu->m_numPartitions >> (depth * 2);
>              }
>          }
> @@ -2130,9 +2149,9 @@
>      else
>      {
>          int numDir = (analysis->sliceType == X265_TYPE_P) ? 1 : 2;
> -        analysis->frameRecordSize += depthBytes * 2;
> -        analysis->frameRecordSize += sizeof(MV) * analysis->numCUsInFrame
> * X265_MAX_PRED_MODE_PER_CTU * numDir;
> -        analysis->frameRecordSize += sizeof(WeightParam) * 3 * numDir;
> +        analysis->frameRecordSize += depthBytes * 4;
> +        analysis->frameRecordSize += sizeof(int32_t)*
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir;
> +        analysis->frameRecordSize += sizeof(WeightParam)* 3 * numDir;
>      }
>      X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1,
> m_analysisFile);
>      X265_FWRITE(&depthBytes, sizeof(uint32_t), 1, m_analysisFile);
> @@ -2155,6 +2174,8 @@
>          int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth,
> sizeof(uint8_t), depthBytes, m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes,
> sizeof(uint8_t), depthBytes, m_analysisFile);
> +
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->partSize,
> sizeof(uint8_t), depthBytes, m_analysisFile);
> +
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->mergeFlag,
> sizeof(uint8_t), depthBytes, m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU *
> numDir, m_analysisFile);
>          uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 :
> 3;
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->wt,
> sizeof(WeightParam), numPlanes * numDir, m_analysisFile);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Deepthi Nandakumar
Engineering Manager, x265
Multicoreware, Inc
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160408/baa69bd7/attachment-0001.html>


More information about the x265-devel mailing list