[x265] [PATCH] analysis: Dump the best MV statistics and re-use this for analysis load mode

Steve Borho steve at borho.org
Tue Nov 11 04:20:27 CET 2014


On 11/10, gopu at multicorewareinc.com wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1415611936 -19800
> #      Mon Nov 10 15:02:16 2014 +0530
> # Node ID 31b6ed10054e753331b65a5e08e512f2f5b22b2d
> # Parent  1e04e178a349ff3a27ed0207cca7bdd9f0db4ff8
> analysis: Dump the best MV statistics and re-use this for analysis load mode
> 
> This patch fixes a bug in inter information sharing when using
> analysis=load|save mode. The existing algorithm always dumps and shares only
> the last part's best MV for each prediction, but there are multiple parts, each
> with its own prediction. The fix is to dump and share every part's best MVs
> for each prediction.
> 
> diff -r 1e04e178a349 -r 31b6ed10054e source/common/common.h
> --- a/source/common/common.h	Sun Nov 09 00:30:09 2014 -0600
> +++ b/source/common/common.h	Mon Nov 10 15:02:16 2014 +0530
> @@ -291,6 +291,7 @@
>  #define MAX_NUM_REF                 16 // max. number of entries in picture reference list
>  
>  #define REF_NOT_VALID               -1
> +#define MAX_NUM_PART                 4

In HEVC, inter cannot code NxN, so for the purpose of this data the max
count is 2. It would be preferable for this to be an enum in Mode
rather than a general #define in common.h.

>  #define AMVP_NUM_CANDS              2 // number of AMVP candidates
>  #define MRG_MAX_NUM_CANDS           5 // max number of final merge candidates
> diff -r 1e04e178a349 -r 31b6ed10054e source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Sun Nov 09 00:30:09 2014 -0600
> +++ b/source/encoder/analysis.cpp	Mon Nov 10 15:02:16 2014 +0530
> @@ -1407,12 +1407,16 @@
>  
>      if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_interAnalysisData)
>      {
> -        for (int32_t i = 0; i < numPredDir; i++)
> +        for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
>          {
> -            interMode.bestME[i].costZero = !!m_interAnalysisData->costZero[i];
> -            interMode.bestME[i].mv.x = m_interAnalysisData->mvx[i];
> -            interMode.bestME[i].mv.y = m_interAnalysisData->mvy[i];
> -            interMode.bestME[i].ref = m_interAnalysisData->ref[i];
> +            for (int32_t i = 0; i < numPredDir; i++)
> +            {
> +                interMode.bestME[part][i].costZero = !!m_interAnalysisData->costZero[i];
> +                interMode.bestME[part][i].mv.x = m_interAnalysisData->mvx[i];
> +                interMode.bestME[part][i].mv.y = m_interAnalysisData->mvy[i];
> +                interMode.bestME[part][i].ref = m_interAnalysisData->ref[i];
> +            }
> +            m_interAnalysisData++;
>
>
>          }
>      }
>      if (predInterSearch(interMode, cuGeom, false, false))
> @@ -1425,17 +1429,20 @@
>  
>          if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_interAnalysisData)
>          {
> -            for (int32_t i = 0; i < numPredDir; i++)
> +            for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
>              {
> -                m_interAnalysisData->costZero[i] = interMode.bestME[i].costZero;
> -                m_interAnalysisData->mvx[i] = interMode.bestME[i].mv.x;
> -                m_interAnalysisData->mvy[i] = interMode.bestME[i].mv.y;
> -                m_interAnalysisData->ref[i] = interMode.bestME[i].ref;
> +                for (int32_t i = 0; i < numPredDir; i++)
> +                {
> +                    m_interAnalysisData->costZero[i] = interMode.bestME[part][i].costZero;
> +                    m_interAnalysisData->mvx[i] = interMode.bestME[part][i].mv.x;
> +                    m_interAnalysisData->mvy[i] = interMode.bestME[part][i].mv.y;
> +                    m_interAnalysisData->ref[i] = interMode.bestME[part][i].ref;
> +                }
> +                m_interAnalysisData->zOrder = cuGeom.encodeIdx;
> +                m_interAnalysisData->depth  = cuGeom.depth;
> +                m_interAnalysisData++;
>              }
> -            m_interAnalysisData->zOrder = cuGeom.encodeIdx;
> -            m_interAnalysisData->depth  = cuGeom.depth;
>          }
> -        m_interAnalysisData++;
>      }
>      else
>      {
> @@ -1453,12 +1460,16 @@
>  
>      if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_interAnalysisData)
>      {
> -        for (int32_t i = 0; i < numPredDir; i++)
> +        for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
>          {
> -            interMode.bestME[i].costZero = !!m_interAnalysisData->costZero[i];
> -            interMode.bestME[i].mv.x = m_interAnalysisData->mvx[i];
> -            interMode.bestME[i].mv.y = m_interAnalysisData->mvy[i];
> -            interMode.bestME[i].ref = m_interAnalysisData->ref[i];
> +            for (int32_t i = 0; i < numPredDir; i++)
> +            {
> +                interMode.bestME[part][i].costZero = !!m_interAnalysisData->costZero[i];
> +                interMode.bestME[part][i].mv.x = m_interAnalysisData->mvx[i];
> +                interMode.bestME[part][i].mv.y = m_interAnalysisData->mvy[i];
> +                interMode.bestME[part][i].ref = m_interAnalysisData->ref[i];
> +            }
> +            m_interAnalysisData++;
>          }
>      }
>      if (predInterSearch(interMode, cuGeom, bMergeOnly, true))
> @@ -1467,17 +1478,20 @@
>          encodeResAndCalcRdInterCU(interMode, cuGeom);
>          if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_interAnalysisData)
>          {
> -            for (int32_t i = 0; i < numPredDir; i++)
> +            for (uint32_t part = 0; part < interMode.cu.getNumPartInter(); part++)
>              {
> -                m_interAnalysisData->costZero[i] = interMode.bestME[i].costZero;
> -                m_interAnalysisData->mvx[i] = interMode.bestME[i].mv.x;
> -                m_interAnalysisData->mvy[i] = interMode.bestME[i].mv.y;
> -                m_interAnalysisData->ref[i] = interMode.bestME[i].ref;
> +                for (int32_t i = 0; i < numPredDir; i++)
> +                {
> +                    m_interAnalysisData->costZero[i] = interMode.bestME[part][i].costZero;
> +                    m_interAnalysisData->mvx[i] = interMode.bestME[part][i].mv.x;
> +                    m_interAnalysisData->mvy[i] = interMode.bestME[part][i].mv.y;
> +                    m_interAnalysisData->ref[i] = interMode.bestME[part][i].ref;
> +                }
> +                m_interAnalysisData->zOrder = cuGeom.encodeIdx;
> +                m_interAnalysisData->depth  = cuGeom.depth;
> +                m_interAnalysisData++;
>              }
> -            m_interAnalysisData->zOrder = cuGeom.encodeIdx;
> -            m_interAnalysisData->depth  = cuGeom.depth;
>          }
> -        m_interAnalysisData++;
>      }
>      else
>      {
> diff -r 1e04e178a349 -r 31b6ed10054e source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Sun Nov 09 00:30:09 2014 -0600
> +++ b/source/encoder/search.cpp	Mon Nov 10 15:02:16 2014 +0530
> @@ -1907,15 +1907,15 @@
>  
>      /* tie goes to the smallest ref ID, just like --no-pme */
>      ScopedLock _lock(master.m_outputLock);
> -    if (cost < interMode.bestME[list].cost ||
> -       (cost == interMode.bestME[list].cost && ref < interMode.bestME[list].ref))
> +    if (cost < interMode.bestME[part][list].cost ||
> +       (cost == interMode.bestME[part][list].cost && ref < interMode.bestME[part][list].ref))
>      {
> -        interMode.bestME[list].mv = outmv;
> -        interMode.bestME[list].mvp = mvp;
> -        interMode.bestME[list].mvpIdx = mvpIdx;
> -        interMode.bestME[list].ref = ref;
> -        interMode.bestME[list].cost = cost;
> -        interMode.bestME[list].bits = bits;
> +        interMode.bestME[part][list].mv = outmv;
> +        interMode.bestME[part][list].mvp = mvp;
> +        interMode.bestME[part][list].mvpIdx = mvpIdx;
> +        interMode.bestME[part][list].ref = ref;
> +        interMode.bestME[part][list].cost = cost;
> +        interMode.bestME[part][list].bits = bits;
>      }
>  }
>  
> @@ -1988,17 +1988,17 @@
>          uint32_t bidirCost = MAX_UINT;
>          int bidirBits = 0;

This would be a lot easier to read if you just declared a local pointer
to the current PU's motion data:

           MotionData* bestME = interMode.bestME[puIdx];

>  
> -        interMode.bestME[0].cost = MAX_UINT;
> -        interMode.bestME[1].cost = MAX_UINT;
> +        interMode.bestME[puIdx][0].cost = MAX_UINT;
> +        interMode.bestME[puIdx][1].cost = MAX_UINT;
>  
>          getBlkBits((PartSize)cu.m_partSize[0], slice->isInterP(), puIdx, lastMode, m_listSelBits);
>  
>          /* Uni-directional prediction */
> -        if (m_param->analysisMode == X265_ANALYSIS_LOAD && interMode.bestME[0].ref >= 0)
> +        if (m_param->analysisMode == X265_ANALYSIS_LOAD && interMode.bestME[puIdx][0].ref >= 0)
>          {
>              for (int l = 0; l < numPredDir; l++)
>              {
> -                int ref = interMode.bestME[l].ref;
> +                int ref = interMode.bestME[puIdx][l].ref;
>                  uint32_t bits = m_listSelBits[l] + MVP_IDX_BITS;
>                  bits += getTUBits(ref, numRefIdx[l]);
>  
> @@ -2030,13 +2030,13 @@
>  
>                  MV mvmin, mvmax, outmv, mvp = interMode.amvpCand[l][ref][mvpIdx];
>                  m_me.setMVP(mvp);
> -                MV bmv(interMode.bestME[l].mv.x, interMode.bestME[l].mv.y);
> +                MV bmv(interMode.bestME[puIdx][l].mv.x, interMode.bestME[puIdx][l].mv.y);
>  
>                  int satdCost;
> -                if (interMode.bestME[l].costZero)
> +                if (interMode.bestME[puIdx][l].costZero)
>                      satdCost = m_me.mvcost(bmv);
>                  else
> -                    satdCost = interMode.bestME[l].cost;
> +                    satdCost = interMode.bestME[puIdx][l].cost;
>  
>                  /* Get total cost of partition, but only include MV bit cost once */
>                  bits += m_me.bitcost(bmv);
> @@ -2045,14 +2045,14 @@
>                  /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
>                  checkBestMVP(interMode.amvpCand[l][ref], outmv, mvp, mvpIdx, bits, cost);
>  
> -                if (cost < interMode.bestME[l].cost)
> +                if (cost < interMode.bestME[puIdx][l].cost)
>                  {
> -                    interMode.bestME[l].mv = outmv;
> -                    interMode.bestME[l].mvp = mvp;
> -                    interMode.bestME[l].mvpIdx = mvpIdx;
> -                    interMode.bestME[l].ref = ref;
> -                    interMode.bestME[l].cost = cost;
> -                    interMode.bestME[l].bits = bits;
> +                    interMode.bestME[puIdx][l].mv = outmv;
> +                    interMode.bestME[puIdx][l].mvp = mvp;
> +                    interMode.bestME[puIdx][l].mvpIdx = mvpIdx;
> +                    interMode.bestME[puIdx][l].ref = ref;
> +                    interMode.bestME[puIdx][l].cost = cost;
> +                    interMode.bestME[puIdx][l].bits = bits;
>                  }
>              }
>          }
> @@ -2148,31 +2148,31 @@
>                      /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
>                      checkBestMVP(interMode.amvpCand[l][ref], outmv, mvp, mvpIdx, bits, cost);
>  
> -                    if (cost < interMode.bestME[l].cost)
> +                    if (cost < interMode.bestME[puIdx][l].cost)
>                      {
> -                        interMode.bestME[l].mv = outmv;
> -                        interMode.bestME[l].mvp = mvp;
> -                        interMode.bestME[l].mvpIdx = mvpIdx;
> -                        interMode.bestME[l].ref = ref;
> -                        interMode.bestME[l].cost = cost;
> -                        interMode.bestME[l].bits = bits;
> +                        interMode.bestME[puIdx][l].mv = outmv;
> +                        interMode.bestME[puIdx][l].mvp = mvp;
> +                        interMode.bestME[puIdx][l].mvpIdx = mvpIdx;
> +                        interMode.bestME[puIdx][l].ref = ref;
> +                        interMode.bestME[puIdx][l].cost = cost;
> +                        interMode.bestME[puIdx][l].bits = bits;
>                      }
>                  }
>              }
>          }
>  
>          /* Bi-directional prediction */
> -        if (slice->isInterB() && !cu.isBipredRestriction() && interMode.bestME[0].cost != MAX_UINT && interMode.bestME[1].cost != MAX_UINT)
> +        if (slice->isInterB() && !cu.isBipredRestriction() && interMode.bestME[puIdx][0].cost != MAX_UINT && interMode.bestME[puIdx][1].cost != MAX_UINT)
>          {
> -            bidir[0] = interMode.bestME[0];
> -            bidir[1] = interMode.bestME[1];
> +            bidir[0] = interMode.bestME[puIdx][0];
> +            bidir[1] = interMode.bestME[puIdx][1];
>  
>              /* Generate reference subpels */
> -            PicYuv* refPic0  = slice->m_refPicList[0][interMode.bestME[0].ref]->m_reconPic;
> -            PicYuv* refPic1  = slice->m_refPicList[1][interMode.bestME[1].ref]->m_reconPic;
> +            PicYuv* refPic0  = slice->m_refPicList[0][interMode.bestME[puIdx][0].ref]->m_reconPic;
> +            PicYuv* refPic1  = slice->m_refPicList[1][interMode.bestME[puIdx][1].ref]->m_reconPic;
>              Yuv*    bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv;
> -            predInterLumaPixel(bidirYuv[0], *refPic0, interMode.bestME[0].mv);
> -            predInterLumaPixel(bidirYuv[1], *refPic1, interMode.bestME[1].mv);
> +            predInterLumaPixel(bidirYuv[0], *refPic0, interMode.bestME[puIdx][0].mv);
> +            predInterLumaPixel(bidirYuv[1], *refPic1, interMode.bestME[puIdx][1].mv);
>  
>              pixel *pred0 = bidirYuv[0].getLumaAddr(m_puAbsPartIdx);
>              pixel *pred1 = bidirYuv[1].getLumaAddr(m_puAbsPartIdx);
> @@ -2181,10 +2181,10 @@
>              primitives.pixelavg_pp[partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, pred0, bidirYuv[0].m_size, pred1, bidirYuv[1].m_size, 32);
>              int satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
>  
> -            bidirBits = interMode.bestME[0].bits + interMode.bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
> +            bidirBits = interMode.bestME[puIdx][0].bits + interMode.bestME[puIdx][1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
>              bidirCost = satdCost + m_rdCost.getCost(bidirBits);
>  
> -            bool bTryZero = interMode.bestME[0].mv.notZero() || interMode.bestME[1].mv.notZero();
> +            bool bTryZero = interMode.bestME[puIdx][0].mv.notZero() || interMode.bestME[puIdx][1].mv.notZero();
>              if (bTryZero)
>              {
>                  /* Do not try zero MV if unidir motion predictors are beyond
> @@ -2196,32 +2196,32 @@
>                  mvmin <<= 2;
>                  mvmax <<= 2;
>  
> -                bTryZero &= interMode.bestME[0].mvp.checkRange(mvmin, mvmax);
> -                bTryZero &= interMode.bestME[1].mvp.checkRange(mvmin, mvmax);
> +                bTryZero &= interMode.bestME[puIdx][0].mvp.checkRange(mvmin, mvmax);
> +                bTryZero &= interMode.bestME[puIdx][1].mvp.checkRange(mvmin, mvmax);
>              }
>              if (bTryZero)
>              {
>                  /* coincident blocks of the two reference pictures */
> -                pixel *ref0 = m_slice->m_mref[0][interMode.bestME[0].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
> -                pixel *ref1 = m_slice->m_mref[1][interMode.bestME[1].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
> +                pixel *ref0 = m_slice->m_mref[0][interMode.bestME[puIdx][0].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
> +                pixel *ref1 = m_slice->m_mref[1][interMode.bestME[puIdx][1].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
>                  intptr_t refStride = slice->m_mref[0][0].lumaStride;
>  
>                  primitives.pixelavg_pp[partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, ref0, refStride, ref1, refStride, 32);
>                  satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
>  
> -                MV mvp0 = interMode.bestME[0].mvp;
> -                int mvpIdx0 = interMode.bestME[0].mvpIdx;
> -                uint32_t bits0 = interMode.bestME[0].bits - m_me.bitcost(interMode.bestME[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
> -
> -                MV mvp1 = interMode.bestME[1].mvp;
> -                int mvpIdx1 = interMode.bestME[1].mvpIdx;
> -                uint32_t bits1 = interMode.bestME[1].bits - m_me.bitcost(interMode.bestME[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
> +                MV mvp0 = interMode.bestME[puIdx][0].mvp;
> +                int mvpIdx0 = interMode.bestME[puIdx][0].mvpIdx;
> +                uint32_t bits0 = interMode.bestME[puIdx][0].bits - m_me.bitcost(interMode.bestME[puIdx][0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
> +
> +                MV mvp1 = interMode.bestME[puIdx][1].mvp;
> +                int mvpIdx1 = interMode.bestME[puIdx][1].mvpIdx;
> +                uint32_t bits1 = interMode.bestME[puIdx][1].bits - m_me.bitcost(interMode.bestME[puIdx][1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
>  
>                  uint32_t cost = satdCost + m_rdCost.getCost(bits0) + m_rdCost.getCost(bits1);
>  
>                  /* refine MVP selection for zero mv, updates: mvp, mvpidx, bits, cost */
> -                checkBestMVP(interMode.amvpCand[0][interMode.bestME[0].ref], mvzero, mvp0, mvpIdx0, bits0, cost);
> -                checkBestMVP(interMode.amvpCand[1][interMode.bestME[1].ref], mvzero, mvp1, mvpIdx1, bits1, cost);
> +                checkBestMVP(interMode.amvpCand[0][interMode.bestME[puIdx][0].ref], mvzero, mvp0, mvpIdx0, bits0, cost);
> +                checkBestMVP(interMode.amvpCand[1][interMode.bestME[puIdx][1].ref], mvzero, mvp1, mvpIdx1, bits1, cost);
>  
>                  if (cost < bidirCost)
>                  {
> @@ -2243,7 +2243,7 @@
>          }
>  
>          /* select best option and store into CU */
> -        if (mrgCost < bidirCost && mrgCost < interMode.bestME[0].cost && mrgCost < interMode.bestME[1].cost)
> +        if (mrgCost < bidirCost && mrgCost < interMode.bestME[puIdx][0].cost && mrgCost < interMode.bestME[puIdx][1].cost)
>          {
>              cu.m_mergeFlag[m_puAbsPartIdx] = true;
>              cu.m_mvpIdx[0][m_puAbsPartIdx] = merge.index; // merge candidate ID is stored in L0 MVP idx
> @@ -2255,39 +2255,39 @@
>  
>              totalmebits += merge.bits;
>          }
> -        else if (bidirCost < interMode.bestME[0].cost && bidirCost < interMode.bestME[1].cost)
> +        else if (bidirCost < interMode.bestME[puIdx][0].cost && bidirCost < interMode.bestME[puIdx][1].cost)
>          {
>              lastMode = 2;
>  
>              cu.m_mergeFlag[m_puAbsPartIdx] = false;
>              cu.setPUInterDir(3, m_puAbsPartIdx, puIdx);
>              cu.setPUMv(0, bidir[0].mv, m_puAbsPartIdx, puIdx);
> -            cu.setPURefIdx(0, interMode.bestME[0].ref, m_puAbsPartIdx, puIdx);
> +            cu.setPURefIdx(0, interMode.bestME[puIdx][0].ref, m_puAbsPartIdx, puIdx);
>              cu.m_mvd[0][m_puAbsPartIdx] = bidir[0].mv - bidir[0].mvp;
>              cu.m_mvpIdx[0][m_puAbsPartIdx] = bidir[0].mvpIdx;
>  
>              cu.setPUMv(1, bidir[1].mv, m_puAbsPartIdx, puIdx);
> -            cu.setPURefIdx(1, interMode.bestME[1].ref, m_puAbsPartIdx, puIdx);
> +            cu.setPURefIdx(1, interMode.bestME[puIdx][1].ref, m_puAbsPartIdx, puIdx);
>              cu.m_mvd[1][m_puAbsPartIdx] = bidir[1].mv - bidir[1].mvp;
>              cu.m_mvpIdx[1][m_puAbsPartIdx] = bidir[1].mvpIdx;
>  
>              totalmebits += bidirBits;
>          }
> -        else if (interMode.bestME[0].cost <= interMode.bestME[1].cost)
> +        else if (interMode.bestME[puIdx][0].cost <= interMode.bestME[puIdx][1].cost)
>          {
>              lastMode = 0;
>  
>              cu.m_mergeFlag[m_puAbsPartIdx] = false;
>              cu.setPUInterDir(1, m_puAbsPartIdx, puIdx);
> -            cu.setPUMv(0, interMode.bestME[0].mv, m_puAbsPartIdx, puIdx);
> -            cu.setPURefIdx(0, interMode.bestME[0].ref, m_puAbsPartIdx, puIdx);
> -            cu.m_mvd[0][m_puAbsPartIdx] = interMode.bestME[0].mv - interMode.bestME[0].mvp;
> -            cu.m_mvpIdx[0][m_puAbsPartIdx] = interMode.bestME[0].mvpIdx;
> +            cu.setPUMv(0, interMode.bestME[puIdx][0].mv, m_puAbsPartIdx, puIdx);
> +            cu.setPURefIdx(0, interMode.bestME[puIdx][0].ref, m_puAbsPartIdx, puIdx);
> +            cu.m_mvd[0][m_puAbsPartIdx] = interMode.bestME[puIdx][0].mv - interMode.bestME[puIdx][0].mvp;
> +            cu.m_mvpIdx[0][m_puAbsPartIdx] = interMode.bestME[puIdx][0].mvpIdx;
>  
>              cu.setPURefIdx(1, REF_NOT_VALID, m_puAbsPartIdx, puIdx);
>              cu.setPUMv(1, mvzero, m_puAbsPartIdx, puIdx);
>  
> -            totalmebits += interMode.bestME[0].bits;
> +            totalmebits += interMode.bestME[puIdx][0].bits;
>          }
>          else
>          {
> @@ -2295,15 +2295,15 @@
>  
>              cu.m_mergeFlag[m_puAbsPartIdx] = false;
>              cu.setPUInterDir(2, m_puAbsPartIdx, puIdx);
> -            cu.setPUMv(1, interMode.bestME[1].mv, m_puAbsPartIdx, puIdx);
> -            cu.setPURefIdx(1, interMode.bestME[1].ref, m_puAbsPartIdx, puIdx);
> -            cu.m_mvd[1][m_puAbsPartIdx] = interMode.bestME[1].mv - interMode.bestME[1].mvp;
> -            cu.m_mvpIdx[1][m_puAbsPartIdx] = interMode.bestME[1].mvpIdx;
> +            cu.setPUMv(1, interMode.bestME[puIdx][1].mv, m_puAbsPartIdx, puIdx);
> +            cu.setPURefIdx(1, interMode.bestME[puIdx][1].ref, m_puAbsPartIdx, puIdx);
> +            cu.m_mvd[1][m_puAbsPartIdx] = interMode.bestME[puIdx][1].mv - interMode.bestME[puIdx][1].mvp;
> +            cu.m_mvpIdx[1][m_puAbsPartIdx] = interMode.bestME[puIdx][1].mvpIdx;
>  
>              cu.setPURefIdx(0, REF_NOT_VALID, m_puAbsPartIdx, puIdx);
>              cu.setPUMv(0, mvzero, m_puAbsPartIdx, puIdx);
>  
> -            totalmebits += interMode.bestME[1].bits;
> +            totalmebits += interMode.bestME[puIdx][1].bits;
>          }
>  
>          prepMotionCompensation(cu, cuGeom, puIdx);
> diff -r 1e04e178a349 -r 31b6ed10054e source/encoder/search.h
> --- a/source/encoder/search.h	Sun Nov 09 00:30:09 2014 -0600
> +++ b/source/encoder/search.h	Mon Nov 10 15:02:16 2014 +0530
> @@ -84,7 +84,7 @@
>      Yuv        reconYuv;
>      Entropy    contexts;
>  
> -    MotionData bestME[2];
> +    MotionData bestME[MAX_NUM_PART][2];

       enum { MAX_INTER_PARTS = 2 };
       MotionData bestME[MAX_INTER_PARTS][2];

>      MV         amvpCand[2][MAX_NUM_REF][AMVP_NUM_CANDS];
>  
>      uint64_t   rdCost;     // sum of partition (psy) RD costs          (sse(fenc, recon) + lambda2 * bits)

Don't changes need to be made to the allocation of the inter data
buffers or where the pointers are incremented?

-- 
Steve Borho


More information about the x265-devel mailing list