[x265] [PATCH] analysis: dump and reuse the bestmergeCand for skip and merge mode

Tue Feb 3 06:17:09 CET 2015

On Mon, Feb 2, 2015 at 10:23 AM, <gopu at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1422852790 -19800
> #      Mon Feb 02 10:23:10 2015 +0530
> # Node ID db56dc779466c5b54a55b5dadbcd04e882011729
> # Parent  6c5156500d6d4fa655acaf7a8b77f2ba3a0f794b
> analysis: dump and reuse the bestmergeCand for skip and merge mode
>
> diff -r 6c5156500d6d -r db56dc779466 source/common/common.h
> --- a/source/common/common.h    Fri Jan 30 11:54:22 2015 -0600
> +++ b/source/common/common.h    Mon Feb 02 10:23:10 2015 +0530
> @@ -376,6 +376,7 @@
>      int32_t*    ref;
>      uint8_t*    depth;
>      uint8_t*    modes;
> +    uint32_t*   bestMergeCand;
>  };
>
>  /* Stores intra analysis data for a single frame. This struct needs
> better packing */
> diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Fri Jan 30 11:54:22 2015 -0600
> +++ b/source/encoder/analysis.cpp       Mon Feb 02 10:23:10 2015 +0530
> @@ -140,6 +140,7 @@
>              int numPredDir = m_slice->isInterP() ? 1 : 2;
>              m_reuseInterDataCTU = (analysis_inter_data
> *)m_frame->m_analysisData.interData;
>              reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr *
> X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> +            reuseBestMergeCand =
> &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
>          }
>      }
>
> @@ -1066,21 +1067,6 @@
>              md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
>              checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
>
> -            if ((m_slice->m_sliceType != B_SLICE ||
> m_param->bIntraInBFrames) &&
> -                (!m_param->bEnableCbfFastMode ||
> md.bestMode->cu.getQtRootCbf(0)))
> -            {
> -                md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
> -                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
> -                checkBestMode(md.pred[PRED_INTRA], depth);
> -
> -                if (depth == g_maxCUDepth && cuGeom.log2CUSize >
> m_slice->m_sps->quadtreeTULog2MinSize)
> -                {
> -                    md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU,
> cuGeom);
> -                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN,
> &reuseModes[zOrder]);
> -                    checkBestMode(md.pred[PRED_INTRA_NxN], depth);
> -                }
> -            }
> -
>              if (m_bTryLossless)
>                  tryLossless(cuGeom);
>
> @@ -1388,29 +1374,10 @@
>      bool foundCbf0Merge = false;
>      bool triedPZero = false, triedBZero = false;
>      bestPred->rdCost = MAX_INT64;
> -    for (uint32_t i = 0; i < maxNumMergeCand; i++)
> +
> +    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
>      {
> -        if (m_bFrameParallel &&
> -            (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) *
> 4 ||
> -             mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) *
> 4))
> -            continue;
> -
> -        /* the merge candidate list is packed with MV(0,0) ref 0 when it
> is not full */
> -        if (interDirNeighbours[i] == 1 &&
> !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
> -        {
> -            if (triedPZero)
> -                continue;
> -            triedPZero = true;
> -        }
> -        else if (interDirNeighbours[i] == 3 &&
> -                 !mvFieldNeighbours[i][0].mv.word &&
> !mvFieldNeighbours[i][0].refIdx &&
> -                 !mvFieldNeighbours[i][1].mv.word &&
> !mvFieldNeighbours[i][1].refIdx)
> -        {
> -            if (triedBZero)
> -                continue;
> -            triedBZero = true;
> -        }
> -
> +        uint32_t i = *reuseBestMergeCand;
>          tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;    /* merge candidate
> ID is stored in L0 MVP idx */
>          tempPred->cu.m_interDir[0] = interDirNeighbours[i];
>          tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> @@ -1424,24 +1391,20 @@
>
>          uint8_t hasCbf = true;
>          bool swapped = false;
> -        if (!foundCbf0Merge)
> +
> +        /* if the best prediction has CBF (not a skip) then try merge
> with residual */
> +        encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> +        hasCbf = tempPred->cu.getQtRootCbf(0);
> +        foundCbf0Merge = !hasCbf;
> +
> +        if (tempPred->rdCost < bestPred->rdCost)
>          {
> -            /* if the best prediction has CBF (not a skip) then try merge
> with residual */
> -
> -            encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> -            hasCbf = tempPred->cu.getQtRootCbf(0);
> -            foundCbf0Merge = !hasCbf;
> -
> -            if (tempPred->rdCost < bestPred->rdCost)
> -            {
> -                std::swap(tempPred, bestPred);
> -                swapped = true;
> -            }
> +            std::swap(tempPred, bestPred);
> +            swapped = true;
>          }
>          if (!m_param->bLossless && hasCbf)
>          {
>              /* try merge without residual (skip), if not lossless coding
> */
> -
>              if (swapped)
>              {
>                  tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
> @@ -1453,12 +1416,88 @@
>                  tempPred->cu.setPredModeSubParts(MODE_INTER);
>                  tempPred->predYuv.copyFromYuv(bestPred->predYuv);
>              }
> -
> +
>              encodeResAndCalcRdSkipCU(*tempPred);
>
>              if (tempPred->rdCost < bestPred->rdCost)
>                  std::swap(tempPred, bestPred);
>          }
> +        reuseBestMergeCand++;
> +    }
>

This is way too much code duplication. Let's fold this in by changing
maxNumMergeCand based on analysis-mode.

> +    else
> +    {
> +        for (uint32_t i = 0; i < maxNumMergeCand; i++)
> +        {
> +            if (m_bFrameParallel &&
> +                (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange +
> 1) * 4 ||
> +                mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange +
> 1) * 4))
> +                continue;
> +
> +            /* the merge candidate list is packed with MV(0,0) ref 0 when
> it is not full */
> +            if (interDirNeighbours[i] == 1 &&
> !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
> +            {
> +                if (triedPZero)
> +                    continue;
> +                triedPZero = true;
> +            }
> +            else if (interDirNeighbours[i] == 3 &&
> +                !mvFieldNeighbours[i][0].mv.word &&
> !mvFieldNeighbours[i][0].refIdx &&
> +                !mvFieldNeighbours[i][1].mv.word &&
> !mvFieldNeighbours[i][1].refIdx)
> +            {
> +                if (triedBZero)
> +                    continue;
> +                triedBZero = true;
> +            }
> +
> +            tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;    /* merge
> candidate ID is stored in L0 MVP idx */
> +            tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> +            tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> +            tempPred->cu.m_refIdx[0][0] =
> (int8_t)mvFieldNeighbours[i][0].refIdx;
> +            tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
> +            tempPred->cu.m_refIdx[1][0] =
> (int8_t)mvFieldNeighbours[i][1].refIdx;
> +            tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be
> cleared between encode iterations */
> +
> +            prepMotionCompensation(tempPred->cu, cuGeom, 0);
> +            motionCompensation(tempPred->predYuv, true, true);
> +
> +            uint8_t hasCbf = true;
> +            bool swapped = false;
> +            if (!foundCbf0Merge)
> +            {
> +                /* if the best prediction has CBF (not a skip) then try
> merge with residual */
> +
> +                encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> +                hasCbf = tempPred->cu.getQtRootCbf(0);
> +                foundCbf0Merge = !hasCbf;
> +
> +                if (tempPred->rdCost < bestPred->rdCost)
> +                {
> +                    std::swap(tempPred, bestPred);
> +                    swapped = true;
> +                }
> +            }
> +            if (!m_param->bLossless && hasCbf)
> +            {
> +                /* try merge without residual (skip), if not lossless
> coding */
> +
> +                if (swapped)
> +                {
> +                    tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
> +                    tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> +                    tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> +                    tempPred->cu.m_refIdx[0][0] =
> (int8_t)mvFieldNeighbours[i][0].refIdx;
> +                    tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
> +                    tempPred->cu.m_refIdx[1][0] =
> (int8_t)mvFieldNeighbours[i][1].refIdx;
> +                    tempPred->cu.setPredModeSubParts(MODE_INTER);
> +                    tempPred->predYuv.copyFromYuv(bestPred->predYuv);
> +                }
> +
> +                encodeResAndCalcRdSkipCU(*tempPred);
> +
> +                if (tempPred->rdCost < bestPred->rdCost)
> +                    std::swap(tempPred, bestPred);
> +            }
> +        }
>      }
>
>      if (bestPred->rdCost < MAX_INT64)
> @@ -1473,6 +1512,12 @@
>          bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);
>          bestPred->cu.setPURefIdx(1,
> (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);
>      }
> +
> +    if (m_param->analysisMode == X265_ANALYSIS_SAVE)
> +    {
> +        *reuseBestMergeCand = bestPred->cu.m_mvpIdx[0][0];
> +        reuseBestMergeCand++;
> +    }
>  }
>
>  void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom,
> PartSize partSize)
> diff -r 6c5156500d6d -r db56dc779466 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Jan 30 11:54:22 2015 -0600
> +++ b/source/encoder/analysis.h Mon Feb 02 10:23:10 2015 +0530
> @@ -78,6 +78,7 @@
>      analysis_intra_data* m_reuseIntraDataCTU;
>      analysis_inter_data* m_reuseInterDataCTU;
>      int32_t* reuseRef;
> +    uint32_t* reuseBestMergeCand;
>      Analysis();
>      bool create(ThreadLocalData* tld);
>      void destroy();
> diff -r 6c5156500d6d -r db56dc779466 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Fri Jan 30 11:54:22 2015 -0600
> +++ b/source/encoder/encoder.cpp        Mon Feb 02 10:23:10 2015 +0530
> @@ -1628,6 +1628,7 @@
>          CHECKED_MALLOC_ZERO(interData->ref, int32_t,
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);
>          CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions
> * analysis->numCUsInFrame);
>          CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions
> * analysis->numCUsInFrame);
> +        CHECKED_MALLOC_ZERO(interData->bestMergeCand, uint32_t,
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS);
>          analysis->interData = interData;
>      }
>      return;
> @@ -1651,6 +1652,7 @@
>          X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
>          X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
>          X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
> +
> X265_FREE(((analysis_inter_data*)analysis->interData)->bestMergeCand);
>          X265_FREE(analysis->interData);
>      }
>  }
> @@ -1716,6 +1718,7 @@
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU,
> m_analysisFile);
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +        X265_FREAD(((analysis_inter_data
> *)analysis->interData)->bestMergeCand, sizeof(uint32_t),
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
>          consumedBytes += frameRecordSize;
>          totalConsumedBytes = consumedBytes;
>      }
> @@ -1724,6 +1727,7 @@
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2,
> m_analysisFile);
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +        X265_FREAD(((analysis_inter_data
> *)analysis->interData)->bestMergeCand, sizeof(uint32_t),
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS, m_analysisFile);
>          consumedBytes += frameRecordSize;
>      }
>  #undef X265_FREAD
> @@ -1750,11 +1754,13 @@
>      {
>          analysis->frameRecordSize += sizeof(int32_t) *
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
>          analysis->frameRecordSize += sizeof(uint8_t) *
> analysis->numCUsInFrame * analysis->numPartitions * 2;
> +        analysis->frameRecordSize += sizeof(uint32_t) *
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS;
>      }
>      else
>      {
>          analysis->frameRecordSize += sizeof(int32_t) *
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
>          analysis->frameRecordSize += sizeof(uint8_t) *
> analysis->numCUsInFrame * analysis->numPartitions * 2;
> +        analysis->frameRecordSize += sizeof(uint32_t) *
> analysis->numCUsInFrame * CUGeom::MAX_GEOMS;
>      }
>
>      X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1,
> m_analysisFile);
> @@ -1774,12 +1780,14 @@
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU,
> m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand,
> sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS,
> m_analysisFile);
>      }
>      else
>      {
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2,
> m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand,
> sizeof(uint32_t), analysis->numCUsInFrame * CUGeom::MAX_GEOMS,
> m_analysisFile);
>      }
>  #undef X265_FWRITE
>  }
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150203/d4223ca2/attachment-0001.html>