[x265] [PATCH] analysis: dump and reuse bestmergeCand for skip and merge mode

Fri Jan 30 18:40:10 CET 2015

On 01/30, gopu at multicorewareinc.com wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1422614706 -19800
> #      Fri Jan 30 16:15:06 2015 +0530
> # Node ID f732981763c90cd9bf7db88fae6e526932bf596d
> # Parent  5e5dc3763f6386da9722903033a2b9dd263a5226
> analysis: dump and reuse bestmergeCand for skip and merge mode
> 
> diff -r 5e5dc3763f63 -r f732981763c9 source/common/common.h
> --- a/source/common/common.h	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/common/common.h	Fri Jan 30 16:15:06 2015 +0530
> @@ -318,6 +318,7 @@
>  #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
>  #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
>  #define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
> +#define MAX_RECURSIVE_PERCTU 85

This value is already defined as MAX_GEOMS

>  namespace x265 {
>  
> @@ -375,6 +376,7 @@
>      int32_t*    ref;
>      uint8_t*    depth;
>      uint8_t*    modes;
> +    uint32_t*   bestMergeCand;
>  };
>  
>  /* Stores intra analysis data for a single frame. This struct needs better packing */
> diff -r 5e5dc3763f63 -r f732981763c9 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/analysis.cpp	Fri Jan 30 16:15:06 2015 +0530
> @@ -140,6 +140,7 @@
>              int numPredDir = m_slice->isInterP() ? 1 : 2;
>              m_reuseInterDataCTU = (analysis_inter_data *)m_frame->m_analysisData.interData;
>              reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> +            reuseBestMergeCand = &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * MAX_RECURSIVE_PERCTU];
>          }
>      }
>  
> @@ -1066,21 +1067,6 @@
>              md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
>              checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
>  
> -            if ((m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames) &&
> -                (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
> -            {
> -                md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
> -                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
> -                checkBestMode(md.pred[PRED_INTRA], depth);
> -
> -                if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
> -                {
> -                    md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
> -                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder]);
> -                    checkBestMode(md.pred[PRED_INTRA_NxN], depth);
> -                }
> -            }
> -
>              if (m_bTryLossless)
>                  tryLossless(cuGeom);
>  
> @@ -1388,29 +1374,10 @@
>      bool foundCbf0Merge = false;
>      bool triedPZero = false, triedBZero = false;
>      bestPred->rdCost = MAX_INT64;
> -    for (uint32_t i = 0; i < maxNumMergeCand; i++)
> +
> +    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
>      {
> -        if (m_bFrameParallel &&
> -            (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
> -             mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
> -            continue;
> -
> -        /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */
> -        if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
> -        {
> -            if (triedPZero)
> -                continue;
> -            triedPZero = true;
> -        }
> -        else if (interDirNeighbours[i] == 3 &&
> -                 !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&
> -                 !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)
> -        {
> -            if (triedBZero)
> -                continue;
> -            triedBZero = true;
> -        }
> -
> +        uint32_t i = *reuseBestMergeCand;
>          tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;    /* merge candidate ID is stored in L0 MVP idx */
>          tempPred->cu.m_interDir[0] = interDirNeighbours[i];
>          tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> @@ -1424,24 +1391,20 @@
>  
>          uint8_t hasCbf = true;
>          bool swapped = false;
> -        if (!foundCbf0Merge)
> +
> +        /* if the best prediction has CBF (not a skip) then try merge with residual */
> +        encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> +        hasCbf = tempPred->cu.getQtRootCbf(0);
> +        foundCbf0Merge = !hasCbf;
> +
> +        if (tempPred->rdCost < bestPred->rdCost)
>          {
> -            /* if the best prediction has CBF (not a skip) then try merge with residual */
> -
> -            encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> -            hasCbf = tempPred->cu.getQtRootCbf(0);
> -            foundCbf0Merge = !hasCbf;
> -
> -            if (tempPred->rdCost < bestPred->rdCost)
> -            {
> -                std::swap(tempPred, bestPred);
> -                swapped = true;
> -            }
> +            std::swap(tempPred, bestPred);
> +            swapped = true;
>          }
>          if (!m_param->bLossless && hasCbf)
>          {
>              /* try merge without residual (skip), if not lossless coding */
> -
>              if (swapped)
>              {
>                  tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
> @@ -1453,12 +1416,88 @@
>                  tempPred->cu.setPredModeSubParts(MODE_INTER);
>                  tempPred->predYuv.copyFromYuv(bestPred->predYuv);
>              }
> -            
> +
>              encodeResAndCalcRdSkipCU(*tempPred);
>  
>              if (tempPred->rdCost < bestPred->rdCost)
>                  std::swap(tempPred, bestPred);
>          }
> +        reuseBestMergeCand++;
> +    }
> +    else 
> +    {
> +        for (uint32_t i = 0; i < maxNumMergeCand; i++)
> +        {
> +            if (m_bFrameParallel &&
> +                (mvFieldNeighbours[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
> +                mvFieldNeighbours[i][1].mv.y >= (m_param->searchRange + 1) * 4))
> +                continue;
> +
> +            /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */
> +            if (interDirNeighbours[i] == 1 && !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx)
> +            {
> +                if (triedPZero)
> +                    continue;
> +                triedPZero = true;
> +            }
> +            else if (interDirNeighbours[i] == 3 &&
> +                !mvFieldNeighbours[i][0].mv.word && !mvFieldNeighbours[i][0].refIdx &&
> +                !mvFieldNeighbours[i][1].mv.word && !mvFieldNeighbours[i][1].refIdx)
> +            {
> +                if (triedBZero)
> +                    continue;
> +                triedBZero = true;
> +            }
> +
> +            tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;    /* merge candidate ID is stored in L0 MVP idx */
> +            tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> +            tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> +            tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
> +            tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
> +            tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
> +            tempPred->cu.setPredModeSubParts(MODE_INTER); /* must be cleared between encode iterations */
> +
> +            prepMotionCompensation(tempPred->cu, cuGeom, 0);
> +            motionCompensation(tempPred->predYuv, true, true);
> +
> +            uint8_t hasCbf = true;
> +            bool swapped = false;
> +            if (!foundCbf0Merge)
> +            {
> +                /* if the best prediction has CBF (not a skip) then try merge with residual */
> +
> +                encodeResAndCalcRdInterCU(*tempPred, cuGeom);
> +                hasCbf = tempPred->cu.getQtRootCbf(0);
> +                foundCbf0Merge = !hasCbf;
> +
> +                if (tempPred->rdCost < bestPred->rdCost)
> +                {
> +                    std::swap(tempPred, bestPred);
> +                    swapped = true;
> +                }
> +            }
> +            if (!m_param->bLossless && hasCbf)
> +            {
> +                /* try merge without residual (skip), if not lossless coding */
> +
> +                if (swapped)
> +                {
> +                    tempPred->cu.m_mvpIdx[0][0] = (uint8_t)i;
> +                    tempPred->cu.m_interDir[0] = interDirNeighbours[i];
> +                    tempPred->cu.m_mv[0][0] = mvFieldNeighbours[i][0].mv;
> +                    tempPred->cu.m_refIdx[0][0] = (int8_t)mvFieldNeighbours[i][0].refIdx;
> +                    tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
> +                    tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
> +                    tempPred->cu.setPredModeSubParts(MODE_INTER);
> +                    tempPred->predYuv.copyFromYuv(bestPred->predYuv);
> +                }
> +
> +                encodeResAndCalcRdSkipCU(*tempPred);
> +
> +                if (tempPred->rdCost < bestPred->rdCost)
> +                    std::swap(tempPred, bestPred);
> +            }
> +        }
>      }
>  
>      if (bestPred->rdCost < MAX_INT64)
> @@ -1473,6 +1512,12 @@
>          bestPred->cu.setPUMv(1, mvFieldNeighbours[bestCand][1].mv, 0, 0);
>          bestPred->cu.setPURefIdx(1, (int8_t)mvFieldNeighbours[bestCand][1].refIdx, 0, 0);
>      }
> +
> +    if (m_param->analysisMode == X265_ANALYSIS_SAVE)
> +    {
> +        *reuseBestMergeCand = bestPred->cu.m_mvpIdx[0][0];
> +        reuseBestMergeCand++;
> +    }
>  }
>  
>  void Analysis::checkInter_rd0_4(Mode& interMode, const CUGeom& cuGeom, PartSize partSize)
> diff -r 5e5dc3763f63 -r f732981763c9 source/encoder/analysis.h
> --- a/source/encoder/analysis.h	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/analysis.h	Fri Jan 30 16:15:06 2015 +0530
> @@ -78,6 +78,7 @@
>      analysis_intra_data* m_reuseIntraDataCTU;
>      analysis_inter_data* m_reuseInterDataCTU;
>      int32_t* reuseRef;
> +    uint32_t* reuseBestMergeCand;
>      Analysis();
>      bool create(ThreadLocalData* tld);
>      void destroy();
> diff -r 5e5dc3763f63 -r f732981763c9 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp	Thu Jan 29 10:37:54 2015 -0600
> +++ b/source/encoder/encoder.cpp	Fri Jan 30 16:15:06 2015 +0530
> @@ -1634,6 +1634,7 @@
>          CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2);
>          CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
>          CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
> +        CHECKED_MALLOC_ZERO(interData->bestMergeCand, uint32_t, analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU);
>          analysis->interData = interData;
>      }
>      return;
> @@ -1657,6 +1658,7 @@
>          X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
>          X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
>          X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
> +        X265_FREE(((analysis_inter_data*)analysis->interData)->bestMergeCand);
>          X265_FREE(analysis->interData);
>      }
>  }
> @@ -1722,6 +1724,7 @@
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> +        X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU, m_analysisFile);
>          consumedBytes += frameRecordSize;
>          totalConsumedBytes = consumedBytes;
>      }
> @@ -1730,6 +1733,7 @@
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
>          X265_FREAD(((analysis_inter_data *)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> +        X265_FREAD(((analysis_inter_data *)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU, m_analysisFile);
>          consumedBytes += frameRecordSize;
>      }
>  #undef X265_FREAD
> @@ -1756,11 +1760,13 @@
>      {
>          analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
>          analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
> +        analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU;
>      }
>      else
>      {
>          analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
>          analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 2;
> +        analysis->frameRecordSize += sizeof(uint32_t) * analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU;
>      }
>  
>      X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1, m_analysisFile);
> @@ -1780,12 +1786,14 @@
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> +        X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU, m_analysisFile);
>      }
>      else
>      {
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
>          X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
> +        X265_FWRITE(((analysis_inter_data*)analysis->interData)->bestMergeCand, sizeof(uint32_t), analysis->numCUsInFrame * MAX_RECURSIVE_PERCTU, m_analysisFile);
>      }
>  #undef X265_FWRITE
>  }
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho