[x265] [PATCH] analysis: dump the best depth and re-use it for analysis-mode=load

Tue Dec 23 06:55:21 CET 2014

On Mon, Dec 22, 2014 at 4:59 PM, <gopu at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1419247700 -19800
> #      Mon Dec 22 16:58:20 2014 +0530
> # Node ID 8606c4019f6b962bec47398ac8f876642ecab747
> # Parent  8d2f418829c894c25da79daa861f16c61e5060d7
> analysis: dump the best depth and re-use it for analysis-mode=load
>
> For inter frames we currently dump the best ref and re-use it. In addition
> to that, dump the best depth and re-use it for analysis-mode=load. The best
> depth is re-used only when the saved mode is MODE_SKIP; otherwise it is
> ignored.
>
> diff -r 8d2f418829c8 -r 8606c4019f6b source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Sat Dec 20 21:27:14 2014 +0900
> +++ b/source/encoder/analysis.cpp       Mon Dec 22 16:58:20 2014 +0530
> @@ -138,9 +138,9 @@
>          m_reuseInterDataCTU = (analysis_inter_data
> *)m_frame->m_analysisData.interData + ctu.m_cuAddr *
> X265_MAX_PRED_MODE_PER_CTU * numPredDir;
>      }
>
> +    uint32_t zOrder = 0;
>      if (m_slice->m_sliceType == I_SLICE)
>      {
> -        uint32_t zOrder = 0;
>          compressIntraCU(ctu, cuGeom, m_reuseIntraDataCTU, zOrder);
>          if (m_param->analysisMode == X265_ANALYSIS_SAVE &&
> m_frame->m_analysisData.intraData)
>          {
> @@ -158,7 +158,7 @@
>              * they are available for intra predictions */
>              m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic,
> ctu.m_cuAddr, 0);
>
> -            compressInterCU_rd0_4(ctu, cuGeom);
> +            compressInterCU_rd0_4(ctu, cuGeom, m_reuseIntraDataCTU,
> zOrder);
>
>              /* generate residual for entire CTU at once and copy to
> reconPic */
>              encodeResidue(ctu, cuGeom);
> @@ -166,9 +166,17 @@
>          else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >=
> 2)
>              compressInterCU_dist(ctu, cuGeom);
>          else if (m_param->rdLevel <= 4)
> -            compressInterCU_rd0_4(ctu, cuGeom);
> +            compressInterCU_rd0_4(ctu, cuGeom, m_reuseIntraDataCTU,
> zOrder);
>          else
> -            compressInterCU_rd5_6(ctu, cuGeom);
> +        {
> +            compressInterCU_rd5_6(ctu, cuGeom, m_reuseIntraDataCTU,
> zOrder);
>

This is clearly inter data; why are we saving/loading it using intraData?

> +            if (m_param->analysisMode == X265_ANALYSIS_SAVE &&
> m_frame->m_analysisData.intraData)
> +            {
> +                CUData *bestCU = &m_modeDepth[0].bestMode->cu;
> +                memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr *
> numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
> +                memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr *
> numPartition], bestCU->m_predMode, sizeof(uint8_t) * numPartition);
> +            }
> +        }
>      }
>
>      return *m_modeDepth[0].bestMode;
> @@ -748,7 +756,7 @@
>          md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.encodeIdx);
>  }
>
Let's leave this out until force-skip is also added at rd levels 0-4.

> -void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const
> CUGeom& cuGeom)
> +void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const
> CUGeom& cuGeom, analysis_intra_data* reuseIntraData, uint32_t& zOrder)
>  {
>      uint32_t depth = cuGeom.depth;
>      uint32_t cuAddr = parentCTU.m_cuAddr;
> @@ -982,7 +990,7 @@
>              {
>                  m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.encodeIdx);
>                  m_rqt[nextDepth].cur.load(*nextContext);
> -                compressInterCU_rd0_4(parentCTU, childGeom);
> +                compressInterCU_rd0_4(parentCTU, childGeom,
> reuseIntraData, zOrder);
>
>                  // Save best CU and pred data for this sub CU
>                  splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> @@ -1033,7 +1041,7 @@
>          md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.encodeIdx);
>  }
>
> -void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
> CUGeom& cuGeom)
>

m_reuseIntraDataCTU is already a member field of Analysis, so it does not need
to be passed as an argument. It might be better to store an incrementing
counter that advances as each depth/mode is read.

> +void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
> CUGeom& cuGeom, analysis_intra_data* reuseIntraData, uint32_t& zOrder)
>  {
>      uint32_t depth = cuGeom.depth;
>      ModeDepth& md = m_modeDepth[depth];
> @@ -1042,6 +1050,50 @@
>      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>
> +    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> +    {
> +        uint8_t* reuseDepth  = &reuseIntraData->depth[parentCTU.m_cuAddr
> * parentCTU.m_numPartitions];
> +        uint8_t* reuseModes  = &reuseIntraData->modes[parentCTU.m_cuAddr
> * parentCTU.m_numPartitions];
> +        if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder ==
> cuGeom.encodeIdx && reuseModes[zOrder] == MODE_SKIP)
> +        {
> +            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
> +            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
> +            checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
> +
> +            if ((m_slice->m_sliceType != B_SLICE ||
> m_param->bIntraInBFrames) &&
> +                (!m_param->bEnableCbfFastMode ||
> md.bestMode->cu.getQtRootCbf(0)))
> +            {
> +                md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
> +                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
> +                checkBestMode(md.pred[PRED_INTRA], depth);
> +
> +                if (depth == g_maxCUDepth && cuGeom.log2CUSize >
> m_slice->m_sps->quadtreeTULog2MinSize)
> +                {
> +                    md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU,
> cuGeom);
> +                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN,
> &reuseModes[zOrder]);
> +                    checkBestMode(md.pred[PRED_INTRA_NxN], depth);
> +                }
> +            }
> +
> +            if (m_bTryLossless)
> +                tryLossless(cuGeom);
> +
> +            if (mightSplit)
> +                addSplitFlagCost(*md.bestMode, cuGeom.depth);
> +
> +            mightSplit = false;
> +            mightNotSplit = false;
> +
> +            // increment zOrder offset to point to next best depth in
> sharedDepth buffer
> +            zOrder += g_depthInc[g_maxCUDepth - 1][reuseDepth[zOrder]];
> +
> +            int numPredDir = m_slice->isInterP() ? 1 : 2;
> +            for (int i = 0; i < md.bestMode->cu.getNumPartInter(); i++)
> +                for (int l = 0; l < numPredDir; l++)
> +                    m_reuseInterDataCTU++;
> +        }
> +    }
> +
>      if (mightNotSplit)
>      {
>          md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
> @@ -1173,7 +1225,7 @@
>              {
>                  m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.encodeIdx);
>                  m_rqt[nextDepth].cur.load(*nextContext);
> -                compressInterCU_rd5_6(parentCTU, childGeom);
> +                compressInterCU_rd5_6(parentCTU, childGeom,
> reuseIntraData, zOrder);
>
>                  // Save best CU and pred data for this sub CU
>                  splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> @@ -1182,7 +1234,10 @@
>                  nextContext = &nd.bestMode->contexts;
>              }
>              else
> +            {
>                  splitCU->setEmptyPart(childGeom, subPartIdx);
> +                zOrder += g_depthInc[g_maxCUDepth - 1][nextDepth];
> +            }
>          }
>          nextContext->store(splitPred->contexts);
>          if (mightNotSplit)
> diff -r 8d2f418829c8 -r 8606c4019f6b source/encoder/analysis.h
> --- a/source/encoder/analysis.h Sat Dec 20 21:27:14 2014 +0900
> +++ b/source/encoder/analysis.h Mon Dec 22 16:58:20 2014 +0530
> @@ -99,8 +99,8 @@
>
>      /* full analysis for a P or B slice CU */
>      void compressInterCU_dist(const CUData& parentCTU, const CUGeom&
> cuGeom);
> -    void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom&
> cuGeom);
> -    void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&
> cuGeom);
> +    void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom&
> cuGeom, analysis_intra_data* sdata, uint32_t &zOrder);
> +    void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&
> cuGeom, analysis_intra_data* sdata, uint32_t &zOrder);
>
>      /* measure merge and skip */
>      void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom&
> cuGeom);
> diff -r 8d2f418829c8 -r 8606c4019f6b source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Sat Dec 20 21:27:14 2014 +0900
> +++ b/source/encoder/encoder.cpp        Mon Dec 22 16:58:20 2014 +0530
> @@ -1639,12 +1639,16 @@
>      else if (analysis->sliceType == X265_TYPE_P)
>      {
>          X265_FREAD(analysis->interData, sizeof(analysis_inter_data),
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
> +        X265_FREAD(((analysis_intra_data *)analysis->intraData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +        X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
>          consumedBytes += frameRecordSize;
>          totalConsumedBytes = consumedBytes;
>      }
>      else
>      {
>          X265_FREAD(analysis->interData, sizeof(analysis_inter_data),
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
> +        X265_FREAD(((analysis_intra_data *)analysis->intraData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +        X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
>          consumedBytes += frameRecordSize;
>      }
>  #undef X265_FREAD
> @@ -1668,9 +1672,15 @@
>      if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
> X265_TYPE_I)
>          analysis->frameRecordSize += sizeof(uint8_t) *
> analysis->numCUsInFrame * analysis->numPartitions * 3;
>      else if (analysis->sliceType == X265_TYPE_P)
> +    {
>          analysis->frameRecordSize += sizeof(analysis_inter_data) *
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
> +        analysis->frameRecordSize += sizeof(uint8_t) *
> analysis->numCUsInFrame * analysis->numPartitions * 2;
> +    }
>      else
> +    {
>          analysis->frameRecordSize += sizeof(analysis_inter_data) *
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2;
> +        analysis->frameRecordSize += sizeof(uint8_t) *
> analysis->numCUsInFrame * analysis->numPartitions * 2;
> +    }
>
>      X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1,
> m_analysisFile);
>      X265_FWRITE(&analysis->poc, sizeof(int), 1, m_analysisFile);
> @@ -1687,10 +1697,14 @@
>      else if (analysis->sliceType == X265_TYPE_P)
>      {
>          X265_FWRITE(analysis->interData, sizeof(analysis_inter_data),
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU, m_analysisFile);
> +        X265_FWRITE(((analysis_intra_data*)analysis->intraData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +        X265_FWRITE(((analysis_intra_data*)analysis->intraData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
>      }
>      else
>      {
>          X265_FWRITE(analysis->interData, sizeof(analysis_inter_data),
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * 2, m_analysisFile);
> +        X265_FWRITE(((analysis_intra_data*)analysis->intraData)->depth,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
> +        X265_FWRITE(((analysis_intra_data*)analysis->intraData)->modes,
> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
> m_analysisFile);
>      }
>  #undef X265_FWRITE
>  }
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141223/37fed0a8/attachment-0001.html>