[x265] [X265] [Patch] [Master, Release 3.5] fix: corrects output mismatch for cutree-enabled analysis save/load encodes with reuse levels between 1 and 10 for similar encoder settings.

Srikanth Kurapati srikanth.kurapati at multicorewareinc.com
Thu Jan 28 09:14:13 UTC 2021


Please ignore this patch; I will resend the final version.

On Wed, Jan 27, 2021 at 6:23 PM Srikanth Kurapati <
srikanth.kurapati at multicorewareinc.com> wrote:

> From a54176cf1fc83c3aebb0f4f4f2b6383d3b50ebde Mon Sep 17 00:00:00 2001
> From: Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
> Date: Wed, 30 Dec 2020 17:00:08 +0530
> Subject: [PATCH] fix: corrects output mismatch for cutree enabled analysis
>  save/load encodes with reuse-levels in between 1 to 10 for similar encoder
>  settings.
>
> - updates the documentation for the analysis save/load options.
> ---
>  doc/reST/cli.rst             |  34 +++++-----
>  source/abrEncApp.cpp         |  16 ++++-
>  source/common/common.h       |   4 ++
>  source/common/cudata.h       |   2 +-
>  source/encoder/analysis.cpp  |  32 ++++++++-
>  source/encoder/analysis.h    |   1 +
>  source/encoder/api.cpp       |  28 +++++++-
>  source/encoder/encoder.cpp   | 124 ++++++++++++++++++++++++++---------
>  source/encoder/slicetype.cpp |   2 +-
>  source/x265.h                |   4 +-
>  10 files changed, 186 insertions(+), 61 deletions(-)
>
> diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
> index 94c2a5175..bb1396e8a 100755
> --- a/doc/reST/cli.rst
> +++ b/doc/reST/cli.rst
> @@ -934,14 +934,14 @@ will not reuse analysis if slice type parameters do
> not match.
>  .. option:: --analysis-save <filename>
>
>   Encoder outputs analysis information of each frame. Analysis data from
> save mode is
> - written to the file specified. Requires cutree, pmode to be off. Default
> disabled.
> + written to the file specified. Requires pmode to be off. Default
> disabled.
>
>   The amount of analysis data stored is determined by
> :option:`--analysis-save-reuse-level`.
>
>  .. option:: --analysis-load <filename>
>
>   Encoder reuses analysis information from the file specified. By reading
> the analysis data written by
> - an earlier encode of the same sequence, substantial redundant work may
> be avoided. Requires cutree, pmode
> + an earlier encode of the same sequence, substantial redundant work may
> be avoided. Requires pmode
>   to be off. Default disabled.
>
>   The amount of analysis data reused is determined by
> :option:`--analysis-load-reuse-level`.
> @@ -961,21 +961,21 @@ will not reuse analysis if slice type parameters do
> not match.
>   Note that :option:`--analysis-save-reuse-level` and
> :option:`--analysis-load-reuse-level` must be paired
>   with :option:`--analysis-save` and :option:`--analysis-load`
> respectively.
>
> - +--------------+------------------------------------------+
> - | Level        | Description                              |
> - +==============+==========================================+
> - | 1            | Lookahead information                    |
> - +--------------+------------------------------------------+
> - | 2 to 4       | Level 1 + intra/inter modes, ref's       |
> - +--------------+------------------------------------------+
> - | 5 and 6      | Level 2 + rect-amp                       |
> - +--------------+------------------------------------------+
> - | 7            | Level 5 + AVC size CU refinement         |
> - +--------------+------------------------------------------+
> - | 8 and 9      | Level 5 + AVC size Full CU analysis-info |
> - +--------------+------------------------------------------+
> - | 10           | Level 5 + Full CU analysis-info          |
> - +--------------+------------------------------------------+
> + +--------------+----------------------------------------------------+
> + | Level        | Description                                        |
> + +==============+====================================================+
> + | 1            | Lookahead information                              |
> + +--------------+----------------------------------------------------+
> + | 2 to 4       | Level 1 + intra/inter modes, ref's, cutree offsets |
> + +--------------+----------------------------------------------------+
> + | 5 and 6      | Level 2 + rect-amp                                 |
> + +--------------+----------------------------------------------------+
> + | 7            | Level 5 + AVC size CU refinement                   |
> + +--------------+----------------------------------------------------+
> + | 8 and 9      | Level 5 + AVC size Full CU analysis-info           |
> + +--------------+----------------------------------------------------+
> + | 10           | Level 5 + Full CU analysis-info                    |
> + +--------------+----------------------------------------------------+
>
>  .. option:: --refine-mv-type <string>
>
> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
> index fa62ebf63..8c5232dc1 100644
> --- a/source/abrEncApp.cpp
> +++ b/source/abrEncApp.cpp
> @@ -99,6 +99,8 @@ namespace X265_NS {
>              }
>
>              m_analysisBuffer[pass] = X265_MALLOC(x265_analysis_data,
> m_queueSize);
> +            if (m_analysisBuffer[pass])
> +                memset(m_analysisBuffer[pass], 0,
> sizeof(x265_analysis_data) * m_queueSize);
>              m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize];
>              m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize];
>              m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize];
> @@ -340,7 +342,12 @@ namespace X265_NS {
>              memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char)
> * src->depthBytes);
>              memcpy(intraDst->chromaModes, intraSrc->chromaModes,
> sizeof(uint8_t) * src->depthBytes);
>              if (m_param->rc.cuTree)
> -                memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
> sizeof(int8_t) * src->depthBytes);
> +            {
> +                if (m_param->analysisSaveReuseLevel == 10)
> +                    memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
> sizeof(int8_t) * src->depthBytes);
> +                else
> +                    memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
> sizeof(int8_t) * src->numCUsInFrame *
> X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize));
> +            }
>          }
>          else
>          {
> @@ -355,7 +362,12 @@ namespace X265_NS {
>              memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
> src->depthBytes);
>              memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
> src->depthBytes);
>              if (m_param->rc.cuTree)
> -                memcpy(interDst->cuQPOff, interSrc->cuQPOff,
> sizeof(int8_t) * src->depthBytes);
> +            {
> +                if (m_param->analysisReuseLevel == 10)
> +                    memcpy(interDst->cuQPOff, interSrc->cuQPOff,
> sizeof(int8_t) * src->depthBytes);
> +                else
> +                    memcpy(interDst->cuQPOff, interSrc->cuQPOff,
> sizeof(int8_t) * src->numCUsInFrame *
> X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize));
> +            }
>              if (m_param->analysisSaveReuseLevel > 4)
>              {
>                  memcpy(interDst->partSize, interSrc->partSize,
> sizeof(uint8_t) * src->depthBytes);
> diff --git a/source/common/common.h b/source/common/common.h
> index 8c06cd79e..a8f3ae71a 100644
> --- a/source/common/common.h
> +++ b/source/common/common.h
> @@ -343,6 +343,10 @@ typedef int16_t  coeff_t;      // transform
> coefficient
>
>  namespace X265_NS {
>
> +const uint8_t g_maxCtuSplits[MIN_LOG2_CU_SIZE + 1] = { 1, 5, 21, 85 }; /*
> max ctu partitions as per min max cu configurations */
> +
> +#define X265_MAX_CTU_SPLITS(maxcusize, mincusize)
> (g_maxCtuSplits[g_log2Size[maxcusize] - g_log2Size[mincusize]])
> +
>  enum { SAO_NUM_OFFSET = 4 };
>
>  enum SaoMergeMode
> diff --git a/source/common/cudata.h b/source/common/cudata.h
> index 8397f0568..c7d9a1972 100644
> --- a/source/common/cudata.h
> +++ b/source/common/cudata.h
> @@ -371,7 +371,7 @@ struct CUDataMemPool
>              CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) *
> numInstances);
>          }
>          else
> -        {
> +        {
>              uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
> CHROMA_V_SHIFT(csp));
>              CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2)
> * numInstances);
>          }
> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
> index aabf386ca..6e1d5c730 100644
> --- a/source/encoder/analysis.cpp
> +++ b/source/encoder/analysis.cpp
> @@ -74,6 +74,7 @@ Analysis::Analysis()
>  {
>      m_reuseInterDataCTU = NULL;
>      m_reuseRef = NULL;
> +    m_reuseQPOff = NULL;
>      m_bHD = false;
>      m_modeFlag[0] = false;
>      m_modeFlag[1] = false;
> @@ -220,6 +221,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
> const CUGeom& cuGeom, con
>          if (m_param->analysisSave && !m_param->analysisLoad)
>              for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir;
> i++)
>                  m_reuseRef[i] = -1;
> +
> +        if (m_param->rc.cuTree)
> +            m_reuseQPOff = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr *
> X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize)];
>      }
>      ProfileCUScope(ctu, totalCTUTime, totalCTUs);
>
> @@ -233,6 +237,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
> const CUGeom& cuGeom, con
>              memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr
> * numPartition], sizeof(char) * numPartition);
>              memcpy(ctu.m_chromaIntraDir,
> &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
> numPartition);
>          }
> +        if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10)
> +            m_reuseQPOff = &intraDataCTU->cuQPOff[ctu.m_cuAddr *
> X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize)];
>          compressIntraCU(ctu, cuGeom, qp);
>      }
>      else
> @@ -520,6 +526,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
> parentCTU, const CUGeom& cuGeom
>      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>
> +    if (m_param->rc.cuTree  && m_param->analysisSaveReuseLevel > 1 &&
> m_param->analysisSaveReuseLevel < 10)
> +        m_reuseQPOff[cuGeom.geomRecurId] = (int8_t)(qp -
> (int32_t)(m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp + 0.5));
> +
>      bool bAlreadyDecided = m_param->intraRefine != 4 &&
> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
> !(m_param->bAnalysisType == HEVC_INFO);
>      bool bDecidedDepth = m_param->intraRefine != 4 &&
> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
>      int split = 0;
> @@ -870,6 +879,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
> parentCTU, const CUGeom& c
>      uint32_t minDepth = m_param->rdLevel <= 4 ?
> topSkipMinDepth(parentCTU, cuGeom) : 0;
>      uint32_t splitRefs[4] = { 0, 0, 0, 0 };
>
> +    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
> m_param->analysisSaveReuseLevel < 10)
> +        m_reuseQPOff[cuGeom.geomRecurId] = (int8_t)(qp -
> (int32_t)(m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp + 0.5));
> +
>      X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
> support RD 0 or 1\n");
>
>      PMODE pmode(*this, cuGeom);
> @@ -1152,6 +1164,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
> CUData& parentCTU, const CUGeom&
>      uint32_t cuAddr = parentCTU.m_cuAddr;
>      ModeDepth& md = m_modeDepth[depth];
>
> +    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
> m_param->analysisSaveReuseLevel < 10)
> +        m_reuseQPOff[cuGeom.geomRecurId] = (int8_t)(qp -
> (int32_t)(m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp + 0.5));
>
>      if (m_param->searchMethod == X265_SEA)
>      {
> @@ -1856,6 +1870,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
> CUData& parentCTU, const CUGeom&
>      ModeDepth& md = m_modeDepth[depth];
>      md.bestMode = NULL;
>
> +    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
> m_param->analysisSaveReuseLevel < 10)
> +        m_reuseQPOff[cuGeom.geomRecurId] = (int8_t)(qp -
> (int32_t)(m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp + 0.5));
> +
>      if (m_param->searchMethod == X265_SEA)
>      {
>          int numPredDir = m_slice->isInterP() ? 1 : 2;
> @@ -3647,11 +3664,20 @@ int Analysis::calculateQpforCuSize(const CUData&
> ctu, const CUGeom& cuGeom, int3
>
>      if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
>      {
> -        int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
> cuGeom.absPartIdx;
> +        int cuIdx;
> +        int8_t cuQPOffSet = 0;
> +
> +        if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
> == 10)
> +            cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
> cuGeom.absPartIdx;
> +        else
> +            cuIdx = (ctu.m_cuAddr *
> X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize)) +
> cuGeom.geomRecurId;
> +
>          if (ctu.m_slice->m_sliceType == I_SLICE)
> -            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
> +            cuQPOffSet =
> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx];
>          else
> -            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
> +            cuQPOffSet =
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx];
> +
> +        return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 + cuQPOffSet));
>      }
>      if (m_param->rc.hevcAq)
>      {
> diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
> index 3bcb56bc3..3a5cda816 100644
> --- a/source/encoder/analysis.h
> +++ b/source/encoder/analysis.h
> @@ -126,6 +126,7 @@ protected:
>      int32_t*                   m_reuseRef;
>      uint8_t*                   m_reuseDepth;
>      uint8_t*                   m_reuseModes;
> +    int8_t *                   m_reuseQPOff; // array of QP values for
> analysis reuse at reuse levels > 1 and < 10 when cutree is enabled
>      uint8_t*                   m_reusePartSize;
>      uint8_t*                   m_reuseMergeFlag;
>      x265_analysis_MV*          m_reuseMv[2];
> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
> index a986355e0..c263e2a87 100644
> --- a/source/encoder/api.cpp
> +++ b/source/encoder/api.cpp
> @@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
>          CHECKED_MALLOC_ZERO(intraData->partSizes, char,
> analysis->numPartitions * analysis->numCUsInFrame);
>          CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>          if (param->rc.cuTree)
> -            CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +        {
> +            if (maxReuseLevel == 10)
> +            {
> +                CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +            }
> +            else
> +            {
> +                CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
> X265_MAX_CTU_SPLITS(param->maxCUSize, param->minCUSize) *
> analysis->numCUsInFrame);
> +            }
> +        }
>      }
>      analysis->intraData = intraData;
>
> @@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
>          CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>
>          if (param->rc.cuTree && !isMultiPassOpt)
> -            CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +        {
> +            if (maxReuseLevel == 10)
> +            {
> +                CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +            }
> +            else
> +            {
> +                CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
> X265_MAX_CTU_SPLITS(param->maxCUSize, param->minCUSize) *
> analysis->numCUsInFrame);
> +            }
> +        }
>          CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>          CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>          CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
> analysis->numPartitions * analysis->numCUsInFrame);
> @@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
>              X265_FREE((analysis->intraData)->partSizes);
>              X265_FREE((analysis->intraData)->chromaModes);
>              if (param->rc.cuTree)
> -                X265_FREE((analysis->intraData)->cuQPOff);
> +            {
> +                X265_FREE_ZERO((analysis->intraData)->cuQPOff);
> +            }
>          }
>          X265_FREE(analysis->intraData);
>          analysis->intraData = NULL;
> @@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
>          X265_FREE((analysis->interData)->depth);
>          X265_FREE((analysis->interData)->modes);
>          if (!isMultiPassOpt && param->rc.cuTree)
> +        {
>              X265_FREE((analysis->interData)->cuQPOff);
> +        }
>          X265_FREE((analysis->interData)->mvpIdx[0]);
>          X265_FREE((analysis->interData)->mvpIdx[1]);
>          X265_FREE((analysis->interData)->mv[0]);
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index 1f710e1ce..63c244ace 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -4444,6 +4444,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              }
>          }
>      }
> +
> +    int8_t *cuQPBuf = NULL;
> +    uint32_t reuseBufSize = 0;
> +    if (m_param->rc.cuTree)
> +    {
> +        if (m_param->analysisLoadReuseLevel == 10)
> +            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> +        else if (m_param->analysisLoadReuseLevel > 1)
> +            reuseBufSize = X265_MAX_CTU_SPLITS(m_param->maxCUSize,
> m_param->minCUSize) * analysis->numCUsInFrame;
> +    }
> +
>      if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
> X265_TYPE_I)
>      {
>          if (m_param->bAnalysisType == HEVC_INFO)
> @@ -4452,19 +4463,26 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              return;
>
>          uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSizes = NULL;
> -        int8_t *cuQPBuf = NULL;
>
>          tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>          depthBuf = tempBuf;
>          modeBuf = tempBuf + depthBytes;
>          partSizes = tempBuf + 2 * depthBytes;
> -        if (m_param->rc.cuTree)
> -            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>
>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->depth);
>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
>          X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->partSizes);
> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
> +        if (m_param->rc.cuTree)
> +        {
> +            if (m_param->analysisLoadReuseLevel == 10)
> +            {
> +                X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, intraPic->cuQPOff);
> +            }
> +            else if (m_param->analysisLoadReuseLevel > 1)
> +            {
> +                X265_FREAD(analysis->intraData->cuQPOff, sizeof(int8_t),
> reuseBufSize, m_analysisFileIn, intraPic->cuQPOff);
> +            }
> +        }
>
>          size_t count = 0;
>          for (uint32_t d = 0; d < depthBytes; d++)
> @@ -4480,7 +4498,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              memset(&(analysis->intraData)->depth[count], depthBuf[d],
> bytes);
>              memset(&(analysis->intraData)->chromaModes[count],
> modeBuf[d], bytes);
>              memset(&(analysis->intraData)->partSizes[count],
> partSizes[d], bytes);
> -            if (m_param->rc.cuTree)
> +            if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel ==
> 10)
>                  memset(&(analysis->intraData)->cuQPOff[count],
> cuQPBuf[d], bytes);
>              count += bytes;
>          }
> @@ -4497,7 +4515,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>                  memset(&(analysis->intraData)->modes[cnt],
> tempLumaBuf[ctu32Idx], factor);
>              X265_FREE(tempLumaBuf);
>          }
> -        if (m_param->rc.cuTree)
> +        if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)
>              X265_FREE(cuQPBuf);
>          X265_FREE(tempBuf);
>          consumedBytes += frameRecordSize;
> @@ -4515,7 +4533,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>          MV* mv[2];
>          int8_t* refIdx[2];
> -        int8_t* cuQPBuf = NULL;
>
>          int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>          bool bIntraInInter = false;
> @@ -4535,12 +4552,20 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
>              depthBuf = tempBuf;
>              modeBuf = tempBuf + depthBytes;
> -            if (m_param->rc.cuTree)
> -                cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>
>              X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->depth);
>              X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->modes);
> -            if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
> +            if (m_param->rc.cuTree)
> +            {
> +                if (m_param->analysisLoadReuseLevel == 10)
> +                {
> +                    X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, interPic->cuQPOff);
> +                }
> +                else if (m_param->analysisLoadReuseLevel > 1)
> +                {
> +                    X265_FREAD(analysis->interData->cuQPOff,
> sizeof(int8_t), reuseBufSize, m_analysisFileIn, interPic->cuQPOff);
> +                }
> +            }
>
>              if (m_param->analysisLoadReuseLevel > 4)
>              {
> @@ -4578,7 +4603,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>                      depthBuf[d] = 1;
>                  memset(&(analysis->interData)->depth[count], depthBuf[d],
> bytes);
>                  memset(&(analysis->interData)->modes[count], modeBuf[d],
> bytes);
> -                if (m_param->rc.cuTree)
> +                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
> == 10)
>                      memset(&(analysis->interData)->cuQPOff[count],
> cuQPBuf[d], bytes);
>                  if (m_param->analysisLoadReuseLevel > 4)
>                  {
> @@ -4612,7 +4637,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>                  count += bytes;
>              }
>
> -            if (m_param->rc.cuTree)
> +            if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel ==
> 10)
>                  X265_FREE(cuQPBuf);
>              X265_FREE(tempBuf);
>          }
> @@ -4736,7 +4761,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>      int numPartitions = analysis->numPartitions;
>      int numCUsInFrame = analysis->numCUsInFrame;
>      int numCuInHeight = analysis->numCuInHeight;
> -    /* Allocate memory for scaled resoultion's numPartitions and
> numCUsInFrame*/
> +    /* Allocate memory for scaled resolution's numPartitions and
> numCUsInFrame */
>      analysis->numPartitions = m_param->num4x4Partitions;
>      analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
>      analysis->numCuInHeight = cuLoc.heightInCU;
> @@ -4808,25 +4833,42 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          X265_FREE(vbvCostBuf);
>      }
>
> +    uint32_t reuseBufSize = 0;
> +    int8_t *cuQPBuf = NULL;
> +    if (m_param->rc.cuTree)
> +    {
> +        if (m_param->analysisLoadReuseLevel == 10)
> +            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> +        else if (m_param->analysisLoadReuseLevel > 1)
> +            reuseBufSize = (X265_MAX_CTU_SPLITS(m_param->maxCUSize,
> m_param->minCUSize) / factor) * (analysis->numCUsInFrame);
> +    }
> +
>      if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
> X265_TYPE_I)
>      {
>          if (m_param->analysisLoadReuseLevel < 2)
>              return;
>
>          uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSizes = NULL;
> -        int8_t *cuQPBuf = NULL;
>
>          tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>          depthBuf = tempBuf;
>          modeBuf = tempBuf + depthBytes;
>          partSizes = tempBuf + 2 * depthBytes;
> -        if (m_param->rc.cuTree)
> -            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>
>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->depth);
>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
>          X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->partSizes);
> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
> +        if (m_param->rc.cuTree)
> +        {
> +            if (m_param->analysisLoadReuseLevel == 10)
> +            {
> +                X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
> m_analysisFileIn, intraPic->cuQPOff);
> +            }
> +            else if (m_param->analysisLoadReuseLevel > 1)
> +            {
> +                X265_FREAD(&(analysis->intraData)->cuQPOff,
> sizeof(int8_t), reuseBufSize, m_analysisFileIn, intraPic->cuQPOff);
> +            }
> +        }
>
>          uint32_t count = 0;
>          for (uint32_t d = 0; d < depthBytes; d++)
> @@ -4848,7 +4890,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>                  memset(&(analysis->intraData)->depth[count], depthBuf[d],
> bytes);
>                  memset(&(analysis->intraData)->chromaModes[count],
> modeBuf[d], bytes);
>                  memset(&(analysis->intraData)->partSizes[count],
> partSizes[d], bytes);
> -                if (m_param->rc.cuTree)
> +                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
> == 10)
>                      memset(&(analysis->intraData)->cuQPOff[count],
> cuQPBuf[d], bytes);
>                  count += bytes;
>                  d += getCUIndex(&cuLoc, &count, bytes, 1);
> @@ -4868,7 +4910,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
>          }
>          X265_FREE(tempLumaBuf);
> -        if (m_param->rc.cuTree)
> +        if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)
>              X265_FREE(cuQPBuf);
>          X265_FREE(tempBuf);
>          consumedBytes += frameRecordSize;
> @@ -4886,7 +4928,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>          MV* mv[2];
>          int8_t* refIdx[2];
> -        int8_t* cuQPBuf = NULL;
>
>          int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>          bool bIntraInInter = false;
> @@ -4900,12 +4941,21 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
>          depthBuf = tempBuf;
>          modeBuf = tempBuf + depthBytes;
> -        if (m_param->rc.cuTree)
> -            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>
>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->depth);
>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->modes);
> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
> +        if (m_param->rc.cuTree)
> +        {
> +            if (m_param->analysisLoadReuseLevel == 10)
> +            {
> +                X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, interPic->cuQPOff);
> +            }
> +            else if (m_param->analysisLoadReuseLevel > 1)
> +            {
> +                X265_FREAD(&(analysis->interData)->cuQPOff,
> sizeof(int8_t), reuseBufSize, m_analysisFileIn, interPic->cuQPOff);
> +            }
> +        }
> +
>          if (m_param->analysisLoadReuseLevel > 4)
>          {
>              partSize = modeBuf + depthBytes;
> @@ -4954,7 +5004,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              {
>                  memset(&(analysis->interData)->depth[count], writeDepth,
> bytes);
>                  memset(&(analysis->interData)->modes[count], modeBuf[d],
> bytes);
> -                if (m_param->rc.cuTree)
> +                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
> == 10)
>                      memset(&(analysis->interData)->cuQPOff[count],
> cuQPBuf[d], bytes);
>                  if (m_param->analysisLoadReuseLevel == 10 &&
> bIntraInInter)
>                      memset(&(analysis->intraData)->chromaModes[count],
> chromaDir[d], bytes);
> @@ -5016,7 +5066,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              }
>          }
>
> -        if (m_param->rc.cuTree)
> +        if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)
>              X265_FREE(cuQPBuf);
>          X265_FREE(tempBuf);
>
> @@ -5046,7 +5096,9 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              }
>          }
>          else
> +        {
>              X265_FREAD((analysis->interData)->ref, sizeof(int32_t),
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir,
> m_analysisFileIn, interPic->ref);
> +        }
>
>          consumedBytes += frameRecordSize;
>          if (numDir == 1)
> @@ -5510,8 +5562,13 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>          analysis->frameRecordSize += analysis->numCUsInFrame *
> sizeof(sse_t);
>      }
>
> +    uint32_t reuseQPBufsize = 0;
>      if (m_param->analysisSaveReuseLevel > 1)
>      {
> +        if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)
> +            reuseQPBufsize = depthBytes;
> +        else if (m_param->rc.cuTree)
> +            reuseQPBufsize = X265_MAX_CTU_SPLITS(m_param->maxCUSize,
> m_param->minCUSize) * analysis->numCUsInFrame;
>
>          if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
> == X265_TYPE_I)
>          {
> @@ -5536,10 +5593,11 @@ void
> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>                      partSize = ctu->m_partSize[absPartIdx];
>                      intraDataCTU->partSizes[depthBytes] = partSize;
>
> -                    if (m_param->rc.cuTree)
> +                    if (m_param->rc.cuTree &&
> m_param->analysisSaveReuseLevel == 10)
>                          intraDataCTU->cuQPOff[depthBytes] =
> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>                      absPartIdx += ctu->m_numPartitions >> (depth * 2);
>                  }
> +
>                  memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
> ctu->m_numPartitions);
>              }
>          }
> @@ -5567,7 +5625,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>                          predMode = 4; // used as indicator if the block
> is coded as bidir
>
>                      interDataCTU->modes[depthBytes] = predMode;
> -                    if (m_param->rc.cuTree)
> +                    if (m_param->rc.cuTree &&
> m_param->analysisSaveReuseLevel == 10)
>                          interDataCTU->cuQPOff[depthBytes] =
> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>
>                      if (m_param->analysisSaveReuseLevel > 4)
> @@ -5599,13 +5657,14 @@ void
> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>                      }
>                      absPartIdx += ctu->m_numPartitions >> (depth * 2);
>                  }
> +
>                  if (m_param->analysisSaveReuseLevel == 10 &&
> bIntraInInter)
>                      memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
> ctu->m_numPartitions);
>              }
>          }
>
>          if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
> == X265_TYPE_I) && m_param->rc.cuTree)
> -            analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
> (sizeof(int8_t) * depthBytes);
> +            analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
> (sizeof(int8_t) * reuseQPBufsize);
>          else if (analysis->sliceType == X265_TYPE_IDR ||
> analysis->sliceType == X265_TYPE_I)
>              analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
>          else
> @@ -5613,7 +5672,8 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>              /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */
>              analysis->frameRecordSize += depthBytes * 2;
>              if (m_param->rc.cuTree)
> -            analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
> +                analysis->frameRecordSize += (sizeof(int8_t) *
> reuseQPBufsize);
> +
>              if (m_param->analysisSaveReuseLevel > 4)
>                  analysis->frameRecordSize += (depthBytes * 2);
>
> @@ -5669,7 +5729,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>          X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
>          X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
> depthBytes, m_analysisFileOut);
>          if (m_param->rc.cuTree)
> -            X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
> depthBytes, m_analysisFileOut);
> +            X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
> reuseQPBufsize, m_analysisFileOut);
>          X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>      }
>      else
> @@ -5677,7 +5737,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>          X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
>          X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
>          if (m_param->rc.cuTree)
> -            X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
> depthBytes, m_analysisFileOut);
> +            X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
> reuseQPBufsize, m_analysisFileOut);
>          if (m_param->analysisSaveReuseLevel > 4)
>          {
>              X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
> @@ -5762,7 +5822,7 @@ void
> Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c
>                      interData->mv[1][depthBytes].word =
> ctu->m_mv[1][absPartIdx].word;
>                      interData->mvpIdx[1][depthBytes] =
> ctu->m_mvpIdx[1][absPartIdx];
>                      ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx];
> -                    predMode = 4; // used as indiacator if the block is
> coded as bidir
> +                    predMode = 4; // used as indicator if the block is
> coded as bidir
>                  }
>                  interData->modes[depthBytes] = predMode;
>
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 0adb0d0db..9bee58192 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>
>      if (!framecnt)
>      {
> -        if (m_param->rc.cuTree)
> +        if (m_param->rc.cuTree && (!m_param->analysisLoad ||
> (m_param->analysisLoad && m_param->analysisLoadReuseLevel == 1)))
>              cuTree(frames, 0, bKeyframe);
>          return;
>      }
> diff --git a/source/x265.h b/source/x265.h
> index f44040ba7..8d7a75826 100644
> --- a/source/x265.h
> +++ b/source/x265.h
> @@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data
>      uint8_t*  modes;
>      char*     partSizes;
>      uint8_t*  chromaModes;
> -    int8_t*    cuQPOff;
> +    int8_t*   cuQPOff;
>  }x265_analysis_intra_data;
>
>  typedef struct x265_analysis_MV
> @@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data
>      uint8_t*    interDir;
>      uint8_t*    mvpIdx[2];
>      int8_t*     refIdx[2];
> -    x265_analysis_MV*         mv[2];
> +    x265_analysis_MV* mv[2];
>      int64_t*     sadCost;
>      int8_t*    cuQPOff;
>  }x265_analysis_inter_data;
> --
> 2.20.1.windows.1
>
>
> --
> *With Regards,*
> *Srikanth Kurapati.*
>


-- 
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210128/bf08ae60/attachment-0001.html>


More information about the x265-devel mailing list