[x265] [PATCH] [Release_3.5] correct reusing cutree qp offsets in load encode for reuse-level > 1 and < 10 for same resolution

Kavitha Sampath kavitha at multicorewareinc.com
Fri Nov 20 11:02:10 CET 2020


On Tue, Nov 17, 2020 at 8:22 AM Mahesh Pittala <mahesh at multicorewareinc.com>
wrote:

> From 787ae5da7431b5d113ea033cf6502ac1cc1e7572 Mon Sep 17 00:00:00 2001
> From: maheshpittala <mahesh at multicorewareinc.com>
> Date: Sun, 1 Nov 2020 10:09:28 +0530
> Subject: [PATCH] correct reusing cutree qp offsets in load encode for
>  reuse-level > 1 and < 10 for same resolution
>
> Earlier in save encode, dumped only best modes analysis data of that CTU
> into file after encoding, not for each split CU's analysis. So in analysis
> load, it reads the same best mode's qp value even for split CU's(whereas
> split CU's qp would be different in save encode) and redo-analysis.
>
> So now, cuGeom.geomRecurId stores unique ID for each CU and even for
> parents CU so based on this storing cutree qp offset and loaded same
>
[KS] The commit message sounds informal. I suggest rephrasing it.

> ---
>  source/abrEncApp.cpp         |  6 +++
>  source/common/cudata.cpp     |  6 ++-
>  source/common/cudata.h       |  3 +-
>  source/encoder/analysis.cpp  | 32 ++++++++++--
>  source/encoder/api.cpp       | 12 +++++
>  source/encoder/encoder.cpp   | 97 ++++++++++++++++++++++++++++++++----
>  source/encoder/slicetype.cpp |  2 +-
>  source/x265.h                |  2 +
>  8 files changed, 140 insertions(+), 20 deletions(-)
>
> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
> index cd85154f1..3550d8b11 100644
> --- a/source/abrEncApp.cpp
> +++ b/source/abrEncApp.cpp
> @@ -342,7 +342,10 @@ namespace X265_NS {
>              memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char)
> * src->depthBytes);
>              memcpy(intraDst->chromaModes, intraSrc->chromaModes,
> sizeof(uint8_t) * src->depthBytes);
>              if (m_param->rc.cuTree)
> +            {
>                  memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
> sizeof(int8_t) * src->depthBytes);
> +                memcpy(intraDst->cuQPOffReuse, intraSrc->cuQPOffReuse,
> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
>
[KS] The maximum number of QPs saved per CTU is 85. Allocating and copying
numPartitions entries per CTU is unnecessary.

> +            }
>          }
>          else
>          {
> @@ -357,7 +360,10 @@ namespace X265_NS {
>              memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
> src->depthBytes);
>              memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
> src->depthBytes);
>              if (m_param->rc.cuTree)
> +            {
>                  memcpy(interDst->cuQPOff, interSrc->cuQPOff,
> sizeof(int8_t) * src->depthBytes);
> +                memcpy(interDst->cuQPOffReuse, interSrc->cuQPOffReuse,
> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
> +            }
>              if (m_param->analysisSaveReuseLevel > 4)
>              {
>                  memcpy(interDst->partSize, interSrc->partSize,
> sizeof(uint8_t) * src->depthBytes);
> diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp
> index 19281dee2..08cdff11a 100644
> --- a/source/common/cudata.cpp
> +++ b/source/common/cudata.cpp
> @@ -194,6 +194,7 @@ void CUData::initialize(const CUDataMemPool& dataPool,
> uint32_t depth, const x26
>
>          m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
>          m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
> +        m_qpreuse    = (int8_t*)charBuf; charBuf += m_numPartitions;
>
[KS] Can you move this out of parentCTU? It would be more appropriate to
include it as an Analysis class member, just like other reuse parameters
such as m_reuseRef, m_reuseDepth, etc.

>          m_log2CUSize         = charBuf; charBuf += m_numPartitions;
>          m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
>          m_tqBypass           = charBuf; charBuf += m_numPartitions;
> @@ -235,6 +236,7 @@ void CUData::initialize(const CUDataMemPool& dataPool,
> uint32_t depth, const x26
>
>          m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
>          m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
> +        m_qpreuse =    (int8_t*)charBuf; charBuf += m_numPartitions;
>          m_log2CUSize         = charBuf; charBuf += m_numPartitions;
>          m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
>          m_tqBypass           = charBuf; charBuf += m_numPartitions;
> @@ -307,7 +309,7 @@ void CUData::initCTU(const Frame& frame, uint32_t
> cuAddr, int qp, uint32_t first
>      X265_CHECK(!(frame.m_encData->m_param->bLossless &&
> !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without
> TQbypass in PPS\n");
>
>      /* initialize the remaining CU data in one memset */
> -    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
> BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
> +    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
>
>      for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
>          m_refTuDepth[i] = -1;
> @@ -358,7 +360,7 @@ void CUData::initSubCU(const CUData& ctu, const
> CUGeom& cuGeom, int qp)
>      m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
>
>      /* initialize the remaining CU data in one memset */
> -    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
> +    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
> BytesPerPartition - 14 : BytesPerPartition - 10) * m_numPartitions);
>      memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
>  }
>
> diff --git a/source/common/cudata.h b/source/common/cudata.h
> index 8397f0568..d58f53e39 100644
> --- a/source/common/cudata.h
> +++ b/source/common/cudata.h
> @@ -192,6 +192,7 @@ public:
>      /* Per-part data, stored contiguously */
>      int8_t*       m_qp;               // array of QP values
>      int8_t*       m_qpAnalysis;       // array of QP values for analysis
> reuse
> +    int8_t*       m_qpreuse;          // array of QP values for analysis
> reuse for reuse levels > 1 and < 10
>      uint8_t*      m_log2CUSize;       // array of cu log2Size TODO: seems
> redundant to depth
>      uint8_t*      m_lumaIntraDir;     // array of intra directions (luma)
>      uint8_t*      m_tqBypass;         // array of CU lossless flags
> @@ -207,7 +208,7 @@ public:
>      uint8_t*      m_transformSkip[3]; // array of transform skipping
> flags per plane
>      uint8_t*      m_cbf[3];           // array of coded block flags (CBF)
> per plane
>      uint8_t*      m_chromaIntraDir;   // array of intra directions
> (chroma)
> -    enum { BytesPerPartition = 24 };  // combined sizeof() of all
> per-part data
> +    enum { BytesPerPartition = 25 };  // combined sizeof() of all
> per-part data
>
>      sse_t*        m_distortion;
>      coeff_t*      m_trCoeff[3];       // transformed coefficient buffer
> per plane
> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
> index aabf386ca..b1d7e3ad1 100644
> --- a/source/encoder/analysis.cpp
> +++ b/source/encoder/analysis.cpp
> @@ -520,6 +520,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
> parentCTU, const CUGeom& cuGeom
>      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>
> +    if (m_param->rc.cuTree)
> +        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
> +
>      bool bAlreadyDecided = m_param->intraRefine != 4 &&
> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
> !(m_param->bAnalysisType == HEVC_INFO);
>      bool bDecidedDepth = m_param->intraRefine != 4 &&
> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
>      int split = 0;
> @@ -870,6 +873,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
> parentCTU, const CUGeom& c
>      uint32_t minDepth = m_param->rdLevel <= 4 ?
> topSkipMinDepth(parentCTU, cuGeom) : 0;
>      uint32_t splitRefs[4] = { 0, 0, 0, 0 };
>
> +    if (m_param->rc.cuTree)
> +        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
> +
>      X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
> support RD 0 or 1\n");
>
>      PMODE pmode(*this, cuGeom);
> @@ -1152,6 +1158,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
> CUData& parentCTU, const CUGeom&
>      uint32_t cuAddr = parentCTU.m_cuAddr;
>      ModeDepth& md = m_modeDepth[depth];
>
> +    if (m_param->rc.cuTree)
> +        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>
>      if (m_param->searchMethod == X265_SEA)
>      {
> @@ -1856,6 +1864,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
> CUData& parentCTU, const CUGeom&
>      ModeDepth& md = m_modeDepth[depth];
>      md.bestMode = NULL;
>
> +    if (m_param->rc.cuTree)
> +        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
> +
>      if (m_param->searchMethod == X265_SEA)
>      {
>          int numPredDir = m_slice->isInterP() ? 1 : 2;
> @@ -3643,15 +3654,26 @@ int Analysis::calculateQpforCuSize(const CUData&
> ctu, const CUGeom& cuGeom, int3
>          if ((distortionData->threshold[ctu.m_cuAddr] < 0.9 ||
> distortionData->threshold[ctu.m_cuAddr] > 1.1)
>              && distortionData->highDistortionCtuCount &&
> distortionData->lowDistortionCtuCount)
>              qp += distortionData->offset[ctu.m_cuAddr];
> -    }
> + }
>
>      if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
>      {
> -        int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
> cuGeom.absPartIdx;
> -        if (ctu.m_slice->m_sliceType == I_SLICE)
> -            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
> +        if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
> == 10)
> +        {
> +            int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
> cuGeom.absPartIdx;
> +            if (ctu.m_slice->m_sliceType == I_SLICE)
> +                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
> +            else
> +                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
> +        }
>          else
> -            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
> +        {
> +            int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
> cuGeom.geomRecurId;
> +            if (ctu.m_slice->m_sliceType == I_SLICE)
> +                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOffReuse[cuIdx]));
> +            else
> +                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOffReuse[cuIdx]));
> +        }
>
[KS] Why is this reuse not applicable to reuse level 1?

>      }
>      if (m_param->rc.hevcAq)
>      {
> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
> index a986355e0..0f266d328 100644
> --- a/source/encoder/api.cpp
> +++ b/source/encoder/api.cpp
> @@ -825,7 +825,10 @@ void x265_alloc_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
>          CHECKED_MALLOC_ZERO(intraData->partSizes, char,
> analysis->numPartitions * analysis->numCUsInFrame);
>          CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>          if (param->rc.cuTree)
> +        {
>              CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +            CHECKED_MALLOC_ZERO(intraData->cuQPOffReuse, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +        }
>      }
>      analysis->intraData = intraData;
>
> @@ -837,7 +840,10 @@ void x265_alloc_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
>          CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>
>          if (param->rc.cuTree && !isMultiPassOpt)
> +        {
>              CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +            CHECKED_MALLOC_ZERO(interData->cuQPOffReuse, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> +        }
>          CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>          CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>          CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
> analysis->numPartitions * analysis->numCUsInFrame);
> @@ -919,7 +925,10 @@ void x265_free_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
>              X265_FREE((analysis->intraData)->partSizes);
>              X265_FREE((analysis->intraData)->chromaModes);
>              if (param->rc.cuTree)
> +            {
>                  X265_FREE((analysis->intraData)->cuQPOff);
> +                X265_FREE((analysis->intraData)->cuQPOffReuse);
> +            }
>          }
>          X265_FREE(analysis->intraData);
>          analysis->intraData = NULL;
> @@ -931,7 +940,10 @@ void x265_free_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
>          X265_FREE((analysis->interData)->depth);
>          X265_FREE((analysis->interData)->modes);
>          if (!isMultiPassOpt && param->rc.cuTree)
> +        {
>              X265_FREE((analysis->interData)->cuQPOff);
> +            X265_FREE((analysis->interData)->cuQPOffReuse);
> +        }
>          X265_FREE((analysis->interData)->mvpIdx[0]);
>          X265_FREE((analysis->interData)->mvpIdx[1]);
>          X265_FREE((analysis->interData)->mv[0]);
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index 1f710e1ce..9666744f3 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -4452,19 +4452,25 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              return;
>
>          uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSizes = NULL;
> -        int8_t *cuQPBuf = NULL;
> +        int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>
>          tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>          depthBuf = tempBuf;
>          modeBuf = tempBuf + depthBytes;
>          partSizes = tempBuf + 2 * depthBytes;
>          if (m_param->rc.cuTree)
> +        {
>              cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
> analysis->numCUsInFrame);
>
[KS] Check whitespace.

> +        }
>
>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->depth);
>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
>          X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->partSizes);
> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
> +        if (m_param->rc.cuTree) {
> +            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, intraPic->cuQPOff);
> +            X265_FREAD(cuQPReuseBuf, sizeof(int8_t), (scaledNumPartition
> * analysis->numCUsInFrame), m_analysisFileIn, intraPic->cuQPOffReuse);
> +        }
>
>          size_t count = 0;
>          for (uint32_t d = 0; d < depthBytes; d++)
> @@ -4484,7 +4490,11 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>                  memset(&(analysis->intraData)->cuQPOff[count],
> cuQPBuf[d], bytes);
>              count += bytes;
>          }
> -
> +        if (m_param->rc.cuTree)
> +        {
> + for (uint32_t i = 0; i < (scaledNumPartition * analysis->numCUsInFrame);
> i++)
> +                memset(&(analysis->intraData)->cuQPOffReuse[i],
> cuQPReuseBuf[i], sizeof(int8_t));
> +        }
>          if (!m_param->scaleFactor)
>          {
>              X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t),
> numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
> @@ -4498,7 +4508,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              X265_FREE(tempLumaBuf);
>          }
>          if (m_param->rc.cuTree)
> +        {
>              X265_FREE(cuQPBuf);
> +            X265_FREE(cuQPReuseBuf);
> +        }
>          X265_FREE(tempBuf);
>          consumedBytes += frameRecordSize;
>      }
> @@ -4515,7 +4528,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>          MV* mv[2];
>          int8_t* refIdx[2];
> -        int8_t* cuQPBuf = NULL;
> +        int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>
[KS] Why can't we reuse cuQPBuf? I agree that the size of the offsets differs
for reuse level 10 and the other levels, but that can be taken care of at
allocation.

>
>          int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>          bool bIntraInInter = false;
> @@ -4536,11 +4549,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              depthBuf = tempBuf;
>              modeBuf = tempBuf + depthBytes;
>              if (m_param->rc.cuTree)
> +            {
>                  cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
> analysis->numCUsInFrame);
> +            }
>
>              X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->depth);
>              X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->modes);
> -            if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
> +            if (m_param->rc.cuTree) {
> +                X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, interPic->cuQPOff);
> +                X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
> (scaledNumPartition * analysis->numCUsInFrame), m_analysisFileIn,
> interPic->cuQPOffReuse);
> +            }
>
>              if (m_param->analysisLoadReuseLevel > 4)
>              {
> @@ -4611,9 +4630,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>                  }
>                  count += bytes;
>              }
> +            if (m_param->rc.cuTree)
> +            {
> + for (uint32_t i = 0; i < (scaledNumPartition * analysis->numCUsInFrame);
> i++)
> +                    memset(&(analysis->interData)->cuQPOffReuse[i],
> cuQPReuseBuf[i], sizeof(int8_t));
> +            }
>
>              if (m_param->rc.cuTree)
> +            {
>                  X265_FREE(cuQPBuf);
> +                X265_FREE(cuQPReuseBuf);
> +            }
>              X265_FREE(tempBuf);
>          }
>          if (m_param->analysisLoadReuseLevel == 10)
> @@ -4814,19 +4841,26 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>              return;
>
>          uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSizes = NULL;
> -        int8_t *cuQPBuf = NULL;
> +        int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;;
>
>          tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>          depthBuf = tempBuf;
>          modeBuf = tempBuf + depthBytes;
>          partSizes = tempBuf + 2 * depthBytes;
>          if (m_param->rc.cuTree)
> +        {
>              cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> +            cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions /
> factor) * analysis->numCUsInFrame);
> +        }
>
>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->depth);
>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
>          X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->partSizes);
> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
> +        if (m_param->rc.cuTree)
> +        {
> +            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, intraPic->cuQPOff);
> +            X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
> ((analysis->numPartitions / factor) * analysis->numCUsInFrame),
> m_analysisFileIn, intraPic->cuQPOffReuse);
> +        }
>
>          uint32_t count = 0;
>          for (uint32_t d = 0; d < depthBytes; d++)
> @@ -4869,7 +4903,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          }
>          X265_FREE(tempLumaBuf);
>          if (m_param->rc.cuTree)
> +        {
>              X265_FREE(cuQPBuf);
> +            X265_FREE(cuQPReuseBuf);
> +        }
>          X265_FREE(tempBuf);
>          consumedBytes += frameRecordSize;
>      }
> @@ -4886,7 +4923,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>          MV* mv[2];
>          int8_t* refIdx[2];
> -        int8_t* cuQPBuf = NULL;
> +        int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>
>          int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>          bool bIntraInInter = false;
> @@ -4901,11 +4938,18 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          depthBuf = tempBuf;
>          modeBuf = tempBuf + depthBytes;
>          if (m_param->rc.cuTree)
> +        {
>              cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> +            cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions /
> factor) * analysis->numCUsInFrame);
> +        }
>
>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->depth);
>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->modes);
> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
> +        if (m_param->rc.cuTree)
> +        {
> +            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, interPic->cuQPOff);
> +            X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
> (analysis->numPartitions / factor) * analysis->numCUsInFrame,
> m_analysisFileIn, interPic->cuQPOffReuse);
> +        }
>          if (m_param->analysisLoadReuseLevel > 4)
>          {
>              partSize = modeBuf + depthBytes;
> @@ -5017,7 +5061,16 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
>          }
>
>          if (m_param->rc.cuTree)
> +        {
> +            for (uint32_t i = 0; i < ((analysis->numPartitions / factor)
> * analysis->numCUsInFrame); i++)
> +                memset(&(analysis->interData)->cuQPOffReuse[i],
> cuQPReuseBuf[i], sizeof(int8_t));
> +        }
> +
> +        if (m_param->rc.cuTree)
> +        {
>              X265_FREE(cuQPBuf);
> +            X265_FREE(cuQPReuseBuf);
> +        }
>          X265_FREE(tempBuf);
>
>          if (m_param->analysisLoadReuseLevel == 10)
> @@ -5540,6 +5593,12 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>                          intraDataCTU->cuQPOff[depthBytes] =
> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>                      absPartIdx += ctu->m_numPartitions >> (depth * 2);
>                  }
> +
> +                if (m_param->rc.cuTree)
> +                {
> +                    for (uint32_t i = (cuAddr * ctu->m_numPartitions), j
> = 0; j < ctu->m_numPartitions; i++, j++)
> +                        intraDataCTU->cuQPOffReuse[i] =
> (int8_t)(ctu->m_qpreuse[j] - baseQP);
> +                }
>                  memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
> ctu->m_numPartitions);
>              }
>          }
> @@ -5599,13 +5658,20 @@ void
> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>                      }
>                      absPartIdx += ctu->m_numPartitions >> (depth * 2);
>                  }
> +
> +                if (m_param->rc.cuTree)
> +                {
> +                    for (uint32_t i = (cuAddr * ctu->m_numPartitions), j
> = 0; j < ctu->m_numPartitions; i++, j++)
> +                        interDataCTU->cuQPOffReuse[i] =
> (int8_t)(ctu->m_qpreuse[j] - baseQP);
> +                }
> +
>                  if (m_param->analysisSaveReuseLevel == 10 &&
> bIntraInInter)
>                      memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
> ctu->m_numPartitions);
>              }
>          }
>
>          if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
> == X265_TYPE_I) && m_param->rc.cuTree)
> -            analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
> (sizeof(int8_t) * depthBytes);
> +            analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
> (sizeof(int8_t) * depthBytes) + (sizeof(int8_t) * analysis->numPartitions
>  * analysis->numCUsInFrame);
>          else if (analysis->sliceType == X265_TYPE_IDR ||
> analysis->sliceType == X265_TYPE_I)
>              analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
>          else
> @@ -5613,7 +5679,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>              /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */
>              analysis->frameRecordSize += depthBytes * 2;
>              if (m_param->rc.cuTree)
> -            analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
> +            {
> +                analysis->frameRecordSize += (sizeof(int8_t) *
> depthBytes);
> +                analysis->frameRecordSize += (sizeof(int8_t) *
> analysis->numPartitions * analysis->numCUsInFrame);
> +            }
>              if (m_param->analysisSaveReuseLevel > 4)
>                  analysis->frameRecordSize += (depthBytes * 2);
>
> @@ -5669,7 +5738,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>          X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
>          X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
> depthBytes, m_analysisFileOut);
>          if (m_param->rc.cuTree)
> +        {
>              X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
> depthBytes, m_analysisFileOut);
> +            X265_FWRITE((analysis->intraData)->cuQPOffReuse,
> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
> m_analysisFileOut);
> +        }
>          X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>      }
>      else
> @@ -5677,7 +5749,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
>          X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
>          X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
>          if (m_param->rc.cuTree)
> +        {
>              X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
> depthBytes, m_analysisFileOut);
> +            X265_FWRITE((analysis->interData)->cuQPOffReuse,
> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
> m_analysisFileOut);
> +        }
>          if (m_param->analysisSaveReuseLevel > 4)
>          {
>              X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 0adb0d0db..3bc01268b 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>
>      if (!framecnt)
>      {
> -        if (m_param->rc.cuTree)
> +        if (m_param->rc.cuTree && !m_param->analysisLoad)
>              cuTree(frames, 0, bKeyframe);
>          return;
>      }
> diff --git a/source/x265.h b/source/x265.h
> index f44040ba7..d6a828539 100644
> --- a/source/x265.h
> +++ b/source/x265.h
> @@ -145,6 +145,7 @@ typedef struct x265_analysis_intra_data
>      char*     partSizes;
>      uint8_t*  chromaModes;
>      int8_t*    cuQPOff;
> +    int8_t*   cuQPOffReuse;
>  }x265_analysis_intra_data;
>
>  typedef struct x265_analysis_MV
> @@ -170,6 +171,7 @@ typedef struct x265_analysis_inter_data
>      x265_analysis_MV*         mv[2];
>      int64_t*     sadCost;
>      int8_t*    cuQPOff;
> +    int8_t*    cuQPOffReuse;
>  }x265_analysis_inter_data;
>
>  typedef struct x265_weight_param
> --
> 2.23.0.windows.1
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>


-- 
Regards,
Kavitha
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20201120/df2ef79d/attachment-0001.html>


More information about the x265-devel mailing list