[x265] [PATCH] [Release_3.5] correct reusing cutree qp offsets in load encode for reuse-level > 1 and < 10 for same resolution

Srikanth Kurapati srikanth.kurapati at multicorewareinc.com
Fri Jan 8 05:25:36 UTC 2021


On Fri, Nov 20, 2020 at 3:32 PM Kavitha Sampath <
kavitha at multicorewareinc.com> wrote:

>
>
> On Tue, Nov 17, 2020 at 8:22 AM Mahesh Pittala <
> mahesh at multicorewareinc.com> wrote:
>
>> From 787ae5da7431b5d113ea033cf6502ac1cc1e7572 Mon Sep 17 00:00:00 2001
>> From: maheshpittala <mahesh at multicorewareinc.com>
>> Date: Sun, 1 Nov 2020 10:09:28 +0530
>> Subject: [PATCH] correct reusing cutree qp offsets in load encode for
>>  reuse-level > 1 and < 10 for same resolution
>>
>> Previously, the save encode wrote only the best-mode analysis data of each
>> CTU to the file after encoding; the QP of each individual split CU was not
>> saved. As a result, the load encode reused the best mode's QP value even
>> for split CUs (whose QP could differ in the save encode) when redoing the
>> analysis.
>>
>> Now, cuGeom.geomRecurId provides a unique ID for every CU, including parent
>> CUs, so the cutree QP offset is stored per geomRecurId in the save encode
>> and loaded using the same index in the load encode.
>>
> [KS] Commit message sounds informal. Suggest rephrasing
>
    [SK] Addressed the same.

> ---
>>  source/abrEncApp.cpp         |  6 +++
>>  source/common/cudata.cpp     |  6 ++-
>>  source/common/cudata.h       |  3 +-
>>  source/encoder/analysis.cpp  | 32 ++++++++++--
>>  source/encoder/api.cpp       | 12 +++++
>>  source/encoder/encoder.cpp   | 97 ++++++++++++++++++++++++++++++++----
>>  source/encoder/slicetype.cpp |  2 +-
>>  source/x265.h                |  2 +
>>  8 files changed, 140 insertions(+), 20 deletions(-)
>>
>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
>> index cd85154f1..3550d8b11 100644
>> --- a/source/abrEncApp.cpp
>> +++ b/source/abrEncApp.cpp
>> @@ -342,7 +342,10 @@ namespace X265_NS {
>>              memcpy(intraDst->partSizes, intraSrc->partSizes,
>> sizeof(char) * src->depthBytes);
>>              memcpy(intraDst->chromaModes, intraSrc->chromaModes,
>> sizeof(uint8_t) * src->depthBytes);
>>              if (m_param->rc.cuTree)
>> +            {
>>                  memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>> sizeof(int8_t) * src->depthBytes);
>> +                memcpy(intraDst->cuQPOffReuse, intraSrc->cuQPOffReuse,
>> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
>>
> [KS] The maximum number of QPs saved per CTU is 85. Allocating and copying
> numPartitions-sized buffers is unnecessary.
>
    [SK] Agreed. Fixed the same.

> +            }
>>          }
>>          else
>>          {
>> @@ -357,7 +360,10 @@ namespace X265_NS {
>>              memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
>> src->depthBytes);
>>              memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
>> src->depthBytes);
>>              if (m_param->rc.cuTree)
>> +            {
>>                  memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>> sizeof(int8_t) * src->depthBytes);
>> +                memcpy(interDst->cuQPOffReuse, interSrc->cuQPOffReuse,
>> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
>> +            }
>>              if (m_param->analysisSaveReuseLevel > 4)
>>              {
>>                  memcpy(interDst->partSize, interSrc->partSize,
>> sizeof(uint8_t) * src->depthBytes);
>> diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp
>> index 19281dee2..08cdff11a 100644
>> --- a/source/common/cudata.cpp
>> +++ b/source/common/cudata.cpp
>> @@ -194,6 +194,7 @@ void CUData::initialize(const CUDataMemPool&
>> dataPool, uint32_t depth, const x26
>>
>>          m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
>>          m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
>> +        m_qpreuse    = (int8_t*)charBuf; charBuf += m_numPartitions;
>>
> [KS] Can you move this out of parentCTU? Would be appropriate to include
> it as an Analysis class member - just like other reuse parameters such
> as m_reuseRef, m_reuseDepth,..
>
   [SK] Addressed the same, so that the cudata mem pool can be used for other
purposes. We will store the offsets only in the frame's analysis data
structures.

>
>
>>          m_log2CUSize         = charBuf; charBuf += m_numPartitions;
>>          m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
>>          m_tqBypass           = charBuf; charBuf += m_numPartitions;
>> @@ -235,6 +236,7 @@ void CUData::initialize(const CUDataMemPool&
>> dataPool, uint32_t depth, const x26
>>
>>          m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
>>          m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
>> +        m_qpreuse =    (int8_t*)charBuf; charBuf += m_numPartitions;
>>          m_log2CUSize         = charBuf; charBuf += m_numPartitions;
>>          m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
>>          m_tqBypass           = charBuf; charBuf += m_numPartitions;
>> @@ -307,7 +309,7 @@ void CUData::initCTU(const Frame& frame, uint32_t
>> cuAddr, int qp, uint32_t first
>>      X265_CHECK(!(frame.m_encData->m_param->bLossless &&
>> !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without
>> TQbypass in PPS\n");
>>
>>      /* initialize the remaining CU data in one memset */
>> -    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
>> BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
>> +    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
>> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
>>
>>      for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
>>          m_refTuDepth[i] = -1;
>> @@ -358,7 +360,7 @@ void CUData::initSubCU(const CUData& ctu, const
>> CUGeom& cuGeom, int qp)
>>      m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
>>
>>      /* initialize the remaining CU data in one memset */
>> -    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
>> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
>> +    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
>> BytesPerPartition - 14 : BytesPerPartition - 10) * m_numPartitions);
>>      memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
>>  }
>>
>> diff --git a/source/common/cudata.h b/source/common/cudata.h
>> index 8397f0568..d58f53e39 100644
>> --- a/source/common/cudata.h
>> +++ b/source/common/cudata.h
>> @@ -192,6 +192,7 @@ public:
>>      /* Per-part data, stored contiguously */
>>      int8_t*       m_qp;               // array of QP values
>>      int8_t*       m_qpAnalysis;       // array of QP values for analysis
>> reuse
>> +    int8_t*       m_qpreuse;          // array of QP values for analysis
>> reuse for reuse levels > 1 and < 10
>>      uint8_t*      m_log2CUSize;       // array of cu log2Size TODO:
>> seems redundant to depth
>>      uint8_t*      m_lumaIntraDir;     // array of intra directions (luma)
>>      uint8_t*      m_tqBypass;         // array of CU lossless flags
>> @@ -207,7 +208,7 @@ public:
>>      uint8_t*      m_transformSkip[3]; // array of transform skipping
>> flags per plane
>>      uint8_t*      m_cbf[3];           // array of coded block flags
>> (CBF) per plane
>>      uint8_t*      m_chromaIntraDir;   // array of intra directions
>> (chroma)
>> -    enum { BytesPerPartition = 24 };  // combined sizeof() of all
>> per-part data
>> +    enum { BytesPerPartition = 25 };  // combined sizeof() of all
>> per-part data
>>
>>      sse_t*        m_distortion;
>>      coeff_t*      m_trCoeff[3];       // transformed coefficient buffer
>> per plane
>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
>> index aabf386ca..b1d7e3ad1 100644
>> --- a/source/encoder/analysis.cpp
>> +++ b/source/encoder/analysis.cpp
>> @@ -520,6 +520,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
>> parentCTU, const CUGeom& cuGeom
>>      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>>
>> +    if (m_param->rc.cuTree)
>> +        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>> +
>>      bool bAlreadyDecided = m_param->intraRefine != 4 &&
>> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
>> !(m_param->bAnalysisType == HEVC_INFO);
>>      bool bDecidedDepth = m_param->intraRefine != 4 &&
>> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
>>      int split = 0;
>> @@ -870,6 +873,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
>> parentCTU, const CUGeom& c
>>      uint32_t minDepth = m_param->rdLevel <= 4 ?
>> topSkipMinDepth(parentCTU, cuGeom) : 0;
>>      uint32_t splitRefs[4] = { 0, 0, 0, 0 };
>>
>> +    if (m_param->rc.cuTree)
>> +        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>> +
>>      X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
>> support RD 0 or 1\n");
>>
>>      PMODE pmode(*this, cuGeom);
>> @@ -1152,6 +1158,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
>> CUData& parentCTU, const CUGeom&
>>      uint32_t cuAddr = parentCTU.m_cuAddr;
>>      ModeDepth& md = m_modeDepth[depth];
>>
>> +    if (m_param->rc.cuTree)
>> +        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>>
>>      if (m_param->searchMethod == X265_SEA)
>>      {
>> @@ -1856,6 +1864,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
>> CUData& parentCTU, const CUGeom&
>>      ModeDepth& md = m_modeDepth[depth];
>>      md.bestMode = NULL;
>>
>> +    if (m_param->rc.cuTree)
>> +        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>> +
>>      if (m_param->searchMethod == X265_SEA)
>>      {
>>          int numPredDir = m_slice->isInterP() ? 1 : 2;
>> @@ -3643,15 +3654,26 @@ int Analysis::calculateQpforCuSize(const CUData&
>> ctu, const CUGeom& cuGeom, int3
>>          if ((distortionData->threshold[ctu.m_cuAddr] < 0.9 ||
>> distortionData->threshold[ctu.m_cuAddr] > 1.1)
>>              && distortionData->highDistortionCtuCount &&
>> distortionData->lowDistortionCtuCount)
>>              qp += distortionData->offset[ctu.m_cuAddr];
>> -    }
>> + }
>>
>>      if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
>>      {
>> -        int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>> cuGeom.absPartIdx;
>> -        if (ctu.m_slice->m_sliceType == I_SLICE)
>> -            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
>> +        if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
>> == 10)
>> +        {
>> +            int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>> cuGeom.absPartIdx;
>> +            if (ctu.m_slice->m_sliceType == I_SLICE)
>> +                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
>> +            else
>> +                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
>> +        }
>>          else
>> -            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
>> +        {
>> +            int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>> cuGeom.geomRecurId;
>> +            if (ctu.m_slice->m_sliceType == I_SLICE)
>> +                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOffReuse[cuIdx]));
>> +            else
>> +                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOffReuse[cuIdx]));
>> +        }
>>
> [KS] Why is this reuse not applicable to reuse level 1?
>
   [SK] Not sure of the improvements or gain in this case. Since this is a
general question, we will be tracking this and other possible improvements
for multipass encoding as a separate action item under
  x265-Story - 1059.

>      }
>>      if (m_param->rc.hevcAq)
>>      {
>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
>> index a986355e0..0f266d328 100644
>> --- a/source/encoder/api.cpp
>> +++ b/source/encoder/api.cpp
>> @@ -825,7 +825,10 @@ void x265_alloc_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>>          CHECKED_MALLOC_ZERO(intraData->partSizes, char,
>> analysis->numPartitions * analysis->numCUsInFrame);
>>          CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>>          if (param->rc.cuTree)
>> +        {
>>              CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> +            CHECKED_MALLOC_ZERO(intraData->cuQPOffReuse, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> +        }
>>      }
>>      analysis->intraData = intraData;
>>
>> @@ -837,7 +840,10 @@ void x265_alloc_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>>          CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>>
>>          if (param->rc.cuTree && !isMultiPassOpt)
>> +        {
>>              CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> +            CHECKED_MALLOC_ZERO(interData->cuQPOffReuse, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> +        }
>>          CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>>          CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>>          CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> @@ -919,7 +925,10 @@ void x265_free_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>>              X265_FREE((analysis->intraData)->partSizes);
>>              X265_FREE((analysis->intraData)->chromaModes);
>>              if (param->rc.cuTree)
>> +            {
>>                  X265_FREE((analysis->intraData)->cuQPOff);
>> +                X265_FREE((analysis->intraData)->cuQPOffReuse);
>> +            }
>>          }
>>          X265_FREE(analysis->intraData);
>>          analysis->intraData = NULL;
>> @@ -931,7 +940,10 @@ void x265_free_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>>          X265_FREE((analysis->interData)->depth);
>>          X265_FREE((analysis->interData)->modes);
>>          if (!isMultiPassOpt && param->rc.cuTree)
>> +        {
>>              X265_FREE((analysis->interData)->cuQPOff);
>> +            X265_FREE((analysis->interData)->cuQPOffReuse);
>> +        }
>>          X265_FREE((analysis->interData)->mvpIdx[0]);
>>          X265_FREE((analysis->interData)->mvpIdx[1]);
>>          X265_FREE((analysis->interData)->mv[0]);
>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
>> index 1f710e1ce..9666744f3 100644
>> --- a/source/encoder/encoder.cpp
>> +++ b/source/encoder/encoder.cpp
>> @@ -4452,19 +4452,25 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>              return;
>>
>>          uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>> *partSizes = NULL;
>> -        int8_t *cuQPBuf = NULL;
>> +        int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>>
>>          tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>>          depthBuf = tempBuf;
>>          modeBuf = tempBuf + depthBytes;
>>          partSizes = tempBuf + 2 * depthBytes;
>>          if (m_param->rc.cuTree)
>> +        {
>>              cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
>> analysis->numCUsInFrame);
>>
> [KS] Check whitespaces
>
>> +        }
>>
>>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->depth);
>>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->chromaModes);
>>          X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->partSizes);
>> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>> +        if (m_param->rc.cuTree) {
>> +            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
>> m_analysisFileIn, intraPic->cuQPOff);
>> +            X265_FREAD(cuQPReuseBuf, sizeof(int8_t), (scaledNumPartition
>> * analysis->numCUsInFrame), m_analysisFileIn, intraPic->cuQPOffReuse);
>> +        }
>>
>>          size_t count = 0;
>>          for (uint32_t d = 0; d < depthBytes; d++)
>> @@ -4484,7 +4490,11 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>>                  memset(&(analysis->intraData)->cuQPOff[count],
>> cuQPBuf[d], bytes);
>>              count += bytes;
>>          }
>> -
>> +        if (m_param->rc.cuTree)
>> +        {
>> + for (uint32_t i = 0; i < (scaledNumPartition *
>> analysis->numCUsInFrame); i++)
>> +                memset(&(analysis->intraData)->cuQPOffReuse[i],
>> cuQPReuseBuf[i], sizeof(int8_t));
>> +        }
>>          if (!m_param->scaleFactor)
>>          {
>>              X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t),
>> numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
>> @@ -4498,7 +4508,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>>              X265_FREE(tempLumaBuf);
>>          }
>>          if (m_param->rc.cuTree)
>> +        {
>>              X265_FREE(cuQPBuf);
>> +            X265_FREE(cuQPReuseBuf);
>> +        }
>>          X265_FREE(tempBuf);
>>          consumedBytes += frameRecordSize;
>>      }
>> @@ -4515,7 +4528,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>>          uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>>          MV* mv[2];
>>          int8_t* refIdx[2];
>> -        int8_t* cuQPBuf = NULL;
>> +        int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>>
> [KS] Why can't we reuse cuQPBuf? I agree that the size of the offsets differs
> for reuse level 10 and others, but that can be taken care of in allocation.
>
   [SK] We can use the same and also use the same buffer in analysis data
for all reuse levels. Hence optimized the memory footprint per frame.

>
>>          int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>>          bool bIntraInInter = false;
>> @@ -4536,11 +4549,17 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>              depthBuf = tempBuf;
>>              modeBuf = tempBuf + depthBytes;
>>              if (m_param->rc.cuTree)
>> +            {
>>                  cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
>> analysis->numCUsInFrame);
>> +            }
>>
>>              X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->depth);
>>              X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->modes);
>> -            if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf,
>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>> +            if (m_param->rc.cuTree) {
>> +                X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
>> m_analysisFileIn, interPic->cuQPOff);
>> +                X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
>> (scaledNumPartition * analysis->numCUsInFrame), m_analysisFileIn,
>> interPic->cuQPOffReuse);
>> +            }
>>
>>              if (m_param->analysisLoadReuseLevel > 4)
>>              {
>> @@ -4611,9 +4630,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>>                  }
>>                  count += bytes;
>>              }
>> +            if (m_param->rc.cuTree)
>> +            {
>> + for (uint32_t i = 0; i < (scaledNumPartition *
>> analysis->numCUsInFrame); i++)
>> +                    memset(&(analysis->interData)->cuQPOffReuse[i],
>> cuQPReuseBuf[i], sizeof(int8_t));
>> +            }
>>
>>              if (m_param->rc.cuTree)
>> +            {
>>                  X265_FREE(cuQPBuf);
>> +                X265_FREE(cuQPReuseBuf);
>> +            }
>>              X265_FREE(tempBuf);
>>          }
>>          if (m_param->analysisLoadReuseLevel == 10)
>> @@ -4814,19 +4841,26 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>              return;
>>
>>          uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>> *partSizes = NULL;
>> -        int8_t *cuQPBuf = NULL;
>> +        int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;;
>>
>>          tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>>          depthBuf = tempBuf;
>>          modeBuf = tempBuf + depthBytes;
>>          partSizes = tempBuf + 2 * depthBytes;
>>          if (m_param->rc.cuTree)
>> +        {
>>              cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>> +            cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions
>> / factor) * analysis->numCUsInFrame);
>> +        }
>>
>>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->depth);
>>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->chromaModes);
>>          X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->partSizes);
>> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>> +        if (m_param->rc.cuTree)
>> +        {
>> +            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
>> m_analysisFileIn, intraPic->cuQPOff);
>> +            X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
>> ((analysis->numPartitions / factor) * analysis->numCUsInFrame),
>> m_analysisFileIn, intraPic->cuQPOffReuse);
>> +        }
>>
>>          uint32_t count = 0;
>>          for (uint32_t d = 0; d < depthBytes; d++)
>> @@ -4869,7 +4903,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>>          }
>>          X265_FREE(tempLumaBuf);
>>          if (m_param->rc.cuTree)
>> +        {
>>              X265_FREE(cuQPBuf);
>> +            X265_FREE(cuQPReuseBuf);
>> +        }
>>          X265_FREE(tempBuf);
>>          consumedBytes += frameRecordSize;
>>      }
>> @@ -4886,7 +4923,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>>          uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>>          MV* mv[2];
>>          int8_t* refIdx[2];
>> -        int8_t* cuQPBuf = NULL;
>> +        int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>>
>>          int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>>          bool bIntraInInter = false;
>> @@ -4901,11 +4938,18 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>          depthBuf = tempBuf;
>>          modeBuf = tempBuf + depthBytes;
>>          if (m_param->rc.cuTree)
>> +        {
>>              cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>> +            cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions
>> / factor) * analysis->numCUsInFrame);
>> +        }
>>
>>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->depth);
>>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->modes);
>> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>> +        if (m_param->rc.cuTree)
>> +        {
>> +            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
>> m_analysisFileIn, interPic->cuQPOff);
>> +            X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
>> (analysis->numPartitions / factor) * analysis->numCUsInFrame,
>> m_analysisFileIn, interPic->cuQPOffReuse);
>> +        }
>>          if (m_param->analysisLoadReuseLevel > 4)
>>          {
>>              partSize = modeBuf + depthBytes;
>> @@ -5017,7 +5061,16 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>>          }
>>
>>          if (m_param->rc.cuTree)
>> +        {
>> +            for (uint32_t i = 0; i < ((analysis->numPartitions / factor)
>> * analysis->numCUsInFrame); i++)
>> +                memset(&(analysis->interData)->cuQPOffReuse[i],
>> cuQPReuseBuf[i], sizeof(int8_t));
>> +        }
>> +
>> +        if (m_param->rc.cuTree)
>> +        {
>>              X265_FREE(cuQPBuf);
>> +            X265_FREE(cuQPReuseBuf);
>> +        }
>>          X265_FREE(tempBuf);
>>
>>          if (m_param->analysisLoadReuseLevel == 10)
>> @@ -5540,6 +5593,12 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>                          intraDataCTU->cuQPOff[depthBytes] =
>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>>                      absPartIdx += ctu->m_numPartitions >> (depth * 2);
>>                  }
>> +
>> +                if (m_param->rc.cuTree)
>> +                {
>> +                    for (uint32_t i = (cuAddr * ctu->m_numPartitions), j
>> = 0; j < ctu->m_numPartitions; i++, j++)
>> +                        intraDataCTU->cuQPOffReuse[i] =
>> (int8_t)(ctu->m_qpreuse[j] - baseQP);
>> +                }
>>                  memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>> ctu->m_numPartitions);
>>              }
>>          }
>> @@ -5599,13 +5658,20 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>                      }
>>                      absPartIdx += ctu->m_numPartitions >> (depth * 2);
>>                  }
>> +
>> +                if (m_param->rc.cuTree)
>> +                {
>> +                    for (uint32_t i = (cuAddr * ctu->m_numPartitions), j
>> = 0; j < ctu->m_numPartitions; i++, j++)
>> +                        interDataCTU->cuQPOffReuse[i] =
>> (int8_t)(ctu->m_qpreuse[j] - baseQP);
>> +                }
>> +
>>                  if (m_param->analysisSaveReuseLevel == 10 &&
>> bIntraInInter)
>>                      memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>> ctu->m_numPartitions);
>>              }
>>          }
>>
>>          if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
>> == X265_TYPE_I) && m_param->rc.cuTree)
>> -            analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>> (sizeof(int8_t) * depthBytes);
>> +            analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>> (sizeof(int8_t) * depthBytes) + (sizeof(int8_t) * analysis->numPartitions
>>  * analysis->numCUsInFrame);
>>          else if (analysis->sliceType == X265_TYPE_IDR ||
>> analysis->sliceType == X265_TYPE_I)
>>              analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
>>          else
>> @@ -5613,7 +5679,10 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>              /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag
>> */
>>              analysis->frameRecordSize += depthBytes * 2;
>>              if (m_param->rc.cuTree)
>> -            analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
>> +            {
>> +                analysis->frameRecordSize += (sizeof(int8_t) *
>> depthBytes);
>> +                analysis->frameRecordSize += (sizeof(int8_t) *
>> analysis->numPartitions * analysis->numCUsInFrame);
>> +            }
>>              if (m_param->analysisSaveReuseLevel > 4)
>>                  analysis->frameRecordSize += (depthBytes * 2);
>>
>> @@ -5669,7 +5738,10 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>          X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>>          X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
>> depthBytes, m_analysisFileOut);
>>          if (m_param->rc.cuTree)
>> +        {
>>              X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
>> depthBytes, m_analysisFileOut);
>> +            X265_FWRITE((analysis->intraData)->cuQPOffReuse,
>> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
>> m_analysisFileOut);
>> +        }
>>          X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>>      }
>>      else
>> @@ -5677,7 +5749,10 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>          X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>>          X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>>          if (m_param->rc.cuTree)
>> +        {
>>              X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
>> depthBytes, m_analysisFileOut);
>> +            X265_FWRITE((analysis->interData)->cuQPOffReuse,
>> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
>> m_analysisFileOut);
>> +        }
>>          if (m_param->analysisSaveReuseLevel > 4)
>>          {
>>              X265_FWRITE((analysis->interData)->partSize,
>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
>> index 0adb0d0db..3bc01268b 100644
>> --- a/source/encoder/slicetype.cpp
>> +++ b/source/encoder/slicetype.cpp
>> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
>> bool bKeyframe)
>>
>>      if (!framecnt)
>>      {
>> -        if (m_param->rc.cuTree)
>> +        if (m_param->rc.cuTree && !m_param->analysisLoad)
>>              cuTree(frames, 0, bKeyframe);
>>          return;
>>      }
>> diff --git a/source/x265.h b/source/x265.h
>> index f44040ba7..d6a828539 100644
>> --- a/source/x265.h
>> +++ b/source/x265.h
>> @@ -145,6 +145,7 @@ typedef struct x265_analysis_intra_data
>>      char*     partSizes;
>>      uint8_t*  chromaModes;
>>      int8_t*    cuQPOff;
>> +    int8_t*   cuQPOffReuse;
>>  }x265_analysis_intra_data;
>>
>>  typedef struct x265_analysis_MV
>> @@ -170,6 +171,7 @@ typedef struct x265_analysis_inter_data
>>      x265_analysis_MV*         mv[2];
>>      int64_t*     sadCost;
>>      int8_t*    cuQPOff;
>> +    int8_t*    cuQPOffReuse;
>>  }x265_analysis_inter_data;
>>
>>  typedef struct x265_weight_param
>> --
>> 2.23.0.windows.1
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> --
> Regards,
> Kavitha
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>


-- 
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210108/e0308000/attachment-0001.html>


More information about the x265-devel mailing list