[x265] [Patch] fix: corrects output mismatch for cutree enabled analysis save/load encodes with reuse-levels in between 1 to 10 for similar encoder settings.

Aruna Matheswaran aruna at multicorewareinc.com
Fri Jan 22 17:24:49 UTC 2021


On Thu, Jan 21, 2021 at 5:34 PM Srikanth Kurapati <
srikanth.kurapati at multicorewareinc.com> wrote:

>
> Adding to my reply above.
>
> [AM] Why MAX_NUM_CU_GEOMS combinations?
>
> [KS] Will optimize storage based on min-cu-size configuration.
>
> On Thu, Jan 21, 2021 at 4:09 PM Srikanth Kurapati <
> srikanth.kurapati at multicorewareinc.com> wrote:
>
>>
>> [AM] Can't we share lowres cutree stats generated at qg size granularity?
>> Why MAX_NUM_CU_GEOMS combinations?
>>
>> [KS] If we share like that then we will have to calculate the dqp per cu
>> at analysis phase just like save encode and we will not get the savings in
>> cpu cycles there. Currently we are storing the final dqp derived from
>> lowres mv costs at qg size granularity by taking the difference between the
>> final qp and base qp per slice.
>>
> [AM] What is the memory footprint and performance impact of 1. Sharing
cutree offsets per qg and collating CU-level offsets from qg-level offsets,
and 2. Sharing cu-tree offsets of all partition sizes? I don't think #1
will have a significant hit on performance as the partition evaluations in
load encode are restricted.

> MAX_NUM_CU_GEOMS is 85 = (1 + 4 + 16 + 64); this is the maximum number of
>> partitions at which qp can be computed and used in a ctu.
>>
>> [AM] Won't this implicitly turn OFF cutree at reuse-level 1?
>>
>> [KS]  Agreed and addressed.
>>
>>
>> On Tue, Jan 19, 2021 at 11:12 PM Aruna Matheswaran <
>> aruna at multicorewareinc.com> wrote:
>>
>>>
>>>
>>> On Mon, Jan 11, 2021 at 8:08 PM Srikanth Kurapati <
>>> srikanth.kurapati at multicorewareinc.com> wrote:
>>>
>>>> From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001
>>>> From: Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
>>>> Date: Wed, 30 Dec 2020 17:00:08 +0530
>>>> Subject: [PATCH] fix: corrects output mismatch for cutree enabled
>>>> analysis
>>>>  save/load enodes with reuse-levels in between 1 to 10 for similar
>>>> encoder
>>>>  settings.
>>>>
>>>> ---
>>>>  source/abrEncApp.cpp         |  14 +++-
>>>>  source/common/common.h       |   3 +-
>>>>  source/common/cudata.h       |   2 +-
>>>>  source/encoder/analysis.cpp  |  31 ++++++++-
>>>>  source/encoder/analysis.h    |   1 +
>>>>  source/encoder/api.cpp       |  28 +++++++-
>>>>  source/encoder/encoder.cpp   | 123 ++++++++++++++++++++++++++---------
>>>>  source/encoder/slicetype.cpp |   2 +-
>>>>  source/x265.h                |   4 +-
>>>>  9 files changed, 166 insertions(+), 42 deletions(-)
>>>>
>>>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
>>>> index fa62ebf63..ea255e3f6 100644
>>>> --- a/source/abrEncApp.cpp
>>>> +++ b/source/abrEncApp.cpp
>>>> @@ -340,7 +340,12 @@ namespace X265_NS {
>>>>              memcpy(intraDst->partSizes, intraSrc->partSizes,
>>>> sizeof(char) * src->depthBytes);
>>>>              memcpy(intraDst->chromaModes, intraSrc->chromaModes,
>>>> sizeof(uint8_t) * src->depthBytes);
>>>>              if (m_param->rc.cuTree)
>>>> -                memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>>>> sizeof(int8_t) * src->depthBytes);
>>>> +            {
>>>> +                if (m_param->analysisSaveReuseLevel == 10)
>>>> +                    memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>>>> sizeof(int8_t) * src->depthBytes);
>>>> +                else
>>>> +                    memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>>>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));
>>>> +            }
>>>>          }
>>>>          else
>>>>          {
>>>> @@ -355,7 +360,12 @@ namespace X265_NS {
>>>>              memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
>>>> src->depthBytes);
>>>>              memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
>>>> src->depthBytes);
>>>>              if (m_param->rc.cuTree)
>>>> -                memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>>>> sizeof(int8_t) * src->depthBytes);
>>>> +            {
>>>> +                if (m_param->analysisReuseLevel == 10)
>>>> +                    memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>>>> sizeof(int8_t) * src->depthBytes);
>>>> +                else
>>>> +                    memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>>>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));
>>>> +            }
>>>>              if (m_param->analysisSaveReuseLevel > 4)
>>>>              {
>>>>                  memcpy(interDst->partSize, interSrc->partSize,
>>>> sizeof(uint8_t) * src->depthBytes);
>>>> diff --git a/source/common/common.h b/source/common/common.h
>>>> index 8c06cd79e..0ffbf17eb 100644
>>>> --- a/source/common/common.h
>>>> +++ b/source/common/common.h
>>>> @@ -326,7 +326,8 @@ typedef int16_t  coeff_t;      // transform
>>>> coefficient
>>>>
>>>>  #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
>>>>  #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
>>>> -#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
>>>> +#define MAX_NUM_CU_GEOMS 85
>>>> +#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8
>>>>
>>>>  #define MAX_NUM_TR_COEFFS           MAX_TR_SIZE * MAX_TR_SIZE //
>>>> Maximum number of transform coefficients, for a 32x32 transform
>>>>  #define MAX_NUM_TR_CATEGORIES       16                        // 32,
>>>> 16, 8, 4 transform categories each for luma and chroma
>>>> diff --git a/source/common/cudata.h b/source/common/cudata.h
>>>> index 8397f0568..c7d9a1972 100644
>>>> --- a/source/common/cudata.h
>>>> +++ b/source/common/cudata.h
>>>> @@ -371,7 +371,7 @@ struct CUDataMemPool
>>>>              CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) *
>>>> numInstances);
>>>>          }
>>>>          else
>>>> -        {
>>>> +        {
>>>>              uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
>>>> CHROMA_V_SHIFT(csp));
>>>>              CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC *
>>>> 2) * numInstances);
>>>>          }
>>>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
>>>> index aabf386ca..22a4ba74f 100644
>>>> --- a/source/encoder/analysis.cpp
>>>> +++ b/source/encoder/analysis.cpp
>>>> @@ -220,6 +220,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame&
>>>> frame, const CUGeom& cuGeom, con
>>>>          if (m_param->analysisSave && !m_param->analysisLoad)
>>>>              for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU *
>>>> numPredDir; i++)
>>>>                  m_reuseRef[i] = -1;
>>>> +
>>>> +        if (m_param->rc.cuTree)
>>>> +            m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr *
>>>> MAX_NUM_CU_GEOMS];
>>>>      }
>>>>      ProfileCUScope(ctu, totalCTUTime, totalCTUs);
>>>>
>>>> @@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame&
>>>> frame, const CUGeom& cuGeom, con
>>>>              memcpy(ctu.m_partSize,
>>>> &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) *
>>>> numPartition);
>>>>              memcpy(ctu.m_chromaIntraDir,
>>>> &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
>>>> numPartition);
>>>>          }
>>>> +        if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10)
>>>> +            m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr *
>>>> MAX_NUM_CU_GEOMS];
>>>>          compressIntraCU(ctu, cuGeom, qp);
>>>>      }
>>>>      else
>>>> @@ -520,6 +525,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
>>>> parentCTU, const CUGeom& cuGeom
>>>>      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>>>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>>>>
>>>> +    if (m_param->rc.cuTree  && m_param->analysisSaveReuseLevel > 1 &&
>>>> m_param->analysisSaveReuseLevel < 10)
>>>> +        m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>>>
>>> +
>>>>      bool bAlreadyDecided = m_param->intraRefine != 4 &&
>>>> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
>>>> !(m_param->bAnalysisType == HEVC_INFO);
>>>>      bool bDecidedDepth = m_param->intraRefine != 4 &&
>>>> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
>>>>      int split = 0;
>>>> @@ -870,6 +878,9 @@ uint32_t Analysis::compressInterCU_dist(const
>>>> CUData& parentCTU, const CUGeom& c
>>>>      uint32_t minDepth = m_param->rdLevel <= 4 ?
>>>> topSkipMinDepth(parentCTU, cuGeom) : 0;
>>>>      uint32_t splitRefs[4] = { 0, 0, 0, 0 };
>>>>
>>>> +    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>>>> m_param->analysisSaveReuseLevel < 10)
>>>> +        m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>>> +
>>>>      X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
>>>> support RD 0 or 1\n");
>>>>
>>>>      PMODE pmode(*this, cuGeom);
>>>> @@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
>>>> CUData& parentCTU, const CUGeom&
>>>>      uint32_t cuAddr = parentCTU.m_cuAddr;
>>>>      ModeDepth& md = m_modeDepth[depth];
>>>>
>>>> +    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>>>> m_param->analysisSaveReuseLevel < 10)
>>>> +        m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>>>
>>>>      if (m_param->searchMethod == X265_SEA)
>>>>      {
>>>> @@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
>>>> CUData& parentCTU, const CUGeom&
>>>>      ModeDepth& md = m_modeDepth[depth];
>>>>      md.bestMode = NULL;
>>>>
>>>> +    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>>>> m_param->analysisSaveReuseLevel < 10)
>>>> +        m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>>> +
>>>>      if (m_param->searchMethod == X265_SEA)
>>>>      {
>>>>          int numPredDir = m_slice->isInterP() ? 1 : 2;
>>>> @@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const
>>>> CUData& ctu, const CUGeom& cuGeom, int3
>>>>
>>>>      if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
>>>>      {
>>>> -        int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>>>> cuGeom.absPartIdx;
>>>> +        int cuIdx;
>>>> +        int8_t cuQPOffSet = 0;
>>>> +
>>>> +        if (m_param->scaleFactor == 2 ||
>>>> m_param->analysisLoadReuseLevel == 10)
>>>> +            cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>>>> cuGeom.absPartIdx;
>>>> +        else
>>>> +            cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) +
>>>> cuGeom.geomRecurId;
>>>> +
>>>>          if (ctu.m_slice->m_sliceType == I_SLICE)
>>>> -            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>>>> (int32_t)(qp + 0.5 +
>>>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
>>>> +            cuQPOffSet =
>>>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx];
>>>>          else
>>>> -            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>>>> (int32_t)(qp + 0.5 +
>>>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
>>>> +            cuQPOffSet =
>>>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx];
>>>> +
>>>> +        return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>>>> (int32_t)(qp + 0.5 + cuQPOffSet));
>>>>      }
>>>>      if (m_param->rc.hevcAq)
>>>>      {
>>>> diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
>>>> index 3bcb56bc3..8d76d5c5e 100644
>>>> --- a/source/encoder/analysis.h
>>>> +++ b/source/encoder/analysis.h
>>>> @@ -126,6 +126,7 @@ protected:
>>>>      int32_t*                   m_reuseRef;
>>>>      uint8_t*                   m_reuseDepth;
>>>>      uint8_t*                   m_reuseModes;
>>>> +    int8_t *                   m_reuseQP; // array of QP values for
>>>> analysis reuse at reuse levels > 1 and < 10 when cutree is enabled
>>>>      uint8_t*                   m_reusePartSize;
>>>>      uint8_t*                   m_reuseMergeFlag;
>>>>      x265_analysis_MV*          m_reuseMv[2];
>>>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
>>>> index a986355e0..2c90fe8f2 100644
>>>> --- a/source/encoder/api.cpp
>>>> +++ b/source/encoder/api.cpp
>>>> @@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param,
>>>> x265_analysis_data* analysis)
>>>>          CHECKED_MALLOC_ZERO(intraData->partSizes, char,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>>          CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>>          if (param->rc.cuTree)
>>>> -            CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>> +        {
>>>> +            if (maxReuseLevel == 10)
>>>> +            {
>>>> +                CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>> +            }
>>>> +            else
>>>> +            {
>>>> +                CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>>>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);
>>>> +            }
>>>> +        }
>>>>      }
>>>>      analysis->intraData = intraData;
>>>>
>>>> @@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param,
>>>> x265_analysis_data* analysis)
>>>>          CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>>
>>>>          if (param->rc.cuTree && !isMultiPassOpt)
>>>> -            CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>> +        {
>>>> +            if (maxReuseLevel == 10)
>>>> +            {
>>>> +                CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>> +            }
>>>> +            else
>>>> +            {
>>>> +                CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>>>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);
>>>>
>>> [AM] Can't we share lowres cutree stats generated at qg size
>>> granularity? Why MAX_NUM_CU_GEOMS combinations?
>>>
>>>> +            }
>>>> +        }
>>>>          CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>>          CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>>          CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
>>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>> @@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param,
>>>> x265_analysis_data* analysis)
>>>>              X265_FREE((analysis->intraData)->partSizes);
>>>>              X265_FREE((analysis->intraData)->chromaModes);
>>>>              if (param->rc.cuTree)
>>>> -                X265_FREE((analysis->intraData)->cuQPOff);
>>>> +            {
>>>> +                X265_FREE_ZERO((analysis->intraData)->cuQPOff);
>>>> +            }
>>>>          }
>>>>          X265_FREE(analysis->intraData);
>>>>          analysis->intraData = NULL;
>>>> @@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param,
>>>> x265_analysis_data* analysis)
>>>>          X265_FREE((analysis->interData)->depth);
>>>>          X265_FREE((analysis->interData)->modes);
>>>>          if (!isMultiPassOpt && param->rc.cuTree)
>>>> +        {
>>>>              X265_FREE((analysis->interData)->cuQPOff);
>>>> +        }
>>>>          X265_FREE((analysis->interData)->mvpIdx[0]);
>>>>          X265_FREE((analysis->interData)->mvpIdx[1]);
>>>>          X265_FREE((analysis->interData)->mv[0]);
>>>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
>>>> index 1f710e1ce..5eb123d31 100644
>>>> --- a/source/encoder/encoder.cpp
>>>> +++ b/source/encoder/encoder.cpp
>>>> @@ -4444,6 +4444,26 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>              }
>>>>          }
>>>>      }
>>>> +
>>>> +    int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL;
>>>> +    uint32_t reuseBufSize = 0;
>>>> +
>>>> +    if (m_param->rc.cuTree)
>>>> +    {
>>>> +        if (m_param->analysisLoadReuseLevel == 10)
>>>> +            reuseBufSize = depthBytes;
>>>> +        else if (m_param->analysisLoadReuseLevel > 1)
>>>> +            reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;
>>>> +        cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);
>>>> +        if (!m_param->bUseAnalysisFile)
>>>> +        {
>>>> +            if (analysis->sliceType == X265_TYPE_IDR ||
>>>> analysis->sliceType == X265_TYPE_I)
>>>> +                cuQPOffSets = intraPic->cuQPOff;
>>>> +            else
>>>> +                cuQPOffSets = interPic->cuQPOff;
>>>> +        }
>>>> +    }
>>>> +
>>>>      if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
>>>> X265_TYPE_I)
>>>>      {
>>>>          if (m_param->bAnalysisType == HEVC_INFO)
>>>> @@ -4452,19 +4472,21 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>              return;
>>>>
>>>>          uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>>>> *partSizes = NULL;
>>>> -        int8_t *cuQPBuf = NULL;
>>>>
>>>>          tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>>>>          depthBuf = tempBuf;
>>>>          modeBuf = tempBuf + depthBytes;
>>>>          partSizes = tempBuf + 2 * depthBytes;
>>>> -        if (m_param->rc.cuTree)
>>>> -            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>>
>>>>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, intraPic->depth);
>>>>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, intraPic->chromaModes);
>>>>          X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, intraPic->partSizes);
>>>> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>>>> +        if (m_param->rc.cuTree)
>>>> +        {
>>>> +            X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>>>> m_analysisFileIn, cuQPOffSets);
>>>> +            if (m_param->analysisLoadReuseLevel > 1 &&
>>>> m_param->analysisLoadReuseLevel < 10)
>>>> +                memcpy(analysis->intraData->cuQPOff, cuQPBuf,
>>>> sizeof(int8_t) * reuseBufSize);
>>>> +        }
>>>>
>>>>          size_t count = 0;
>>>>          for (uint32_t d = 0; d < depthBytes; d++)
>>>> @@ -4480,7 +4502,7 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>              memset(&(analysis->intraData)->depth[count], depthBuf[d],
>>>> bytes);
>>>>              memset(&(analysis->intraData)->chromaModes[count],
>>>> modeBuf[d], bytes);
>>>>              memset(&(analysis->intraData)->partSizes[count],
>>>> partSizes[d], bytes);
>>>> -            if (m_param->rc.cuTree)
>>>> +            if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
>>>> == 10)
>>>>                  memset(&(analysis->intraData)->cuQPOff[count],
>>>> cuQPBuf[d], bytes);
>>>>              count += bytes;
>>>>          }
>>>> @@ -4515,7 +4537,6 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>          uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>>>>          MV* mv[2];
>>>>          int8_t* refIdx[2];
>>>> -        int8_t* cuQPBuf = NULL;
>>>>
>>>>          int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>>>>          bool bIntraInInter = false;
>>>> @@ -4535,12 +4556,15 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>              tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
>>>>              depthBuf = tempBuf;
>>>>              modeBuf = tempBuf + depthBytes;
>>>> -            if (m_param->rc.cuTree)
>>>> -                cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>>
>>>>              X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, interPic->depth);
>>>>              X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, interPic->modes);
>>>> -            if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf,
>>>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>>>> +            if (m_param->rc.cuTree)
>>>> +            {
>>>> +                X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>>>> m_analysisFileIn, cuQPOffSets);
>>>> +                if (m_param->analysisLoadReuseLevel > 1 &&
>>>> m_param->analysisLoadReuseLevel < 10)
>>>> +                    memcpy(analysis->interData->cuQPOff, cuQPBuf,
>>>> sizeof(int8_t) * reuseBufSize);
>>>> +            }
>>>>
>>>>              if (m_param->analysisLoadReuseLevel > 4)
>>>>              {
>>>> @@ -4578,7 +4602,7 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>                      depthBuf[d] = 1;
>>>>                  memset(&(analysis->interData)->depth[count],
>>>> depthBuf[d], bytes);
>>>>                  memset(&(analysis->interData)->modes[count],
>>>> modeBuf[d], bytes);
>>>> -                if (m_param->rc.cuTree)
>>>> +                if (m_param->rc.cuTree &&
>>>> m_param->analysisLoadReuseLevel == 10)
>>>>                      memset(&(analysis->interData)->cuQPOff[count],
>>>> cuQPBuf[d], bytes);
>>>>                  if (m_param->analysisLoadReuseLevel > 4)
>>>>                  {
>>>> @@ -4736,7 +4760,7 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>      int numPartitions = analysis->numPartitions;
>>>>      int numCUsInFrame = analysis->numCUsInFrame;
>>>>      int numCuInHeight = analysis->numCuInHeight;
>>>> -    /* Allocate memory for scaled resoultion's numPartitions and
>>>> numCUsInFrame*/
>>>> +    /* Allocate memory for scaled resolution's numPartitions and
>>>> numCUsInFrame */
>>>>      analysis->numPartitions = m_param->num4x4Partitions;
>>>>      analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
>>>>      analysis->numCuInHeight = cuLoc.heightInCU;
>>>> @@ -4808,25 +4832,40 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>          X265_FREE(vbvCostBuf);
>>>>      }
>>>>
>>>> +    uint32_t reuseBufSize = 0;
>>>> +    int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL;
>>>> +    if (m_param->rc.cuTree)
>>>> +    {
>>>> +        if (m_param->analysisLoadReuseLevel == 10)
>>>> +            reuseBufSize = depthBytes;
>>>> +        else if (m_param->analysisLoadReuseLevel > 1)
>>>> +            reuseBufSize = (MAX_NUM_CU_GEOMS / factor) *
>>>> (analysis->numCUsInFrame);
>>>> +        cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);
>>>> +        if (!m_param->bUseAnalysisFile)
>>>> +        {
>>>> +            if (analysis->sliceType == X265_TYPE_IDR ||
>>>> analysis->sliceType == X265_TYPE_I)
>>>> +                cuQPOffSets = intraPic->cuQPOff;
>>>> +            else
>>>> +                cuQPOffSets = interPic->cuQPOff;
>>>> +        }
>>>> +    }
>>>> +
>>>>      if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
>>>> X265_TYPE_I)
>>>>      {
>>>>          if (m_param->analysisLoadReuseLevel < 2)
>>>>              return;
>>>>
>>>>          uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>>>> *partSizes = NULL;
>>>> -        int8_t *cuQPBuf = NULL;
>>>>
>>>>          tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>>>>          depthBuf = tempBuf;
>>>>          modeBuf = tempBuf + depthBytes;
>>>>          partSizes = tempBuf + 2 * depthBytes;
>>>> -        if (m_param->rc.cuTree)
>>>> -            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>>
>>>>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, intraPic->depth);
>>>>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, intraPic->chromaModes);
>>>>          X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, intraPic->partSizes);
>>>> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>>>> +        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>>> reuseBufSize, m_analysisFileIn, cuQPOffSets); }
>>>>
>>>>          uint32_t count = 0;
>>>>          for (uint32_t d = 0; d < depthBytes; d++)
>>>> @@ -4848,7 +4887,7 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>                  memset(&(analysis->intraData)->depth[count],
>>>> depthBuf[d], bytes);
>>>>                  memset(&(analysis->intraData)->chromaModes[count],
>>>> modeBuf[d], bytes);
>>>>                  memset(&(analysis->intraData)->partSizes[count],
>>>> partSizes[d], bytes);
>>>> -                if (m_param->rc.cuTree)
>>>> +                if (m_param->rc.cuTree &&
>>>> m_param->analysisLoadReuseLevel == 10)
>>>>                      memset(&(analysis->intraData)->cuQPOff[count],
>>>> cuQPBuf[d], bytes);
>>>>                  count += bytes;
>>>>                  d += getCUIndex(&cuLoc, &count, bytes, 1);
>>>> @@ -4886,7 +4925,6 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>          uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>>>>          MV* mv[2];
>>>>          int8_t* refIdx[2];
>>>> -        int8_t* cuQPBuf = NULL;
>>>>
>>>>          int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>>>>          bool bIntraInInter = false;
>>>> @@ -4900,12 +4938,16 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>          tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
>>>>          depthBuf = tempBuf;
>>>>          modeBuf = tempBuf + depthBytes;
>>>> -        if (m_param->rc.cuTree)
>>>> -            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>>
>>>>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, interPic->depth);
>>>>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>>> m_analysisFileIn, interPic->modes);
>>>> -        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>>> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>>>> +        if (m_param->rc.cuTree)
>>>> +        {
>>>> +            X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>>>> m_analysisFileIn, cuQPOffSets);
>>>> +            if (m_param->analysisLoadReuseLevel > 1 &&
>>>> m_param->analysisLoadReuseLevel < 10)
>>>> +                memcpy(&(analysis->interData)->cuQPOff, cuQPBuf,
>>>> sizeof(int8_t) * reuseBufSize);
>>>> +        }
>>>> +
>>>>          if (m_param->analysisLoadReuseLevel > 4)
>>>>          {
>>>>              partSize = modeBuf + depthBytes;
>>>> @@ -4954,7 +4996,7 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>              {
>>>>                  memset(&(analysis->interData)->depth[count],
>>>> writeDepth, bytes);
>>>>                  memset(&(analysis->interData)->modes[count],
>>>> modeBuf[d], bytes);
>>>> -                if (m_param->rc.cuTree)
>>>> +                if (m_param->rc.cuTree &&
>>>> m_param->analysisLoadReuseLevel == 10)
>>>>                      memset(&(analysis->interData)->cuQPOff[count],
>>>> cuQPBuf[d], bytes);
>>>>                  if (m_param->analysisLoadReuseLevel == 10 &&
>>>> bIntraInInter)
>>>>                      memset(&(analysis->intraData)->chromaModes[count],
>>>> chromaDir[d], bytes);
>>>> @@ -5046,7 +5088,9 @@ void
>>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>>>              }
>>>>          }
>>>>          else
>>>> +        {
>>>>              X265_FREAD((analysis->interData)->ref, sizeof(int32_t),
>>>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir,
>>>> m_analysisFileIn, interPic->ref);
>>>> +        }
>>>>
>>>>          consumedBytes += frameRecordSize;
>>>>          if (numDir == 1)
>>>> @@ -5510,9 +5554,10 @@ void
>>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>>>          analysis->frameRecordSize += analysis->numCUsInFrame *
>>>> sizeof(sse_t);
>>>>      }
>>>>
>>>> +    uint32_t reuseQPBufsize = 0;
>>>>      if (m_param->analysisSaveReuseLevel > 1)
>>>>      {
>>>> -
>>>> +        reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;
>>>>          if (analysis->sliceType == X265_TYPE_IDR ||
>>>> analysis->sliceType == X265_TYPE_I)
>>>>          {
>>>>              for (uint32_t cuAddr = 0; cuAddr <
>>>> analysis->numCUsInFrame; cuAddr++)
>>>> @@ -5536,12 +5581,21 @@ void
>>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>>>                      partSize = ctu->m_partSize[absPartIdx];
>>>>                      intraDataCTU->partSizes[depthBytes] = partSize;
>>>>
>>>> -                    if (m_param->rc.cuTree)
>>>> +                    if (m_param->rc.cuTree &&
>>>> m_param->analysisSaveReuseLevel == 10)
>>>>                          intraDataCTU->cuQPOff[depthBytes] =
>>>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>>>>                      absPartIdx += ctu->m_numPartitions >> (depth * 2);
>>>>                  }
>>>> +
>>>> +                if (m_param->rc.cuTree &&
>>>> m_param->analysisSaveReuseLevel < 10)
>>>> +                {
>>>> +                    uint32_t nextCuIdx = (cuAddr + 1) *
>>>> MAX_NUM_CU_GEOMS;
>>>> +                    for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i <
>>>> nextCuIdx; i++)
>>>> +                        intraDataCTU->cuQPOff[i] =
>>>> (int8_t)(intraDataCTU->cuQPOff[i] - baseQP);
>>>> +                }
>>>>                  memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>>>> ctu->m_numPartitions);
>>>>              }
>>>> +            if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel
>>>> == 10)
>>>> +                reuseQPBufsize = depthBytes;
>>>>          }
>>>>          else
>>>>          {
>>>> @@ -5567,7 +5621,7 @@ void
>>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>>>                          predMode = 4; // used as indicator if the
>>>> block is coded as bidir
>>>>
>>>>                      interDataCTU->modes[depthBytes] = predMode;
>>>> -                    if (m_param->rc.cuTree)
>>>> +                    if (m_param->rc.cuTree &&
>>>> m_param->analysisSaveReuseLevel == 10)
>>>>                          interDataCTU->cuQPOff[depthBytes] =
>>>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>>>>
>>>>                      if (m_param->analysisSaveReuseLevel > 4)
>>>> @@ -5599,13 +5653,23 @@ void
>>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>>>                      }
>>>>                      absPartIdx += ctu->m_numPartitions >> (depth * 2);
>>>>                  }
>>>> +
>>>> +                if (m_param->rc.cuTree &&
>>>> m_param->analysisSaveReuseLevel < 10)
>>>> +                {
>>>> +                    uint32_t nextCuIdx = (cuAddr + 1) *
>>>> MAX_NUM_CU_GEOMS;
>>>> +                    for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i <
>>>> nextCuIdx ; i++)
>>>> +                        interDataCTU->cuQPOff[i] =
>>>> (int8_t)(interDataCTU->cuQPOff[i] - baseQP);
>>>> +                }
>>>> +
>>>>                  if (m_param->analysisSaveReuseLevel == 10 &&
>>>> bIntraInInter)
>>>>                      memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>>>> ctu->m_numPartitions);
>>>>              }
>>>> +            if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel
>>>> == 10)
>>>> +                reuseQPBufsize = depthBytes;
>>>>          }
>>>>
>>>>          if ((analysis->sliceType == X265_TYPE_IDR ||
>>>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree)
>>>> -            analysis->frameRecordSize += sizeof(uint8_t)*
>>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>>>> (sizeof(int8_t) * depthBytes);
>>>> +            analysis->frameRecordSize += sizeof(uint8_t)*
>>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>>>> (sizeof(int8_t) * reuseQPBufsize);
>>>>          else if (analysis->sliceType == X265_TYPE_IDR ||
>>>> analysis->sliceType == X265_TYPE_I)
>>>>              analysis->frameRecordSize += sizeof(uint8_t)*
>>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
>>>>          else
>>>> @@ -5613,7 +5677,8 @@ void
>>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>>>              /* Add sizeof depth, modes, partSize, cuQPOffset,
>>>> mergeFlag */
>>>>              analysis->frameRecordSize += depthBytes * 2;
>>>>              if (m_param->rc.cuTree)
>>>> -            analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
>>>> +                analysis->frameRecordSize += (sizeof(int8_t) *
>>>> reuseQPBufsize);
>>>> +
>>>>              if (m_param->analysisSaveReuseLevel > 4)
>>>>                  analysis->frameRecordSize += (depthBytes * 2);
>>>>
>>>> @@ -5669,7 +5734,7 @@ void
>>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>>>          X265_FWRITE((analysis->intraData)->chromaModes,
>>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>>>          X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
>>>> depthBytes, m_analysisFileOut);
>>>>          if (m_param->rc.cuTree)
>>>> -            X265_FWRITE((analysis->intraData)->cuQPOff,
>>>> sizeof(int8_t), depthBytes, m_analysisFileOut);
>>>> +            X265_FWRITE((analysis->intraData)->cuQPOff,
>>>> sizeof(int8_t), reuseQPBufsize, m_analysisFileOut);
>>>>          X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
>>>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>>>>      }
>>>>      else
>>>> @@ -5677,7 +5742,7 @@ void
>>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>>>          X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
>>>> depthBytes, m_analysisFileOut);
>>>>          X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
>>>> depthBytes, m_analysisFileOut);
>>>>          if (m_param->rc.cuTree)
>>>> -            X265_FWRITE((analysis->interData)->cuQPOff,
>>>> sizeof(int8_t), depthBytes, m_analysisFileOut);
>>>> +            X265_FWRITE((analysis->interData)->cuQPOff,
>>>> sizeof(int8_t), reuseQPBufsize, m_analysisFileOut);
>>>>          if (m_param->analysisSaveReuseLevel > 4)
>>>>          {
>>>>              X265_FWRITE((analysis->interData)->partSize,
>>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>>> @@ -5762,7 +5827,7 @@ void
>>>> Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c
>>>>                      interData->mv[1][depthBytes].word =
>>>> ctu->m_mv[1][absPartIdx].word;
>>>>                      interData->mvpIdx[1][depthBytes] =
>>>> ctu->m_mvpIdx[1][absPartIdx];
>>>>                      ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx];
>>>> -                    predMode = 4; // used as indiacator if the block
>>>> is coded as bidir
>>>> +                    predMode = 4; // used as indicator if the block is
>>>> coded as bidir
>>>>                  }
>>>>                  interData->modes[depthBytes] = predMode;
>>>>
>>>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
>>>> index 0adb0d0db..3bc01268b 100644
>>>> --- a/source/encoder/slicetype.cpp
>>>> +++ b/source/encoder/slicetype.cpp
>>>> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
>>>> bool bKeyframe)
>>>>
>>>>      if (!framecnt)
>>>>      {
>>>> -        if (m_param->rc.cuTree)
>>>> +        if (m_param->rc.cuTree && !m_param->analysisLoad)
>>>>
>>> [AM] Won't this implicitly turn OFF cutree at reuse-level 1?
>>>
>>>>              cuTree(frames, 0, bKeyframe);
>>>>          return;
>>>>      }
>>>> diff --git a/source/x265.h b/source/x265.h
>>>> index f44040ba7..8d7a75826 100644
>>>> --- a/source/x265.h
>>>> +++ b/source/x265.h
>>>> @@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data
>>>>      uint8_t*  modes;
>>>>      char*     partSizes;
>>>>      uint8_t*  chromaModes;
>>>> -    int8_t*    cuQPOff;
>>>> +    int8_t*   cuQPOff;
>>>>  }x265_analysis_intra_data;
>>>>
>>>>  typedef struct x265_analysis_MV
>>>> @@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data
>>>>      uint8_t*    interDir;
>>>>      uint8_t*    mvpIdx[2];
>>>>      int8_t*     refIdx[2];
>>>> -    x265_analysis_MV*         mv[2];
>>>> +    x265_analysis_MV* mv[2];
>>>>      int64_t*     sadCost;
>>>>      int8_t*    cuQPOff;
>>>>  }x265_analysis_inter_data;
>>>> --
>>>> 2.20.1.windows.1
>>>>
>>>>
>>>> --
>>>> *With Regards,*
>>>> *Srikanth Kurapati.*
>>>> _______________________________________________
>>>> x265-devel mailing list
>>>> x265-devel at videolan.org
>>>> https://mailman.videolan.org/listinfo/x265-devel
>>>>
>>>
>>>
>>> --
>>> Regards,
>>> *Aruna Matheswaran,*
>>> Video Codec Engineer,
>>> Media & AI analytics BU,
>>>
>>>
>>>
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>> --
>> *With Regards,*
>> *Srikanth Kurapati.*
>>
>
>
> --
> *With Regards,*
> *Srikanth Kurapati.*
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>


-- 
Regards,
*Aruna Matheswaran,*
Video Codec Engineer,
Media & AI analytics BU,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210122/9319ec37/attachment-0001.html>


More information about the x265-devel mailing list