[x265] [Patch] fix: corrects output mismatch for cutree enabled analysis save/load encodes with reuse-levels in between 1 to 10 for similar encoder settings.
Srikanth Kurapati
srikanth.kurapati at multicorewareinc.com
Thu Jan 21 12:03:54 UTC 2021
Adding to my reply above.
[AM] Why MAX_NUM_CU_GEOMS combinations?
[KS] Will optimize storage based on min-cu-size configuration.
On Thu, Jan 21, 2021 at 4:09 PM Srikanth Kurapati <
srikanth.kurapati at multicorewareinc.com> wrote:
>
> [AM] Can't we share lowres cutree stats generated at qg size granularity?
> Why MAX_NUM_CU_GEOMS combinations?
>
> [KS] If we share the stats like that, then we will have to calculate the dqp
> per CU in the analysis phase, just like the save encode, and we will not get
> the savings in CPU cycles there. Currently we store the final dqp, derived
> from lowres mv costs at qg size granularity, by taking the difference between
> the final qp and the base qp per slice.
>
> MAX_NUM_CU_GEOMS is 85 = (1 + 4 + 16 + 64); this is the maximum number of
> partitions at which qp can be computed and used in a CTU.
>
> [AM] Won't this implicitly turn OFF cutree at reuse-level 1?
>
> [KS] Agreed and addressed.
>
>
> On Tue, Jan 19, 2021 at 11:12 PM Aruna Matheswaran <
> aruna at multicorewareinc.com> wrote:
>
>>
>>
>> On Mon, Jan 11, 2021 at 8:08 PM Srikanth Kurapati <
>> srikanth.kurapati at multicorewareinc.com> wrote:
>>
>>> From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001
>>> From: Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
>>> Date: Wed, 30 Dec 2020 17:00:08 +0530
>>> Subject: [PATCH] fix: corrects output mismatch for cutree enabled
>>> analysis
>>> save/load encodes with reuse-levels in between 1 to 10 for similar
>>> encoder
>>> settings.
>>>
>>> ---
>>> source/abrEncApp.cpp | 14 +++-
>>> source/common/common.h | 3 +-
>>> source/common/cudata.h | 2 +-
>>> source/encoder/analysis.cpp | 31 ++++++++-
>>> source/encoder/analysis.h | 1 +
>>> source/encoder/api.cpp | 28 +++++++-
>>> source/encoder/encoder.cpp | 123 ++++++++++++++++++++++++++---------
>>> source/encoder/slicetype.cpp | 2 +-
>>> source/x265.h | 4 +-
>>> 9 files changed, 166 insertions(+), 42 deletions(-)
>>>
>>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
>>> index fa62ebf63..ea255e3f6 100644
>>> --- a/source/abrEncApp.cpp
>>> +++ b/source/abrEncApp.cpp
>>> @@ -340,7 +340,12 @@ namespace X265_NS {
>>> memcpy(intraDst->partSizes, intraSrc->partSizes,
>>> sizeof(char) * src->depthBytes);
>>> memcpy(intraDst->chromaModes, intraSrc->chromaModes,
>>> sizeof(uint8_t) * src->depthBytes);
>>> if (m_param->rc.cuTree)
>>> - memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>>> sizeof(int8_t) * src->depthBytes);
>>> + {
>>> + if (m_param->analysisSaveReuseLevel == 10)
>>> + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>>> sizeof(int8_t) * src->depthBytes);
>>> + else
>>> + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));
>>> + }
>>> }
>>> else
>>> {
>>> @@ -355,7 +360,12 @@ namespace X265_NS {
>>> memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
>>> src->depthBytes);
>>> memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
>>> src->depthBytes);
>>> if (m_param->rc.cuTree)
>>> - memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>>> sizeof(int8_t) * src->depthBytes);
>>> + {
>>> + if (m_param->analysisReuseLevel == 10)
>>> + memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>>> sizeof(int8_t) * src->depthBytes);
>>> + else
>>> + memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));
>>> + }
>>> if (m_param->analysisSaveReuseLevel > 4)
>>> {
>>> memcpy(interDst->partSize, interSrc->partSize,
>>> sizeof(uint8_t) * src->depthBytes);
>>> diff --git a/source/common/common.h b/source/common/common.h
>>> index 8c06cd79e..0ffbf17eb 100644
>>> --- a/source/common/common.h
>>> +++ b/source/common/common.h
>>> @@ -326,7 +326,8 @@ typedef int16_t coeff_t; // transform
>>> coefficient
>>>
>>> #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
>>> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
>>> -#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
>>> +#define MAX_NUM_CU_GEOMS 85
>>> +#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8
>>>
>>> #define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE //
>>> Maximum number of transform coefficients, for a 32x32 transform
>>> #define MAX_NUM_TR_CATEGORIES 16 // 32,
>>> 16, 8, 4 transform categories each for luma and chroma
>>> diff --git a/source/common/cudata.h b/source/common/cudata.h
>>> index 8397f0568..c7d9a1972 100644
>>> --- a/source/common/cudata.h
>>> +++ b/source/common/cudata.h
>>> @@ -371,7 +371,7 @@ struct CUDataMemPool
>>> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) *
>>> numInstances);
>>> }
>>> else
>>> - {
>>> + {
>>> uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
>>> CHROMA_V_SHIFT(csp));
>>> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC *
>>> 2) * numInstances);
>>> }
>>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
>>> index aabf386ca..22a4ba74f 100644
>>> --- a/source/encoder/analysis.cpp
>>> +++ b/source/encoder/analysis.cpp
>>> @@ -220,6 +220,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame&
>>> frame, const CUGeom& cuGeom, con
>>> if (m_param->analysisSave && !m_param->analysisLoad)
>>> for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU *
>>> numPredDir; i++)
>>> m_reuseRef[i] = -1;
>>> +
>>> + if (m_param->rc.cuTree)
>>> + m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr *
>>> MAX_NUM_CU_GEOMS];
>>> }
>>> ProfileCUScope(ctu, totalCTUTime, totalCTUs);
>>>
>>> @@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame&
>>> frame, const CUGeom& cuGeom, con
>>> memcpy(ctu.m_partSize,
>>> &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) *
>>> numPartition);
>>> memcpy(ctu.m_chromaIntraDir,
>>> &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
>>> numPartition);
>>> }
>>> + if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10)
>>> + m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr *
>>> MAX_NUM_CU_GEOMS];
>>> compressIntraCU(ctu, cuGeom, qp);
>>> }
>>> else
>>> @@ -520,6 +525,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
>>> parentCTU, const CUGeom& cuGeom
>>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>>>
>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>>> m_param->analysisSaveReuseLevel < 10)
>>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>>
>> +
>>> bool bAlreadyDecided = m_param->intraRefine != 4 &&
>>> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
>>> !(m_param->bAnalysisType == HEVC_INFO);
>>> bool bDecidedDepth = m_param->intraRefine != 4 &&
>>> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
>>> int split = 0;
>>> @@ -870,6 +878,9 @@ uint32_t Analysis::compressInterCU_dist(const
>>> CUData& parentCTU, const CUGeom& c
>>> uint32_t minDepth = m_param->rdLevel <= 4 ?
>>> topSkipMinDepth(parentCTU, cuGeom) : 0;
>>> uint32_t splitRefs[4] = { 0, 0, 0, 0 };
>>>
>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>>> m_param->analysisSaveReuseLevel < 10)
>>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>> +
>>> X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
>>> support RD 0 or 1\n");
>>>
>>> PMODE pmode(*this, cuGeom);
>>> @@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
>>> CUData& parentCTU, const CUGeom&
>>> uint32_t cuAddr = parentCTU.m_cuAddr;
>>> ModeDepth& md = m_modeDepth[depth];
>>>
>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>>> m_param->analysisSaveReuseLevel < 10)
>>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>>
>>> if (m_param->searchMethod == X265_SEA)
>>> {
>>> @@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
>>> CUData& parentCTU, const CUGeom&
>>> ModeDepth& md = m_modeDepth[depth];
>>> md.bestMode = NULL;
>>>
>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>>> m_param->analysisSaveReuseLevel < 10)
>>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>> +
>>> if (m_param->searchMethod == X265_SEA)
>>> {
>>> int numPredDir = m_slice->isInterP() ? 1 : 2;
>>> @@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const CUData&
>>> ctu, const CUGeom& cuGeom, int3
>>>
>>> if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
>>> {
>>> - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>>> cuGeom.absPartIdx;
>>> + int cuIdx;
>>> + int8_t cuQPOffSet = 0;
>>> +
>>> + if (m_param->scaleFactor == 2 ||
>>> m_param->analysisLoadReuseLevel == 10)
>>> + cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>>> cuGeom.absPartIdx;
>>> + else
>>> + cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) +
>>> cuGeom.geomRecurId;
>>> +
>>> if (ctu.m_slice->m_sliceType == I_SLICE)
>>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>>> (int32_t)(qp + 0.5 +
>>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
>>> + cuQPOffSet =
>>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx];
>>> else
>>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>>> (int32_t)(qp + 0.5 +
>>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
>>> + cuQPOffSet =
>>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx];
>>> +
>>> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>>> (int32_t)(qp + 0.5 + cuQPOffSet));
>>> }
>>> if (m_param->rc.hevcAq)
>>> {
>>> diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
>>> index 3bcb56bc3..8d76d5c5e 100644
>>> --- a/source/encoder/analysis.h
>>> +++ b/source/encoder/analysis.h
>>> @@ -126,6 +126,7 @@ protected:
>>> int32_t* m_reuseRef;
>>> uint8_t* m_reuseDepth;
>>> uint8_t* m_reuseModes;
>>> + int8_t * m_reuseQP; // array of QP values for
>>> analysis reuse at reuse levels > 1 and < 10 when cutree is enabled
>>> uint8_t* m_reusePartSize;
>>> uint8_t* m_reuseMergeFlag;
>>> x265_analysis_MV* m_reuseMv[2];
>>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
>>> index a986355e0..2c90fe8f2 100644
>>> --- a/source/encoder/api.cpp
>>> +++ b/source/encoder/api.cpp
>>> @@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param,
>>> x265_analysis_data* analysis)
>>> CHECKED_MALLOC_ZERO(intraData->partSizes, char,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> if (param->rc.cuTree)
>>> - CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + {
>>> + if (maxReuseLevel == 10)
>>> + {
>>> + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + }
>>> + else
>>> + {
>>> + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);
>>> + }
>>> + }
>>> }
>>> analysis->intraData = intraData;
>>>
>>> @@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param,
>>> x265_analysis_data* analysis)
>>> CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>>
>>> if (param->rc.cuTree && !isMultiPassOpt)
>>> - CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + {
>>> + if (maxReuseLevel == 10)
>>> + {
>>> + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + }
>>> + else
>>> + {
>>> + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);
>>>
>> [AM] Can't we share lowres cutree stats generated at qg size granularity?
>> Why MAX_NUM_CU_GEOMS combinations?
>>
>>> + }
>>> + }
>>> CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> @@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param,
>>> x265_analysis_data* analysis)
>>> X265_FREE((analysis->intraData)->partSizes);
>>> X265_FREE((analysis->intraData)->chromaModes);
>>> if (param->rc.cuTree)
>>> - X265_FREE((analysis->intraData)->cuQPOff);
>>> + {
>>> + X265_FREE_ZERO((analysis->intraData)->cuQPOff);
>>> + }
>>> }
>>> X265_FREE(analysis->intraData);
>>> analysis->intraData = NULL;
>>> @@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param,
>>> x265_analysis_data* analysis)
>>> X265_FREE((analysis->interData)->depth);
>>> X265_FREE((analysis->interData)->modes);
>>> if (!isMultiPassOpt && param->rc.cuTree)
>>> + {
>>> X265_FREE((analysis->interData)->cuQPOff);
>>> + }
>>> X265_FREE((analysis->interData)->mvpIdx[0]);
>>> X265_FREE((analysis->interData)->mvpIdx[1]);
>>> X265_FREE((analysis->interData)->mv[0]);
>>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
>>> index 1f710e1ce..5eb123d31 100644
>>> --- a/source/encoder/encoder.cpp
>>> +++ b/source/encoder/encoder.cpp
>>> @@ -4444,6 +4444,26 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> }
>>> }
>>> }
>>> +
>>> + int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL;
>>> + uint32_t reuseBufSize = 0;
>>> +
>>> + if (m_param->rc.cuTree)
>>> + {
>>> + if (m_param->analysisLoadReuseLevel == 10)
>>> + reuseBufSize = depthBytes;
>>> + else if (m_param->analysisLoadReuseLevel > 1)
>>> + reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;
>>> + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);
>>> + if (!m_param->bUseAnalysisFile)
>>> + {
>>> + if (analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I)
>>> + cuQPOffSets = intraPic->cuQPOff;
>>> + else
>>> + cuQPOffSets = interPic->cuQPOff;
>>> + }
>>> + }
>>> +
>>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
>>> X265_TYPE_I)
>>> {
>>> if (m_param->bAnalysisType == HEVC_INFO)
>>> @@ -4452,19 +4472,21 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> return;
>>>
>>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>>> *partSizes = NULL;
>>> - int8_t *cuQPBuf = NULL;
>>>
>>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>>> depthBuf = tempBuf;
>>> modeBuf = tempBuf + depthBytes;
>>> partSizes = tempBuf + 2 * depthBytes;
>>> - if (m_param->rc.cuTree)
>>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>
>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->depth);
>>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->chromaModes);
>>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->partSizes);
>>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>>> + if (m_param->rc.cuTree)
>>> + {
>>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>>> m_analysisFileIn, cuQPOffSets);
>>> + if (m_param->analysisLoadReuseLevel > 1 &&
>>> m_param->analysisLoadReuseLevel < 10)
>>> + memcpy(analysis->intraData->cuQPOff, cuQPBuf,
>>> sizeof(int8_t) * reuseBufSize);
>>> + }
>>>
>>> size_t count = 0;
>>> for (uint32_t d = 0; d < depthBytes; d++)
>>> @@ -4480,7 +4502,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> memset(&(analysis->intraData)->depth[count], depthBuf[d],
>>> bytes);
>>> memset(&(analysis->intraData)->chromaModes[count],
>>> modeBuf[d], bytes);
>>> memset(&(analysis->intraData)->partSizes[count],
>>> partSizes[d], bytes);
>>> - if (m_param->rc.cuTree)
>>> + if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
>>> == 10)
>>> memset(&(analysis->intraData)->cuQPOff[count],
>>> cuQPBuf[d], bytes);
>>> count += bytes;
>>> }
>>> @@ -4515,7 +4537,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>>> MV* mv[2];
>>> int8_t* refIdx[2];
>>> - int8_t* cuQPBuf = NULL;
>>>
>>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>>> bool bIntraInInter = false;
>>> @@ -4535,12 +4556,15 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
>>> depthBuf = tempBuf;
>>> modeBuf = tempBuf + depthBytes;
>>> - if (m_param->rc.cuTree)
>>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>
>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->depth);
>>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->modes);
>>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf,
>>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>>> + if (m_param->rc.cuTree)
>>> + {
>>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>>> m_analysisFileIn, cuQPOffSets);
>>> + if (m_param->analysisLoadReuseLevel > 1 &&
>>> m_param->analysisLoadReuseLevel < 10)
>>> + memcpy(analysis->interData->cuQPOff, cuQPBuf,
>>> sizeof(int8_t) * reuseBufSize);
>>> + }
>>>
>>> if (m_param->analysisLoadReuseLevel > 4)
>>> {
>>> @@ -4578,7 +4602,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> depthBuf[d] = 1;
>>> memset(&(analysis->interData)->depth[count],
>>> depthBuf[d], bytes);
>>> memset(&(analysis->interData)->modes[count],
>>> modeBuf[d], bytes);
>>> - if (m_param->rc.cuTree)
>>> + if (m_param->rc.cuTree &&
>>> m_param->analysisLoadReuseLevel == 10)
>>> memset(&(analysis->interData)->cuQPOff[count],
>>> cuQPBuf[d], bytes);
>>> if (m_param->analysisLoadReuseLevel > 4)
>>> {
>>> @@ -4736,7 +4760,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> int numPartitions = analysis->numPartitions;
>>> int numCUsInFrame = analysis->numCUsInFrame;
>>> int numCuInHeight = analysis->numCuInHeight;
>>> - /* Allocate memory for scaled resoultion's numPartitions and
>>> numCUsInFrame*/
>>> + /* Allocate memory for scaled resolution's numPartitions and
>>> numCUsInFrame */
>>> analysis->numPartitions = m_param->num4x4Partitions;
>>> analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
>>> analysis->numCuInHeight = cuLoc.heightInCU;
>>> @@ -4808,25 +4832,40 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> X265_FREE(vbvCostBuf);
>>> }
>>>
>>> + uint32_t reuseBufSize = 0;
>>> + int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL;
>>> + if (m_param->rc.cuTree)
>>> + {
>>> + if (m_param->analysisLoadReuseLevel == 10)
>>> + reuseBufSize = depthBytes;
>>> + else if (m_param->analysisLoadReuseLevel > 1)
>>> + reuseBufSize = (MAX_NUM_CU_GEOMS / factor) *
>>> (analysis->numCUsInFrame);
>>> + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);
>>> + if (!m_param->bUseAnalysisFile)
>>> + {
>>> + if (analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I)
>>> + cuQPOffSets = intraPic->cuQPOff;
>>> + else
>>> + cuQPOffSets = interPic->cuQPOff;
>>> + }
>>> + }
>>> +
>>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
>>> X265_TYPE_I)
>>> {
>>> if (m_param->analysisLoadReuseLevel < 2)
>>> return;
>>>
>>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>>> *partSizes = NULL;
>>> - int8_t *cuQPBuf = NULL;
>>>
>>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>>> depthBuf = tempBuf;
>>> modeBuf = tempBuf + depthBytes;
>>> partSizes = tempBuf + 2 * depthBytes;
>>> - if (m_param->rc.cuTree)
>>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>
>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->depth);
>>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->chromaModes);
>>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->partSizes);
>>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>>> + if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>> reuseBufSize, m_analysisFileIn, cuQPOffSets); }
>>>
>>> uint32_t count = 0;
>>> for (uint32_t d = 0; d < depthBytes; d++)
>>> @@ -4848,7 +4887,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> memset(&(analysis->intraData)->depth[count],
>>> depthBuf[d], bytes);
>>> memset(&(analysis->intraData)->chromaModes[count],
>>> modeBuf[d], bytes);
>>> memset(&(analysis->intraData)->partSizes[count],
>>> partSizes[d], bytes);
>>> - if (m_param->rc.cuTree)
>>> + if (m_param->rc.cuTree &&
>>> m_param->analysisLoadReuseLevel == 10)
>>> memset(&(analysis->intraData)->cuQPOff[count],
>>> cuQPBuf[d], bytes);
>>> count += bytes;
>>> d += getCUIndex(&cuLoc, &count, bytes, 1);
>>> @@ -4886,7 +4925,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>>> MV* mv[2];
>>> int8_t* refIdx[2];
>>> - int8_t* cuQPBuf = NULL;
>>>
>>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>>> bool bIntraInInter = false;
>>> @@ -4900,12 +4938,16 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
>>> depthBuf = tempBuf;
>>> modeBuf = tempBuf + depthBytes;
>>> - if (m_param->rc.cuTree)
>>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>
>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->depth);
>>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->modes);
>>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>>> + if (m_param->rc.cuTree)
>>> + {
>>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>>> m_analysisFileIn, cuQPOffSets);
>>> + if (m_param->analysisLoadReuseLevel > 1 &&
>>> m_param->analysisLoadReuseLevel < 10)
>>> + memcpy(&(analysis->interData)->cuQPOff, cuQPBuf,
>>> sizeof(int8_t) * reuseBufSize);
>>> + }
>>> +
>>> if (m_param->analysisLoadReuseLevel > 4)
>>> {
>>> partSize = modeBuf + depthBytes;
>>> @@ -4954,7 +4996,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> {
>>> memset(&(analysis->interData)->depth[count],
>>> writeDepth, bytes);
>>> memset(&(analysis->interData)->modes[count],
>>> modeBuf[d], bytes);
>>> - if (m_param->rc.cuTree)
>>> + if (m_param->rc.cuTree &&
>>> m_param->analysisLoadReuseLevel == 10)
>>> memset(&(analysis->interData)->cuQPOff[count],
>>> cuQPBuf[d], bytes);
>>> if (m_param->analysisLoadReuseLevel == 10 &&
>>> bIntraInInter)
>>> memset(&(analysis->intraData)->chromaModes[count],
>>> chromaDir[d], bytes);
>>> @@ -5046,7 +5088,9 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> }
>>> }
>>> else
>>> + {
>>> X265_FREAD((analysis->interData)->ref, sizeof(int32_t),
>>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir,
>>> m_analysisFileIn, interPic->ref);
>>> + }
>>>
>>> consumedBytes += frameRecordSize;
>>> if (numDir == 1)
>>> @@ -5510,9 +5554,10 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> analysis->frameRecordSize += analysis->numCUsInFrame *
>>> sizeof(sse_t);
>>> }
>>>
>>> + uint32_t reuseQPBufsize = 0;
>>> if (m_param->analysisSaveReuseLevel > 1)
>>> {
>>> -
>>> + reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;
>>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
>>> == X265_TYPE_I)
>>> {
>>> for (uint32_t cuAddr = 0; cuAddr < analysis->numCUsInFrame;
>>> cuAddr++)
>>> @@ -5536,12 +5581,21 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> partSize = ctu->m_partSize[absPartIdx];
>>> intraDataCTU->partSizes[depthBytes] = partSize;
>>>
>>> - if (m_param->rc.cuTree)
>>> + if (m_param->rc.cuTree &&
>>> m_param->analysisSaveReuseLevel == 10)
>>> intraDataCTU->cuQPOff[depthBytes] =
>>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>>> absPartIdx += ctu->m_numPartitions >> (depth * 2);
>>> }
>>> +
>>> + if (m_param->rc.cuTree &&
>>> m_param->analysisSaveReuseLevel < 10)
>>> + {
>>> + uint32_t nextCuIdx = (cuAddr + 1) *
>>> MAX_NUM_CU_GEOMS;
>>> + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i <
>>> nextCuIdx; i++)
>>> + intraDataCTU->cuQPOff[i] =
>>> (int8_t)(intraDataCTU->cuQPOff[i] - baseQP);
>>> + }
>>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>>> ctu->m_numPartitions);
>>> }
>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel
>>> == 10)
>>> + reuseQPBufsize = depthBytes;
>>> }
>>> else
>>> {
>>> @@ -5567,7 +5621,7 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> predMode = 4; // used as indicator if the block
>>> is coded as bidir
>>>
>>> interDataCTU->modes[depthBytes] = predMode;
>>> - if (m_param->rc.cuTree)
>>> + if (m_param->rc.cuTree &&
>>> m_param->analysisSaveReuseLevel == 10)
>>> interDataCTU->cuQPOff[depthBytes] =
>>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>>>
>>> if (m_param->analysisSaveReuseLevel > 4)
>>> @@ -5599,13 +5653,23 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> }
>>> absPartIdx += ctu->m_numPartitions >> (depth * 2);
>>> }
>>> +
>>> + if (m_param->rc.cuTree &&
>>> m_param->analysisSaveReuseLevel < 10)
>>> + {
>>> + uint32_t nextCuIdx = (cuAddr + 1) *
>>> MAX_NUM_CU_GEOMS;
>>> + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i <
>>> nextCuIdx ; i++)
>>> + interDataCTU->cuQPOff[i] =
>>> (int8_t)(interDataCTU->cuQPOff[i] - baseQP);
>>> + }
>>> +
>>> if (m_param->analysisSaveReuseLevel == 10 &&
>>> bIntraInInter)
>>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>>> ctu->m_numPartitions);
>>> }
>>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel
>>> == 10)
>>> + reuseQPBufsize = depthBytes;
>>> }
>>>
>>> if ((analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree)
>>> - analysis->frameRecordSize += sizeof(uint8_t)*
>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>>> (sizeof(int8_t) * depthBytes);
>>> + analysis->frameRecordSize += sizeof(uint8_t)*
>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>>> (sizeof(int8_t) * reuseQPBufsize);
>>> else if (analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I)
>>> analysis->frameRecordSize += sizeof(uint8_t)*
>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
>>> else
>>> @@ -5613,7 +5677,8 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag
>>> */
>>> analysis->frameRecordSize += depthBytes * 2;
>>> if (m_param->rc.cuTree)
>>> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
>>> + analysis->frameRecordSize += (sizeof(int8_t) *
>>> reuseQPBufsize);
>>> +
>>> if (m_param->analysisSaveReuseLevel > 4)
>>> analysis->frameRecordSize += (depthBytes * 2);
>>>
>>> @@ -5669,7 +5734,7 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> X265_FWRITE((analysis->intraData)->chromaModes,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
>>> depthBytes, m_analysisFileOut);
>>> if (m_param->rc.cuTree)
>>> - X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
>>> depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
>>> reuseQPBufsize, m_analysisFileOut);
>>> X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
>>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>>> }
>>> else
>>> @@ -5677,7 +5742,7 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> if (m_param->rc.cuTree)
>>> - X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
>>> depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
>>> reuseQPBufsize, m_analysisFileOut);
>>> if (m_param->analysisSaveReuseLevel > 4)
>>> {
>>> X265_FWRITE((analysis->interData)->partSize,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> @@ -5762,7 +5827,7 @@ void
>>> Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c
>>> interData->mv[1][depthBytes].word =
>>> ctu->m_mv[1][absPartIdx].word;
>>> interData->mvpIdx[1][depthBytes] =
>>> ctu->m_mvpIdx[1][absPartIdx];
>>> ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx];
>>> - predMode = 4; // used as indiacator if the block is
>>> coded as bidir
>>> + predMode = 4; // used as indicator if the block is
>>> coded as bidir
>>> }
>>> interData->modes[depthBytes] = predMode;
>>>
>>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
>>> index 0adb0d0db..3bc01268b 100644
>>> --- a/source/encoder/slicetype.cpp
>>> +++ b/source/encoder/slicetype.cpp
>>> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
>>> bool bKeyframe)
>>>
>>> if (!framecnt)
>>> {
>>> - if (m_param->rc.cuTree)
>>> + if (m_param->rc.cuTree && !m_param->analysisLoad)
>>>
>> [AM] Won't this implicitly turn OFF cutree at reuse-level 1?
>>
>>> cuTree(frames, 0, bKeyframe);
>>> return;
>>> }
>>> diff --git a/source/x265.h b/source/x265.h
>>> index f44040ba7..8d7a75826 100644
>>> --- a/source/x265.h
>>> +++ b/source/x265.h
>>> @@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data
>>> uint8_t* modes;
>>> char* partSizes;
>>> uint8_t* chromaModes;
>>> - int8_t* cuQPOff;
>>> + int8_t* cuQPOff;
>>> }x265_analysis_intra_data;
>>>
>>> typedef struct x265_analysis_MV
>>> @@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data
>>> uint8_t* interDir;
>>> uint8_t* mvpIdx[2];
>>> int8_t* refIdx[2];
>>> - x265_analysis_MV* mv[2];
>>> + x265_analysis_MV* mv[2];
>>> int64_t* sadCost;
>>> int8_t* cuQPOff;
>>> }x265_analysis_inter_data;
>>> --
>>> 2.20.1.windows.1
>>>
>>>
>>> --
>>> *With Regards,*
>>> *Srikanth Kurapati.*
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>> --
>> Regards,
>> *Aruna Matheswaran,*
>> Video Codec Engineer,
>> Media & AI analytics BU,
>>
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> --
> *With Regards,*
> *Srikanth Kurapati.*
>
--
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210121/96549d17/attachment-0001.html>
More information about the x265-devel
mailing list