[x265] [Patch] fix: corrects output mismatch for cutree enabled analysis save/load encodes with reuse-levels between 1 and 10 for similar encoder settings.
Srikanth Kurapati
srikanth.kurapati at multicorewareinc.com
Thu Jan 21 10:39:34 UTC 2021
[AM] Can't we share lowres cutree stats generated at qg size granularity?
Why MAX_NUM_CU_GEOMS combinations?
[KS] If we share the stats like that, then we will have to calculate the dqp
per CU at the analysis phase, just like the save encode, and we will not get
the savings in CPU cycles there. Currently we are storing the final dqp,
derived from lowres mv costs at qg size granularity, by taking the difference
between the final qp and the base qp per slice.
MAX_NUM_CU_GEOMS is 85 = (1 + 4 + 16 + 64); this is the maximum number of
partitions at which qp can be computed and used in a CTU.
[AM] Won't this implicitly turn OFF cutree at reuse-level 1?
[KS] Agreed and addressed.
On Tue, Jan 19, 2021 at 11:12 PM Aruna Matheswaran <
aruna at multicorewareinc.com> wrote:
>
>
> On Mon, Jan 11, 2021 at 8:08 PM Srikanth Kurapati <
> srikanth.kurapati at multicorewareinc.com> wrote:
>
>> From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001
>> From: Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
>> Date: Wed, 30 Dec 2020 17:00:08 +0530
>> Subject: [PATCH] fix: corrects output mismatch for cutree enabled analysis
>> save/load enodes with reuse-levels in between 1 to 10 for similar encoder
>> settings.
>>
>> ---
>> source/abrEncApp.cpp | 14 +++-
>> source/common/common.h | 3 +-
>> source/common/cudata.h | 2 +-
>> source/encoder/analysis.cpp | 31 ++++++++-
>> source/encoder/analysis.h | 1 +
>> source/encoder/api.cpp | 28 +++++++-
>> source/encoder/encoder.cpp | 123 ++++++++++++++++++++++++++---------
>> source/encoder/slicetype.cpp | 2 +-
>> source/x265.h | 4 +-
>> 9 files changed, 166 insertions(+), 42 deletions(-)
>>
>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
>> index fa62ebf63..ea255e3f6 100644
>> --- a/source/abrEncApp.cpp
>> +++ b/source/abrEncApp.cpp
>> @@ -340,7 +340,12 @@ namespace X265_NS {
>> memcpy(intraDst->partSizes, intraSrc->partSizes,
>> sizeof(char) * src->depthBytes);
>> memcpy(intraDst->chromaModes, intraSrc->chromaModes,
>> sizeof(uint8_t) * src->depthBytes);
>> if (m_param->rc.cuTree)
>> - memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>> sizeof(int8_t) * src->depthBytes);
>> + {
>> + if (m_param->analysisSaveReuseLevel == 10)
>> + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>> sizeof(int8_t) * src->depthBytes);
>> + else
>> + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));
>> + }
>> }
>> else
>> {
>> @@ -355,7 +360,12 @@ namespace X265_NS {
>> memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
>> src->depthBytes);
>> memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
>> src->depthBytes);
>> if (m_param->rc.cuTree)
>> - memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>> sizeof(int8_t) * src->depthBytes);
>> + {
>> + if (m_param->analysisReuseLevel == 10)
>> + memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>> sizeof(int8_t) * src->depthBytes);
>> + else
>> + memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>> sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));
>> + }
>> if (m_param->analysisSaveReuseLevel > 4)
>> {
>> memcpy(interDst->partSize, interSrc->partSize,
>> sizeof(uint8_t) * src->depthBytes);
>> diff --git a/source/common/common.h b/source/common/common.h
>> index 8c06cd79e..0ffbf17eb 100644
>> --- a/source/common/common.h
>> +++ b/source/common/common.h
>> @@ -326,7 +326,8 @@ typedef int16_t coeff_t; // transform
>> coefficient
>>
>> #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)
>> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
>> -#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
>> +#define MAX_NUM_CU_GEOMS 85
>> +#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8
>>
>> #define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // Maximum
>> number of transform coefficients, for a 32x32 transform
>> #define MAX_NUM_TR_CATEGORIES 16 // 32, 16,
>> 8, 4 transform categories each for luma and chroma
>> diff --git a/source/common/cudata.h b/source/common/cudata.h
>> index 8397f0568..c7d9a1972 100644
>> --- a/source/common/cudata.h
>> +++ b/source/common/cudata.h
>> @@ -371,7 +371,7 @@ struct CUDataMemPool
>> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) *
>> numInstances);
>> }
>> else
>> - {
>> + {
>> uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
>> CHROMA_V_SHIFT(csp));
>> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2)
>> * numInstances);
>> }
>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
>> index aabf386ca..22a4ba74f 100644
>> --- a/source/encoder/analysis.cpp
>> +++ b/source/encoder/analysis.cpp
>> @@ -220,6 +220,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame&
>> frame, const CUGeom& cuGeom, con
>> if (m_param->analysisSave && !m_param->analysisLoad)
>> for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir;
>> i++)
>> m_reuseRef[i] = -1;
>> +
>> + if (m_param->rc.cuTree)
>> + m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr *
>> MAX_NUM_CU_GEOMS];
>> }
>> ProfileCUScope(ctu, totalCTUTime, totalCTUs);
>>
>> @@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame&
>> frame, const CUGeom& cuGeom, con
>> memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr
>> * numPartition], sizeof(char) * numPartition);
>> memcpy(ctu.m_chromaIntraDir,
>> &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
>> numPartition);
>> }
>> + if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10)
>> + m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr *
>> MAX_NUM_CU_GEOMS];
>> compressIntraCU(ctu, cuGeom, qp);
>> }
>> else
>> @@ -520,6 +525,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
>> parentCTU, const CUGeom& cuGeom
>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>>
>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>> m_param->analysisSaveReuseLevel < 10)
>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>
> +
>> bool bAlreadyDecided = m_param->intraRefine != 4 &&
>> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
>> !(m_param->bAnalysisType == HEVC_INFO);
>> bool bDecidedDepth = m_param->intraRefine != 4 &&
>> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
>> int split = 0;
>> @@ -870,6 +878,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
>> parentCTU, const CUGeom& c
>> uint32_t minDepth = m_param->rdLevel <= 4 ?
>> topSkipMinDepth(parentCTU, cuGeom) : 0;
>> uint32_t splitRefs[4] = { 0, 0, 0, 0 };
>>
>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>> m_param->analysisSaveReuseLevel < 10)
>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>> +
>> X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
>> support RD 0 or 1\n");
>>
>> PMODE pmode(*this, cuGeom);
>> @@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
>> CUData& parentCTU, const CUGeom&
>> uint32_t cuAddr = parentCTU.m_cuAddr;
>> ModeDepth& md = m_modeDepth[depth];
>>
>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>> m_param->analysisSaveReuseLevel < 10)
>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>>
>> if (m_param->searchMethod == X265_SEA)
>> {
>> @@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
>> CUData& parentCTU, const CUGeom&
>> ModeDepth& md = m_modeDepth[depth];
>> md.bestMode = NULL;
>>
>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
>> m_param->analysisSaveReuseLevel < 10)
>> + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;
>> +
>> if (m_param->searchMethod == X265_SEA)
>> {
>> int numPredDir = m_slice->isInterP() ? 1 : 2;
>> @@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const CUData&
>> ctu, const CUGeom& cuGeom, int3
>>
>> if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
>> {
>> - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>> cuGeom.absPartIdx;
>> + int cuIdx;
>> + int8_t cuQPOffSet = 0;
>> +
>> + if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
>> == 10)
>> + cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>> cuGeom.absPartIdx;
>> + else
>> + cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) +
>> cuGeom.geomRecurId;
>> +
>> if (ctu.m_slice->m_sliceType == I_SLICE)
>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
>> + cuQPOffSet =
>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx];
>> else
>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
>> + cuQPOffSet =
>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx];
>> +
>> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 + cuQPOffSet));
>> }
>> if (m_param->rc.hevcAq)
>> {
>> diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
>> index 3bcb56bc3..8d76d5c5e 100644
>> --- a/source/encoder/analysis.h
>> +++ b/source/encoder/analysis.h
>> @@ -126,6 +126,7 @@ protected:
>> int32_t* m_reuseRef;
>> uint8_t* m_reuseDepth;
>> uint8_t* m_reuseModes;
>> + int8_t * m_reuseQP; // array of QP values for
>> analysis reuse at reuse levels > 1 and < 10 when cutree is enabled
>> uint8_t* m_reusePartSize;
>> uint8_t* m_reuseMergeFlag;
>> x265_analysis_MV* m_reuseMv[2];
>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
>> index a986355e0..2c90fe8f2 100644
>> --- a/source/encoder/api.cpp
>> +++ b/source/encoder/api.cpp
>> @@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>> CHECKED_MALLOC_ZERO(intraData->partSizes, char,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> if (param->rc.cuTree)
>> - CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + {
>> + if (maxReuseLevel == 10)
>> + {
>> + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + }
>> + else
>> + {
>> + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);
>> + }
>> + }
>> }
>> analysis->intraData = intraData;
>>
>> @@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>> CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>>
>> if (param->rc.cuTree && !isMultiPassOpt)
>> - CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + {
>> + if (maxReuseLevel == 10)
>> + {
>> + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + }
>> + else
>> + {
>> + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>> MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);
>>
> [AM] Can't we share lowres cutree stats generated at qg size granularity?
> Why MAX_NUM_CU_GEOMS combinations?
>
>> + }
>> + }
>> CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> @@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>> X265_FREE((analysis->intraData)->partSizes);
>> X265_FREE((analysis->intraData)->chromaModes);
>> if (param->rc.cuTree)
>> - X265_FREE((analysis->intraData)->cuQPOff);
>> + {
>> + X265_FREE_ZERO((analysis->intraData)->cuQPOff);
>> + }
>> }
>> X265_FREE(analysis->intraData);
>> analysis->intraData = NULL;
>> @@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>> X265_FREE((analysis->interData)->depth);
>> X265_FREE((analysis->interData)->modes);
>> if (!isMultiPassOpt && param->rc.cuTree)
>> + {
>> X265_FREE((analysis->interData)->cuQPOff);
>> + }
>> X265_FREE((analysis->interData)->mvpIdx[0]);
>> X265_FREE((analysis->interData)->mvpIdx[1]);
>> X265_FREE((analysis->interData)->mv[0]);
>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
>> index 1f710e1ce..5eb123d31 100644
>> --- a/source/encoder/encoder.cpp
>> +++ b/source/encoder/encoder.cpp
>> @@ -4444,6 +4444,26 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> }
>> }
>> }
>> +
>> + int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL;
>> + uint32_t reuseBufSize = 0;
>> +
>> + if (m_param->rc.cuTree)
>> + {
>> + if (m_param->analysisLoadReuseLevel == 10)
>> + reuseBufSize = depthBytes;
>> + else if (m_param->analysisLoadReuseLevel > 1)
>> + reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;
>> + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);
>> + if (!m_param->bUseAnalysisFile)
>> + {
>> + if (analysis->sliceType == X265_TYPE_IDR ||
>> analysis->sliceType == X265_TYPE_I)
>> + cuQPOffSets = intraPic->cuQPOff;
>> + else
>> + cuQPOffSets = interPic->cuQPOff;
>> + }
>> + }
>> +
>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
>> X265_TYPE_I)
>> {
>> if (m_param->bAnalysisType == HEVC_INFO)
>> @@ -4452,19 +4472,21 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>> return;
>>
>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>> *partSizes = NULL;
>> - int8_t *cuQPBuf = NULL;
>>
>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>> depthBuf = tempBuf;
>> modeBuf = tempBuf + depthBytes;
>> partSizes = tempBuf + 2 * depthBytes;
>> - if (m_param->rc.cuTree)
>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>
>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->depth);
>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->chromaModes);
>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->partSizes);
>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>> + if (m_param->rc.cuTree)
>> + {
>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>> m_analysisFileIn, cuQPOffSets);
>> + if (m_param->analysisLoadReuseLevel > 1 &&
>> m_param->analysisLoadReuseLevel < 10)
>> + memcpy(analysis->intraData->cuQPOff, cuQPBuf,
>> sizeof(int8_t) * reuseBufSize);
>> + }
>>
>> size_t count = 0;
>> for (uint32_t d = 0; d < depthBytes; d++)
>> @@ -4480,7 +4502,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> memset(&(analysis->intraData)->depth[count], depthBuf[d],
>> bytes);
>> memset(&(analysis->intraData)->chromaModes[count],
>> modeBuf[d], bytes);
>> memset(&(analysis->intraData)->partSizes[count],
>> partSizes[d], bytes);
>> - if (m_param->rc.cuTree)
>> + if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel ==
>> 10)
>> memset(&(analysis->intraData)->cuQPOff[count],
>> cuQPBuf[d], bytes);
>> count += bytes;
>> }
>> @@ -4515,7 +4537,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>> MV* mv[2];
>> int8_t* refIdx[2];
>> - int8_t* cuQPBuf = NULL;
>>
>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>> bool bIntraInInter = false;
>> @@ -4535,12 +4556,15 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
>> depthBuf = tempBuf;
>> modeBuf = tempBuf + depthBytes;
>> - if (m_param->rc.cuTree)
>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>
>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->depth);
>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->modes);
>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf,
>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>> + if (m_param->rc.cuTree)
>> + {
>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>> m_analysisFileIn, cuQPOffSets);
>> + if (m_param->analysisLoadReuseLevel > 1 &&
>> m_param->analysisLoadReuseLevel < 10)
>> + memcpy(analysis->interData->cuQPOff, cuQPBuf,
>> sizeof(int8_t) * reuseBufSize);
>> + }
>>
>> if (m_param->analysisLoadReuseLevel > 4)
>> {
>> @@ -4578,7 +4602,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> depthBuf[d] = 1;
>> memset(&(analysis->interData)->depth[count],
>> depthBuf[d], bytes);
>> memset(&(analysis->interData)->modes[count], modeBuf[d],
>> bytes);
>> - if (m_param->rc.cuTree)
>> + if (m_param->rc.cuTree &&
>> m_param->analysisLoadReuseLevel == 10)
>> memset(&(analysis->interData)->cuQPOff[count],
>> cuQPBuf[d], bytes);
>> if (m_param->analysisLoadReuseLevel > 4)
>> {
>> @@ -4736,7 +4760,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> int numPartitions = analysis->numPartitions;
>> int numCUsInFrame = analysis->numCUsInFrame;
>> int numCuInHeight = analysis->numCuInHeight;
>> - /* Allocate memory for scaled resoultion's numPartitions and
>> numCUsInFrame*/
>> + /* Allocate memory for scaled resolution's numPartitions and
>> numCUsInFrame */
>> analysis->numPartitions = m_param->num4x4Partitions;
>> analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
>> analysis->numCuInHeight = cuLoc.heightInCU;
>> @@ -4808,25 +4832,40 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>> X265_FREE(vbvCostBuf);
>> }
>>
>> + uint32_t reuseBufSize = 0;
>> + int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL;
>> + if (m_param->rc.cuTree)
>> + {
>> + if (m_param->analysisLoadReuseLevel == 10)
>> + reuseBufSize = depthBytes;
>> + else if (m_param->analysisLoadReuseLevel > 1)
>> + reuseBufSize = (MAX_NUM_CU_GEOMS / factor) *
>> (analysis->numCUsInFrame);
>> + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);
>> + if (!m_param->bUseAnalysisFile)
>> + {
>> + if (analysis->sliceType == X265_TYPE_IDR ||
>> analysis->sliceType == X265_TYPE_I)
>> + cuQPOffSets = intraPic->cuQPOff;
>> + else
>> + cuQPOffSets = interPic->cuQPOff;
>> + }
>> + }
>> +
>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
>> X265_TYPE_I)
>> {
>> if (m_param->analysisLoadReuseLevel < 2)
>> return;
>>
>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>> *partSizes = NULL;
>> - int8_t *cuQPBuf = NULL;
>>
>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>> depthBuf = tempBuf;
>> modeBuf = tempBuf + depthBytes;
>> partSizes = tempBuf + 2 * depthBytes;
>> - if (m_param->rc.cuTree)
>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>
>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->depth);
>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->chromaModes);
>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->partSizes);
>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>> + if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> reuseBufSize, m_analysisFileIn, cuQPOffSets); }
>>
>> uint32_t count = 0;
>> for (uint32_t d = 0; d < depthBytes; d++)
>> @@ -4848,7 +4887,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> memset(&(analysis->intraData)->depth[count],
>> depthBuf[d], bytes);
>> memset(&(analysis->intraData)->chromaModes[count],
>> modeBuf[d], bytes);
>> memset(&(analysis->intraData)->partSizes[count],
>> partSizes[d], bytes);
>> - if (m_param->rc.cuTree)
>> + if (m_param->rc.cuTree &&
>> m_param->analysisLoadReuseLevel == 10)
>> memset(&(analysis->intraData)->cuQPOff[count],
>> cuQPBuf[d], bytes);
>> count += bytes;
>> d += getCUIndex(&cuLoc, &count, bytes, 1);
>> @@ -4886,7 +4925,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>> MV* mv[2];
>> int8_t* refIdx[2];
>> - int8_t* cuQPBuf = NULL;
>>
>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>> bool bIntraInInter = false;
>> @@ -4900,12 +4938,16 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
>> depthBuf = tempBuf;
>> modeBuf = tempBuf + depthBytes;
>> - if (m_param->rc.cuTree)
>> - cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>
>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->depth);
>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->modes);
>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>> + if (m_param->rc.cuTree)
>> + {
>> + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize,
>> m_analysisFileIn, cuQPOffSets);
>> + if (m_param->analysisLoadReuseLevel > 1 &&
>> m_param->analysisLoadReuseLevel < 10)
>> + memcpy(&(analysis->interData)->cuQPOff, cuQPBuf,
>> sizeof(int8_t) * reuseBufSize);
>> + }
>> +
>> if (m_param->analysisLoadReuseLevel > 4)
>> {
>> partSize = modeBuf + depthBytes;
>> @@ -4954,7 +4996,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> {
>> memset(&(analysis->interData)->depth[count], writeDepth,
>> bytes);
>> memset(&(analysis->interData)->modes[count], modeBuf[d],
>> bytes);
>> - if (m_param->rc.cuTree)
>> + if (m_param->rc.cuTree &&
>> m_param->analysisLoadReuseLevel == 10)
>> memset(&(analysis->interData)->cuQPOff[count],
>> cuQPBuf[d], bytes);
>> if (m_param->analysisLoadReuseLevel == 10 &&
>> bIntraInInter)
>> memset(&(analysis->intraData)->chromaModes[count],
>> chromaDir[d], bytes);
>> @@ -5046,7 +5088,9 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> }
>> }
>> else
>> + {
>> X265_FREAD((analysis->interData)->ref, sizeof(int32_t),
>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir,
>> m_analysisFileIn, interPic->ref);
>> + }
>>
>> consumedBytes += frameRecordSize;
>> if (numDir == 1)
>> @@ -5510,9 +5554,10 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>> analysis->frameRecordSize += analysis->numCUsInFrame *
>> sizeof(sse_t);
>> }
>>
>> + uint32_t reuseQPBufsize = 0;
>> if (m_param->analysisSaveReuseLevel > 1)
>> {
>> -
>> + reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;
>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
>> == X265_TYPE_I)
>> {
>> for (uint32_t cuAddr = 0; cuAddr < analysis->numCUsInFrame;
>> cuAddr++)
>> @@ -5536,12 +5581,21 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>> partSize = ctu->m_partSize[absPartIdx];
>> intraDataCTU->partSizes[depthBytes] = partSize;
>>
>> - if (m_param->rc.cuTree)
>> + if (m_param->rc.cuTree &&
>> m_param->analysisSaveReuseLevel == 10)
>> intraDataCTU->cuQPOff[depthBytes] =
>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>> absPartIdx += ctu->m_numPartitions >> (depth * 2);
>> }
>> +
>> + if (m_param->rc.cuTree &&
>> m_param->analysisSaveReuseLevel < 10)
>> + {
>> + uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS;
>> + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i <
>> nextCuIdx; i++)
>> + intraDataCTU->cuQPOff[i] =
>> (int8_t)(intraDataCTU->cuQPOff[i] - baseQP);
>> + }
>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>> ctu->m_numPartitions);
>> }
>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel ==
>> 10)
>> + reuseQPBufsize = depthBytes;
>> }
>> else
>> {
>> @@ -5567,7 +5621,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
>> analysis, FrameData &curEncD
>> predMode = 4; // used as indicator if the block
>> is coded as bidir
>>
>> interDataCTU->modes[depthBytes] = predMode;
>> - if (m_param->rc.cuTree)
>> + if (m_param->rc.cuTree &&
>> m_param->analysisSaveReuseLevel == 10)
>> interDataCTU->cuQPOff[depthBytes] =
>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>>
>> if (m_param->analysisSaveReuseLevel > 4)
>> @@ -5599,13 +5653,23 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>> }
>> absPartIdx += ctu->m_numPartitions >> (depth * 2);
>> }
>> +
>> + if (m_param->rc.cuTree &&
>> m_param->analysisSaveReuseLevel < 10)
>> + {
>> + uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS;
>> + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i <
>> nextCuIdx ; i++)
>> + interDataCTU->cuQPOff[i] =
>> (int8_t)(interDataCTU->cuQPOff[i] - baseQP);
>> + }
>> +
>> if (m_param->analysisSaveReuseLevel == 10 &&
>> bIntraInInter)
>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>> ctu->m_numPartitions);
>> }
>> + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel ==
>> 10)
>> + reuseQPBufsize = depthBytes;
>> }
>>
>> if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
>> == X265_TYPE_I) && m_param->rc.cuTree)
>> - analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>> (sizeof(int8_t) * depthBytes);
>> + analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>> (sizeof(int8_t) * reuseQPBufsize);
>> else if (analysis->sliceType == X265_TYPE_IDR ||
>> analysis->sliceType == X265_TYPE_I)
>> analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
>> else
>> @@ -5613,7 +5677,8 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
>> analysis, FrameData &curEncD
>> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag
>> */
>> analysis->frameRecordSize += depthBytes * 2;
>> if (m_param->rc.cuTree)
>> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
>> + analysis->frameRecordSize += (sizeof(int8_t) *
>> reuseQPBufsize);
>> +
>> if (m_param->analysisSaveReuseLevel > 4)
>> analysis->frameRecordSize += (depthBytes * 2);
>>
>> @@ -5669,7 +5734,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
>> analysis, FrameData &curEncD
>> X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>> X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
>> depthBytes, m_analysisFileOut);
>> if (m_param->rc.cuTree)
>> - X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
>> depthBytes, m_analysisFileOut);
>> + X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
>> reuseQPBufsize, m_analysisFileOut);
>> X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>> }
>> else
>> @@ -5677,7 +5742,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
>> analysis, FrameData &curEncD
>> X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>> X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>> if (m_param->rc.cuTree)
>> - X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
>> depthBytes, m_analysisFileOut);
>> + X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
>> reuseQPBufsize, m_analysisFileOut);
>> if (m_param->analysisSaveReuseLevel > 4)
>> {
>> X265_FWRITE((analysis->interData)->partSize,
>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>> @@ -5762,7 +5827,7 @@ void
>> Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c
>> interData->mv[1][depthBytes].word =
>> ctu->m_mv[1][absPartIdx].word;
>> interData->mvpIdx[1][depthBytes] =
>> ctu->m_mvpIdx[1][absPartIdx];
>> ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx];
>> - predMode = 4; // used as indiacator if the block is
>> coded as bidir
>> + predMode = 4; // used as indicator if the block is
>> coded as bidir
>> }
>> interData->modes[depthBytes] = predMode;
>>
>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
>> index 0adb0d0db..3bc01268b 100644
>> --- a/source/encoder/slicetype.cpp
>> +++ b/source/encoder/slicetype.cpp
>> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
>> bool bKeyframe)
>>
>> if (!framecnt)
>> {
>> - if (m_param->rc.cuTree)
>> + if (m_param->rc.cuTree && !m_param->analysisLoad)
>>
> [AM] Won't this implicitly turn OFF cutree at reuse-level 1?
>
>> cuTree(frames, 0, bKeyframe);
>> return;
>> }
>> diff --git a/source/x265.h b/source/x265.h
>> index f44040ba7..8d7a75826 100644
>> --- a/source/x265.h
>> +++ b/source/x265.h
>> @@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data
>> uint8_t* modes;
>> char* partSizes;
>> uint8_t* chromaModes;
>> - int8_t* cuQPOff;
>> + int8_t* cuQPOff;
>> }x265_analysis_intra_data;
>>
>> typedef struct x265_analysis_MV
>> @@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data
>> uint8_t* interDir;
>> uint8_t* mvpIdx[2];
>> int8_t* refIdx[2];
>> - x265_analysis_MV* mv[2];
>> + x265_analysis_MV* mv[2];
>> int64_t* sadCost;
>> int8_t* cuQPOff;
>> }x265_analysis_inter_data;
>> --
>> 2.20.1.windows.1
>>
>>
>> --
>> *With Regards,*
>> *Srikanth Kurapati.*
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> --
> Regards,
> *Aruna Matheswaran,*
> Video Codec Engineer,
> Media & AI analytics BU,
>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210121/07907873/attachment-0001.html>
More information about the x265-devel
mailing list