[x265] [PATCH] [Release_3.5] correct reusing cutree qp offsets in load encode for reuse-level > 1 and < 10 for same resolution
Srikanth Kurapati
srikanth.kurapati at multicorewareinc.com
Fri Jan 8 05:25:36 UTC 2021
On Fri, Nov 20, 2020 at 3:32 PM Kavitha Sampath <
kavitha at multicorewareinc.com> wrote:
>
>
> On Tue, Nov 17, 2020 at 8:22 AM Mahesh Pittala <
> mahesh at multicorewareinc.com> wrote:
>
>> From 787ae5da7431b5d113ea033cf6502ac1cc1e7572 Mon Sep 17 00:00:00 2001
>> From: maheshpittala <mahesh at multicorewareinc.com>
>> Date: Sun, 1 Nov 2020 10:09:28 +0530
>> Subject: [PATCH] correct reusing cutree qp offsets in load encode for
>> reuse-level > 1 and < 10 for same resolution
>>
>> Previously, the save encode dumped only the best-mode analysis data of
>> each CTU to the file after encoding, not the analysis of each split CU.
>> As a result, the analysis load read the best mode's QP value even for split
>> CUs (whereas their QPs could differ in the save encode) and redid the analysis.
>>
>> Now cuGeom.geomRecurId is used as a unique ID for every CU, including parent
>> CUs; the cutree QP offset is stored per ID on save and loaded by the same ID.
>>
> [KS] Commit message sounds informal. Suggest rephrasing
>
[SK] Addressed the same.
> ---
>> source/abrEncApp.cpp | 6 +++
>> source/common/cudata.cpp | 6 ++-
>> source/common/cudata.h | 3 +-
>> source/encoder/analysis.cpp | 32 ++++++++++--
>> source/encoder/api.cpp | 12 +++++
>> source/encoder/encoder.cpp | 97 ++++++++++++++++++++++++++++++++----
>> source/encoder/slicetype.cpp | 2 +-
>> source/x265.h | 2 +
>> 8 files changed, 140 insertions(+), 20 deletions(-)
>>
>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
>> index cd85154f1..3550d8b11 100644
>> --- a/source/abrEncApp.cpp
>> +++ b/source/abrEncApp.cpp
>> @@ -342,7 +342,10 @@ namespace X265_NS {
>> memcpy(intraDst->partSizes, intraSrc->partSizes,
>> sizeof(char) * src->depthBytes);
>> memcpy(intraDst->chromaModes, intraSrc->chromaModes,
>> sizeof(uint8_t) * src->depthBytes);
>> if (m_param->rc.cuTree)
>> + {
>> memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>> sizeof(int8_t) * src->depthBytes);
>> + memcpy(intraDst->cuQPOffReuse, intraSrc->cuQPOffReuse,
>> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
>>
> [KS] The maximum number of QPs saved per CTU is 85. Allocating and copying
> numPartitions entries per CTU is unnecessary.
>
[SK] Agreed. Fixed the same.
> + }
>> }
>> else
>> {
>> @@ -357,7 +360,10 @@ namespace X265_NS {
>> memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
>> src->depthBytes);
>> memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
>> src->depthBytes);
>> if (m_param->rc.cuTree)
>> + {
>> memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>> sizeof(int8_t) * src->depthBytes);
>> + memcpy(interDst->cuQPOffReuse, interSrc->cuQPOffReuse,
>> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
>> + }
>> if (m_param->analysisSaveReuseLevel > 4)
>> {
>> memcpy(interDst->partSize, interSrc->partSize,
>> sizeof(uint8_t) * src->depthBytes);
>> diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp
>> index 19281dee2..08cdff11a 100644
>> --- a/source/common/cudata.cpp
>> +++ b/source/common/cudata.cpp
>> @@ -194,6 +194,7 @@ void CUData::initialize(const CUDataMemPool&
>> dataPool, uint32_t depth, const x26
>>
>> m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
>> m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
>> + m_qpreuse = (int8_t*)charBuf; charBuf += m_numPartitions;
>>
> [KS] Can you move this out of parentCTU? Would be appropriate to include
> it as an Analysis class member - just like other reuse parameters such
> as m_reuseRef, m_reuseDepth,..
>
[SK] Addressed the same, so that the CUData memory pool can be used for other
purposes. We will store the offsets only in the frame's analysis data
structures.
>
>
>> m_log2CUSize = charBuf; charBuf += m_numPartitions;
>> m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
>> m_tqBypass = charBuf; charBuf += m_numPartitions;
>> @@ -235,6 +236,7 @@ void CUData::initialize(const CUDataMemPool&
>> dataPool, uint32_t depth, const x26
>>
>> m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
>> m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
>> + m_qpreuse = (int8_t*)charBuf; charBuf += m_numPartitions;
>> m_log2CUSize = charBuf; charBuf += m_numPartitions;
>> m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
>> m_tqBypass = charBuf; charBuf += m_numPartitions;
>> @@ -307,7 +309,7 @@ void CUData::initCTU(const Frame& frame, uint32_t
>> cuAddr, int qp, uint32_t first
>> X265_CHECK(!(frame.m_encData->m_param->bLossless &&
>> !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without
>> TQbypass in PPS\n");
>>
>> /* initialize the remaining CU data in one memset */
>> - memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
>> BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
>> + memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
>> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
>>
>> for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
>> m_refTuDepth[i] = -1;
>> @@ -358,7 +360,7 @@ void CUData::initSubCU(const CUData& ctu, const
>> CUGeom& cuGeom, int qp)
>> m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
>>
>> /* initialize the remaining CU data in one memset */
>> - memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
>> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
>> + memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
>> BytesPerPartition - 14 : BytesPerPartition - 10) * m_numPartitions);
>> memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
>> }
>>
>> diff --git a/source/common/cudata.h b/source/common/cudata.h
>> index 8397f0568..d58f53e39 100644
>> --- a/source/common/cudata.h
>> +++ b/source/common/cudata.h
>> @@ -192,6 +192,7 @@ public:
>> /* Per-part data, stored contiguously */
>> int8_t* m_qp; // array of QP values
>> int8_t* m_qpAnalysis; // array of QP values for analysis
>> reuse
>> + int8_t* m_qpreuse; // array of QP values for analysis
>> reuse for reuse levels > 1 and < 10
>> uint8_t* m_log2CUSize; // array of cu log2Size TODO:
>> seems redundant to depth
>> uint8_t* m_lumaIntraDir; // array of intra directions (luma)
>> uint8_t* m_tqBypass; // array of CU lossless flags
>> @@ -207,7 +208,7 @@ public:
>> uint8_t* m_transformSkip[3]; // array of transform skipping
>> flags per plane
>> uint8_t* m_cbf[3]; // array of coded block flags
>> (CBF) per plane
>> uint8_t* m_chromaIntraDir; // array of intra directions
>> (chroma)
>> - enum { BytesPerPartition = 24 }; // combined sizeof() of all
>> per-part data
>> + enum { BytesPerPartition = 25 }; // combined sizeof() of all
>> per-part data
>>
>> sse_t* m_distortion;
>> coeff_t* m_trCoeff[3]; // transformed coefficient buffer
>> per plane
>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
>> index aabf386ca..b1d7e3ad1 100644
>> --- a/source/encoder/analysis.cpp
>> +++ b/source/encoder/analysis.cpp
>> @@ -520,6 +520,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
>> parentCTU, const CUGeom& cuGeom
>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>>
>> + if (m_param->rc.cuTree)
>> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>> +
>> bool bAlreadyDecided = m_param->intraRefine != 4 &&
>> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
>> !(m_param->bAnalysisType == HEVC_INFO);
>> bool bDecidedDepth = m_param->intraRefine != 4 &&
>> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
>> int split = 0;
>> @@ -870,6 +873,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
>> parentCTU, const CUGeom& c
>> uint32_t minDepth = m_param->rdLevel <= 4 ?
>> topSkipMinDepth(parentCTU, cuGeom) : 0;
>> uint32_t splitRefs[4] = { 0, 0, 0, 0 };
>>
>> + if (m_param->rc.cuTree)
>> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>> +
>> X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
>> support RD 0 or 1\n");
>>
>> PMODE pmode(*this, cuGeom);
>> @@ -1152,6 +1158,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
>> CUData& parentCTU, const CUGeom&
>> uint32_t cuAddr = parentCTU.m_cuAddr;
>> ModeDepth& md = m_modeDepth[depth];
>>
>> + if (m_param->rc.cuTree)
>> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>>
>> if (m_param->searchMethod == X265_SEA)
>> {
>> @@ -1856,6 +1864,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
>> CUData& parentCTU, const CUGeom&
>> ModeDepth& md = m_modeDepth[depth];
>> md.bestMode = NULL;
>>
>> + if (m_param->rc.cuTree)
>> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>> +
>> if (m_param->searchMethod == X265_SEA)
>> {
>> int numPredDir = m_slice->isInterP() ? 1 : 2;
>> @@ -3643,15 +3654,26 @@ int Analysis::calculateQpforCuSize(const CUData&
>> ctu, const CUGeom& cuGeom, int3
>> if ((distortionData->threshold[ctu.m_cuAddr] < 0.9 ||
>> distortionData->threshold[ctu.m_cuAddr] > 1.1)
>> && distortionData->highDistortionCtuCount &&
>> distortionData->lowDistortionCtuCount)
>> qp += distortionData->offset[ctu.m_cuAddr];
>> - }
>> + }
>>
>> if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
>> {
>> - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>> cuGeom.absPartIdx;
>> - if (ctu.m_slice->m_sliceType == I_SLICE)
>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
>> + if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
>> == 10)
>> + {
>> + int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>> cuGeom.absPartIdx;
>> + if (ctu.m_slice->m_sliceType == I_SLICE)
>> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
>> + else
>> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
>> + }
>> else
>> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
>> + {
>> + int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>> cuGeom.geomRecurId;
>> + if (ctu.m_slice->m_sliceType == I_SLICE)
>> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOffReuse[cuIdx]));
>> + else
>> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
>> (int32_t)(qp + 0.5 +
>> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOffReuse[cuIdx]));
>> + }
>>
> [KS] Why is this reuse not applicable to reuse level 1?
>
[SK] Not sure of the improvements or gain in this case. Since this is a
general question, we will be tracking this and other improvements
possible for multipass encoding as a separate action item under
x265-Story - 1059.
> }
>> if (m_param->rc.hevcAq)
>> {
>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
>> index a986355e0..0f266d328 100644
>> --- a/source/encoder/api.cpp
>> +++ b/source/encoder/api.cpp
>> @@ -825,7 +825,10 @@ void x265_alloc_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>> CHECKED_MALLOC_ZERO(intraData->partSizes, char,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> if (param->rc.cuTree)
>> + {
>> CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + CHECKED_MALLOC_ZERO(intraData->cuQPOffReuse, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + }
>> }
>> analysis->intraData = intraData;
>>
>> @@ -837,7 +840,10 @@ void x265_alloc_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>> CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>>
>> if (param->rc.cuTree && !isMultiPassOpt)
>> + {
>> CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + CHECKED_MALLOC_ZERO(interData->cuQPOffReuse, int8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + }
>> CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
>> analysis->numPartitions * analysis->numCUsInFrame);
>> @@ -919,7 +925,10 @@ void x265_free_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>> X265_FREE((analysis->intraData)->partSizes);
>> X265_FREE((analysis->intraData)->chromaModes);
>> if (param->rc.cuTree)
>> + {
>> X265_FREE((analysis->intraData)->cuQPOff);
>> + X265_FREE((analysis->intraData)->cuQPOffReuse);
>> + }
>> }
>> X265_FREE(analysis->intraData);
>> analysis->intraData = NULL;
>> @@ -931,7 +940,10 @@ void x265_free_analysis_data(x265_param *param,
>> x265_analysis_data* analysis)
>> X265_FREE((analysis->interData)->depth);
>> X265_FREE((analysis->interData)->modes);
>> if (!isMultiPassOpt && param->rc.cuTree)
>> + {
>> X265_FREE((analysis->interData)->cuQPOff);
>> + X265_FREE((analysis->interData)->cuQPOffReuse);
>> + }
>> X265_FREE((analysis->interData)->mvpIdx[0]);
>> X265_FREE((analysis->interData)->mvpIdx[1]);
>> X265_FREE((analysis->interData)->mv[0]);
>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
>> index 1f710e1ce..9666744f3 100644
>> --- a/source/encoder/encoder.cpp
>> +++ b/source/encoder/encoder.cpp
>> @@ -4452,19 +4452,25 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>> return;
>>
>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>> *partSizes = NULL;
>> - int8_t *cuQPBuf = NULL;
>> + int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>>
>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>> depthBuf = tempBuf;
>> modeBuf = tempBuf + depthBytes;
>> partSizes = tempBuf + 2 * depthBytes;
>> if (m_param->rc.cuTree)
>> + {
>> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
>> analysis->numCUsInFrame);
>>
> [KS] Check whitespaces
>
>> + }
>>
>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->depth);
>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->chromaModes);
>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->partSizes);
>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>> + if (m_param->rc.cuTree) {
>> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
>> m_analysisFileIn, intraPic->cuQPOff);
>> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), (scaledNumPartition
>> * analysis->numCUsInFrame), m_analysisFileIn, intraPic->cuQPOffReuse);
>> + }
>>
>> size_t count = 0;
>> for (uint32_t d = 0; d < depthBytes; d++)
>> @@ -4484,7 +4490,11 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> memset(&(analysis->intraData)->cuQPOff[count],
>> cuQPBuf[d], bytes);
>> count += bytes;
>> }
>> -
>> + if (m_param->rc.cuTree)
>> + {
>> + for (uint32_t i = 0; i < (scaledNumPartition *
>> analysis->numCUsInFrame); i++)
>> + memset(&(analysis->intraData)->cuQPOffReuse[i],
>> cuQPReuseBuf[i], sizeof(int8_t));
>> + }
>> if (!m_param->scaleFactor)
>> {
>> X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t),
>> numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
>> @@ -4498,7 +4508,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> X265_FREE(tempLumaBuf);
>> }
>> if (m_param->rc.cuTree)
>> + {
>> X265_FREE(cuQPBuf);
>> + X265_FREE(cuQPReuseBuf);
>> + }
>> X265_FREE(tempBuf);
>> consumedBytes += frameRecordSize;
>> }
>> @@ -4515,7 +4528,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>> MV* mv[2];
>> int8_t* refIdx[2];
>> - int8_t* cuQPBuf = NULL;
>> + int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>>
> [KS] Why can't we reuse cuQPBuf ? I agree that the size of offsets differ
> for reuse level 10 and others but that can be taken care of in allocation.
>
[SK] We can use the same and also use the same buffer in analysis data
for all reuse levels. Hence optimized the memory footprint per frame.
>
>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>> bool bIntraInInter = false;
>> @@ -4536,11 +4549,17 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>> depthBuf = tempBuf;
>> modeBuf = tempBuf + depthBytes;
>> if (m_param->rc.cuTree)
>> + {
>> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
>> analysis->numCUsInFrame);
>> + }
>>
>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->depth);
>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->modes);
>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf,
>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>> + if (m_param->rc.cuTree) {
>> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
>> m_analysisFileIn, interPic->cuQPOff);
>> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
>> (scaledNumPartition * analysis->numCUsInFrame), m_analysisFileIn,
>> interPic->cuQPOffReuse);
>> + }
>>
>> if (m_param->analysisLoadReuseLevel > 4)
>> {
>> @@ -4611,9 +4630,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> }
>> count += bytes;
>> }
>> + if (m_param->rc.cuTree)
>> + {
>> + for (uint32_t i = 0; i < (scaledNumPartition *
>> analysis->numCUsInFrame); i++)
>> + memset(&(analysis->interData)->cuQPOffReuse[i],
>> cuQPReuseBuf[i], sizeof(int8_t));
>> + }
>>
>> if (m_param->rc.cuTree)
>> + {
>> X265_FREE(cuQPBuf);
>> + X265_FREE(cuQPReuseBuf);
>> + }
>> X265_FREE(tempBuf);
>> }
>> if (m_param->analysisLoadReuseLevel == 10)
>> @@ -4814,19 +4841,26 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>> return;
>>
>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>> *partSizes = NULL;
>> - int8_t *cuQPBuf = NULL;
>> + int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;;
>>
>> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
>> depthBuf = tempBuf;
>> modeBuf = tempBuf + depthBytes;
>> partSizes = tempBuf + 2 * depthBytes;
>> if (m_param->rc.cuTree)
>> + {
>> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>> + cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions
>> / factor) * analysis->numCUsInFrame);
>> + }
>>
>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->depth);
>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->chromaModes);
>> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, intraPic->partSizes);
>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>> + if (m_param->rc.cuTree)
>> + {
>> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
>> m_analysisFileIn, intraPic->cuQPOff);
>> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
>> ((analysis->numPartitions / factor) * analysis->numCUsInFrame),
>> m_analysisFileIn, intraPic->cuQPOffReuse);
>> + }
>>
>> uint32_t count = 0;
>> for (uint32_t d = 0; d < depthBytes; d++)
>> @@ -4869,7 +4903,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> }
>> X265_FREE(tempLumaBuf);
>> if (m_param->rc.cuTree)
>> + {
>> X265_FREE(cuQPBuf);
>> + X265_FREE(cuQPReuseBuf);
>> + }
>> X265_FREE(tempBuf);
>> consumedBytes += frameRecordSize;
>> }
>> @@ -4886,7 +4923,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>> MV* mv[2];
>> int8_t* refIdx[2];
>> - int8_t* cuQPBuf = NULL;
>> + int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>>
>> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
>> bool bIntraInInter = false;
>> @@ -4901,11 +4938,18 @@ void
>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>> depthBuf = tempBuf;
>> modeBuf = tempBuf + depthBytes;
>> if (m_param->rc.cuTree)
>> + {
>> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>> + cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions
>> / factor) * analysis->numCUsInFrame);
>> + }
>>
>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->depth);
>> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>> m_analysisFileIn, interPic->modes);
>> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>> + if (m_param->rc.cuTree)
>> + {
>> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
>> m_analysisFileIn, interPic->cuQPOff);
>> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
>> (analysis->numPartitions / factor) * analysis->numCUsInFrame,
>> m_analysisFileIn, interPic->cuQPOffReuse);
>> + }
>> if (m_param->analysisLoadReuseLevel > 4)
>> {
>> partSize = modeBuf + depthBytes;
>> @@ -5017,7 +5061,16 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>> analysis, int curPoc, const x
>> }
>>
>> if (m_param->rc.cuTree)
>> + {
>> + for (uint32_t i = 0; i < ((analysis->numPartitions / factor)
>> * analysis->numCUsInFrame); i++)
>> + memset(&(analysis->interData)->cuQPOffReuse[i],
>> cuQPReuseBuf[i], sizeof(int8_t));
>> + }
>> +
>> + if (m_param->rc.cuTree)
>> + {
>> X265_FREE(cuQPBuf);
>> + X265_FREE(cuQPReuseBuf);
>> + }
>> X265_FREE(tempBuf);
>>
>> if (m_param->analysisLoadReuseLevel == 10)
>> @@ -5540,6 +5593,12 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>> intraDataCTU->cuQPOff[depthBytes] =
>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>> absPartIdx += ctu->m_numPartitions >> (depth * 2);
>> }
>> +
>> + if (m_param->rc.cuTree)
>> + {
>> + for (uint32_t i = (cuAddr * ctu->m_numPartitions), j
>> = 0; j < ctu->m_numPartitions; i++, j++)
>> + intraDataCTU->cuQPOffReuse[i] =
>> (int8_t)(ctu->m_qpreuse[j] - baseQP);
>> + }
>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>> ctu->m_numPartitions);
>> }
>> }
>> @@ -5599,13 +5658,20 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>> }
>> absPartIdx += ctu->m_numPartitions >> (depth * 2);
>> }
>> +
>> + if (m_param->rc.cuTree)
>> + {
>> + for (uint32_t i = (cuAddr * ctu->m_numPartitions), j
>> = 0; j < ctu->m_numPartitions; i++, j++)
>> + interDataCTU->cuQPOffReuse[i] =
>> (int8_t)(ctu->m_qpreuse[j] - baseQP);
>> + }
>> +
>> if (m_param->analysisSaveReuseLevel == 10 &&
>> bIntraInInter)
>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>> ctu->m_numPartitions);
>> }
>> }
>>
>> if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
>> == X265_TYPE_I) && m_param->rc.cuTree)
>> - analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>> (sizeof(int8_t) * depthBytes);
>> + analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>> (sizeof(int8_t) * depthBytes) + (sizeof(int8_t) * analysis->numPartitions
>> * analysis->numCUsInFrame);
>> else if (analysis->sliceType == X265_TYPE_IDR ||
>> analysis->sliceType == X265_TYPE_I)
>> analysis->frameRecordSize += sizeof(uint8_t)*
>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
>> else
>> @@ -5613,7 +5679,10 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag
>> */
>> analysis->frameRecordSize += depthBytes * 2;
>> if (m_param->rc.cuTree)
>> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
>> + {
>> + analysis->frameRecordSize += (sizeof(int8_t) *
>> depthBytes);
>> + analysis->frameRecordSize += (sizeof(int8_t) *
>> analysis->numPartitions * analysis->numCUsInFrame);
>> + }
>> if (m_param->analysisSaveReuseLevel > 4)
>> analysis->frameRecordSize += (depthBytes * 2);
>>
>> @@ -5669,7 +5738,10 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>> X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>> X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
>> depthBytes, m_analysisFileOut);
>> if (m_param->rc.cuTree)
>> + {
>> X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
>> depthBytes, m_analysisFileOut);
>> + X265_FWRITE((analysis->intraData)->cuQPOffReuse,
>> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
>> m_analysisFileOut);
>> + }
>> X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>> }
>> else
>> @@ -5677,7 +5749,10 @@ void
>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>> X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>> X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
>> depthBytes, m_analysisFileOut);
>> if (m_param->rc.cuTree)
>> + {
>> X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
>> depthBytes, m_analysisFileOut);
>> + X265_FWRITE((analysis->interData)->cuQPOffReuse,
>> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
>> m_analysisFileOut);
>> + }
>> if (m_param->analysisSaveReuseLevel > 4)
>> {
>> X265_FWRITE((analysis->interData)->partSize,
>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
>> index 0adb0d0db..3bc01268b 100644
>> --- a/source/encoder/slicetype.cpp
>> +++ b/source/encoder/slicetype.cpp
>> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
>> bool bKeyframe)
>>
>> if (!framecnt)
>> {
>> - if (m_param->rc.cuTree)
>> + if (m_param->rc.cuTree && !m_param->analysisLoad)
>> cuTree(frames, 0, bKeyframe);
>> return;
>> }
>> diff --git a/source/x265.h b/source/x265.h
>> index f44040ba7..d6a828539 100644
>> --- a/source/x265.h
>> +++ b/source/x265.h
>> @@ -145,6 +145,7 @@ typedef struct x265_analysis_intra_data
>> char* partSizes;
>> uint8_t* chromaModes;
>> int8_t* cuQPOff;
>> + int8_t* cuQPOffReuse;
>> }x265_analysis_intra_data;
>>
>> typedef struct x265_analysis_MV
>> @@ -170,6 +171,7 @@ typedef struct x265_analysis_inter_data
>> x265_analysis_MV* mv[2];
>> int64_t* sadCost;
>> int8_t* cuQPOff;
>> + int8_t* cuQPOffReuse;
>> }x265_analysis_inter_data;
>>
>> typedef struct x265_weight_param
>> --
>> 2.23.0.windows.1
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> --
> Regards,
> Kavitha
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210108/e0308000/attachment-0001.html>
More information about the x265-devel
mailing list