[x265] [PATCH] [Release_3.5] correct reusing cutree qp offsets in load encode for reuse-level > 1 and < 10 for same resolution
Kavitha Sampath
kavitha at multicorewareinc.com
Fri Nov 20 11:02:10 CET 2020
On Tue, Nov 17, 2020 at 8:22 AM Mahesh Pittala <mahesh at multicorewareinc.com>
wrote:
> From 787ae5da7431b5d113ea033cf6502ac1cc1e7572 Mon Sep 17 00:00:00 2001
> From: maheshpittala <mahesh at multicorewareinc.com>
> Date: Sun, 1 Nov 2020 10:09:28 +0530
> Subject: [PATCH] correct reusing cutree qp offsets in load encode for
> reuse-level > 1 and < 10 for same resolution
>
> Earlier, in save encode, only the best mode's analysis data for each CTU was
> dumped into the file after encoding, not the analysis of each split CU. So in
> analysis load, the same best mode's qp value is read even for split CUs
> (whereas the split CUs' qp would be different in save encode) and the
> analysis is redone.
>
> Now cuGeom.geomRecurId, which stores a unique ID for each CU (including
> parent CUs), is used as the index for storing the cutree qp offset in save
> encode and for loading the same offset in load encode.
>
[KS] The commit message sounds informal. I suggest rephrasing it.
> ---
> source/abrEncApp.cpp | 6 +++
> source/common/cudata.cpp | 6 ++-
> source/common/cudata.h | 3 +-
> source/encoder/analysis.cpp | 32 ++++++++++--
> source/encoder/api.cpp | 12 +++++
> source/encoder/encoder.cpp | 97 ++++++++++++++++++++++++++++++++----
> source/encoder/slicetype.cpp | 2 +-
> source/x265.h | 2 +
> 8 files changed, 140 insertions(+), 20 deletions(-)
>
> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
> index cd85154f1..3550d8b11 100644
> --- a/source/abrEncApp.cpp
> +++ b/source/abrEncApp.cpp
> @@ -342,7 +342,10 @@ namespace X265_NS {
> memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char)
> * src->depthBytes);
> memcpy(intraDst->chromaModes, intraSrc->chromaModes,
> sizeof(uint8_t) * src->depthBytes);
> if (m_param->rc.cuTree)
> + {
> memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
> sizeof(int8_t) * src->depthBytes);
> + memcpy(intraDst->cuQPOffReuse, intraSrc->cuQPOffReuse,
> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
>
[KS] The maximum number of QPs saved per CTU is 85 (one per CU geometry, i.e.
1 + 4 + 16 + 64 for a 64x64 CTU split down to 8x8). Allocating and copying
numPartitions entries per CTU is unnecessary.
> + }
> }
> else
> {
> @@ -357,7 +360,10 @@ namespace X265_NS {
> memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
> src->depthBytes);
> memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
> src->depthBytes);
> if (m_param->rc.cuTree)
> + {
> memcpy(interDst->cuQPOff, interSrc->cuQPOff,
> sizeof(int8_t) * src->depthBytes);
> + memcpy(interDst->cuQPOffReuse, interSrc->cuQPOffReuse,
> sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
> + }
> if (m_param->analysisSaveReuseLevel > 4)
> {
> memcpy(interDst->partSize, interSrc->partSize,
> sizeof(uint8_t) * src->depthBytes);
> diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp
> index 19281dee2..08cdff11a 100644
> --- a/source/common/cudata.cpp
> +++ b/source/common/cudata.cpp
> @@ -194,6 +194,7 @@ void CUData::initialize(const CUDataMemPool& dataPool,
> uint32_t depth, const x26
>
> m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
> m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
> + m_qpreuse = (int8_t*)charBuf; charBuf += m_numPartitions;
>
[KS] Can you move this out of parentCTU? It would be more appropriate to
include it as an Analysis class member, just like the other reuse parameters
such as m_reuseRef, m_reuseDepth, etc.
> m_log2CUSize = charBuf; charBuf += m_numPartitions;
> m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
> m_tqBypass = charBuf; charBuf += m_numPartitions;
> @@ -235,6 +236,7 @@ void CUData::initialize(const CUDataMemPool& dataPool,
> uint32_t depth, const x26
>
> m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
> m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
> + m_qpreuse = (int8_t*)charBuf; charBuf += m_numPartitions;
> m_log2CUSize = charBuf; charBuf += m_numPartitions;
> m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
> m_tqBypass = charBuf; charBuf += m_numPartitions;
> @@ -307,7 +309,7 @@ void CUData::initCTU(const Frame& frame, uint32_t
> cuAddr, int qp, uint32_t first
> X265_CHECK(!(frame.m_encData->m_param->bLossless &&
> !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without
> TQbypass in PPS\n");
>
> /* initialize the remaining CU data in one memset */
> - memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
> BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
> + memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
>
> for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
> m_refTuDepth[i] = -1;
> @@ -358,7 +360,7 @@ void CUData::initSubCU(const CUData& ctu, const
> CUGeom& cuGeom, int qp)
> m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
>
> /* initialize the remaining CU data in one memset */
> - memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
> BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
> + memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
> BytesPerPartition - 14 : BytesPerPartition - 10) * m_numPartitions);
> memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
> }
>
> diff --git a/source/common/cudata.h b/source/common/cudata.h
> index 8397f0568..d58f53e39 100644
> --- a/source/common/cudata.h
> +++ b/source/common/cudata.h
> @@ -192,6 +192,7 @@ public:
> /* Per-part data, stored contiguously */
> int8_t* m_qp; // array of QP values
> int8_t* m_qpAnalysis; // array of QP values for analysis
> reuse
> + int8_t* m_qpreuse; // array of QP values for analysis
> reuse for reuse levels > 1 and < 10
> uint8_t* m_log2CUSize; // array of cu log2Size TODO: seems
> redundant to depth
> uint8_t* m_lumaIntraDir; // array of intra directions (luma)
> uint8_t* m_tqBypass; // array of CU lossless flags
> @@ -207,7 +208,7 @@ public:
> uint8_t* m_transformSkip[3]; // array of transform skipping
> flags per plane
> uint8_t* m_cbf[3]; // array of coded block flags (CBF)
> per plane
> uint8_t* m_chromaIntraDir; // array of intra directions
> (chroma)
> - enum { BytesPerPartition = 24 }; // combined sizeof() of all
> per-part data
> + enum { BytesPerPartition = 25 }; // combined sizeof() of all
> per-part data
>
> sse_t* m_distortion;
> coeff_t* m_trCoeff[3]; // transformed coefficient buffer
> per plane
> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
> index aabf386ca..b1d7e3ad1 100644
> --- a/source/encoder/analysis.cpp
> +++ b/source/encoder/analysis.cpp
> @@ -520,6 +520,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
> parentCTU, const CUGeom& cuGeom
> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>
> + if (m_param->rc.cuTree)
> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
> +
> bool bAlreadyDecided = m_param->intraRefine != 4 &&
> parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
> !(m_param->bAnalysisType == HEVC_INFO);
> bool bDecidedDepth = m_param->intraRefine != 4 &&
> parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
> int split = 0;
> @@ -870,6 +873,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
> parentCTU, const CUGeom& c
> uint32_t minDepth = m_param->rdLevel <= 4 ?
> topSkipMinDepth(parentCTU, cuGeom) : 0;
> uint32_t splitRefs[4] = { 0, 0, 0, 0 };
>
> + if (m_param->rc.cuTree)
> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
> +
> X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
> support RD 0 or 1\n");
>
> PMODE pmode(*this, cuGeom);
> @@ -1152,6 +1158,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
> CUData& parentCTU, const CUGeom&
> uint32_t cuAddr = parentCTU.m_cuAddr;
> ModeDepth& md = m_modeDepth[depth];
>
> + if (m_param->rc.cuTree)
> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
>
> if (m_param->searchMethod == X265_SEA)
> {
> @@ -1856,6 +1864,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
> CUData& parentCTU, const CUGeom&
> ModeDepth& md = m_modeDepth[depth];
> md.bestMode = NULL;
>
> + if (m_param->rc.cuTree)
> + parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
> +
> if (m_param->searchMethod == X265_SEA)
> {
> int numPredDir = m_slice->isInterP() ? 1 : 2;
> @@ -3643,15 +3654,26 @@ int Analysis::calculateQpforCuSize(const CUData&
> ctu, const CUGeom& cuGeom, int3
> if ((distortionData->threshold[ctu.m_cuAddr] < 0.9 ||
> distortionData->threshold[ctu.m_cuAddr] > 1.1)
> && distortionData->highDistortionCtuCount &&
> distortionData->lowDistortionCtuCount)
> qp += distortionData->offset[ctu.m_cuAddr];
> - }
> + }
>
> if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
> {
> - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
> cuGeom.absPartIdx;
> - if (ctu.m_slice->m_sliceType == I_SLICE)
> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
> + if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
> == 10)
> + {
> + int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
> cuGeom.absPartIdx;
> + if (ctu.m_slice->m_sliceType == I_SLICE)
> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
> + else
> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
> + }
> else
> - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
> + {
> + int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
> cuGeom.geomRecurId;
> + if (ctu.m_slice->m_sliceType == I_SLICE)
> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOffReuse[cuIdx]));
> + else
> + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOffReuse[cuIdx]));
> + }
>
[KS] Why is this reuse not applicable to reuse level 1?
> }
> if (m_param->rc.hevcAq)
> {
> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
> index a986355e0..0f266d328 100644
> --- a/source/encoder/api.cpp
> +++ b/source/encoder/api.cpp
> @@ -825,7 +825,10 @@ void x265_alloc_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
> CHECKED_MALLOC_ZERO(intraData->partSizes, char,
> analysis->numPartitions * analysis->numCUsInFrame);
> CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> if (param->rc.cuTree)
> + {
> CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> + CHECKED_MALLOC_ZERO(intraData->cuQPOffReuse, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> + }
> }
> analysis->intraData = intraData;
>
> @@ -837,7 +840,10 @@ void x265_alloc_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
> CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
>
> if (param->rc.cuTree && !isMultiPassOpt)
> + {
> CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> + CHECKED_MALLOC_ZERO(interData->cuQPOffReuse, int8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> + }
> CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
> analysis->numPartitions * analysis->numCUsInFrame);
> @@ -919,7 +925,10 @@ void x265_free_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
> X265_FREE((analysis->intraData)->partSizes);
> X265_FREE((analysis->intraData)->chromaModes);
> if (param->rc.cuTree)
> + {
> X265_FREE((analysis->intraData)->cuQPOff);
> + X265_FREE((analysis->intraData)->cuQPOffReuse);
> + }
> }
> X265_FREE(analysis->intraData);
> analysis->intraData = NULL;
> @@ -931,7 +940,10 @@ void x265_free_analysis_data(x265_param *param,
> x265_analysis_data* analysis)
> X265_FREE((analysis->interData)->depth);
> X265_FREE((analysis->interData)->modes);
> if (!isMultiPassOpt && param->rc.cuTree)
> + {
> X265_FREE((analysis->interData)->cuQPOff);
> + X265_FREE((analysis->interData)->cuQPOffReuse);
> + }
> X265_FREE((analysis->interData)->mvpIdx[0]);
> X265_FREE((analysis->interData)->mvpIdx[1]);
> X265_FREE((analysis->interData)->mv[0]);
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index 1f710e1ce..9666744f3 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -4452,19 +4452,25 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> return;
>
> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSizes = NULL;
> - int8_t *cuQPBuf = NULL;
> + int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>
> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
> depthBuf = tempBuf;
> modeBuf = tempBuf + depthBytes;
> partSizes = tempBuf + 2 * depthBytes;
> if (m_param->rc.cuTree)
> + {
> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
> analysis->numCUsInFrame);
>
[KS] Please check the whitespace here.
> + }
>
> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->depth);
> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->partSizes);
> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
> + if (m_param->rc.cuTree) {
> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, intraPic->cuQPOff);
> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t), (scaledNumPartition
> * analysis->numCUsInFrame), m_analysisFileIn, intraPic->cuQPOffReuse);
> + }
>
> size_t count = 0;
> for (uint32_t d = 0; d < depthBytes; d++)
> @@ -4484,7 +4490,11 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> memset(&(analysis->intraData)->cuQPOff[count],
> cuQPBuf[d], bytes);
> count += bytes;
> }
> -
> + if (m_param->rc.cuTree)
> + {
> + for (uint32_t i = 0; i < (scaledNumPartition * analysis->numCUsInFrame);
> i++)
> + memset(&(analysis->intraData)->cuQPOffReuse[i],
> cuQPReuseBuf[i], sizeof(int8_t));
> + }
> if (!m_param->scaleFactor)
> {
> X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t),
> numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
> @@ -4498,7 +4508,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> X265_FREE(tempLumaBuf);
> }
> if (m_param->rc.cuTree)
> + {
> X265_FREE(cuQPBuf);
> + X265_FREE(cuQPReuseBuf);
> + }
> X265_FREE(tempBuf);
> consumedBytes += frameRecordSize;
> }
> @@ -4515,7 +4528,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
> MV* mv[2];
> int8_t* refIdx[2];
> - int8_t* cuQPBuf = NULL;
> + int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>
[KS] Why can't we reuse cuQPBuf? I agree that the size of the offsets differs
between reuse level 10 and the other levels, but that can be taken care of in
the allocation.
>
> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
> bool bIntraInInter = false;
> @@ -4536,11 +4549,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> depthBuf = tempBuf;
> modeBuf = tempBuf + depthBytes;
> if (m_param->rc.cuTree)
> + {
> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> + cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
> analysis->numCUsInFrame);
> + }
>
> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->depth);
> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->modes);
> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
> + if (m_param->rc.cuTree) {
> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, interPic->cuQPOff);
> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
> (scaledNumPartition * analysis->numCUsInFrame), m_analysisFileIn,
> interPic->cuQPOffReuse);
> + }
>
> if (m_param->analysisLoadReuseLevel > 4)
> {
> @@ -4611,9 +4630,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> }
> count += bytes;
> }
> + if (m_param->rc.cuTree)
> + {
> + for (uint32_t i = 0; i < (scaledNumPartition * analysis->numCUsInFrame);
> i++)
> + memset(&(analysis->interData)->cuQPOffReuse[i],
> cuQPReuseBuf[i], sizeof(int8_t));
> + }
>
> if (m_param->rc.cuTree)
> + {
> X265_FREE(cuQPBuf);
> + X265_FREE(cuQPReuseBuf);
> + }
> X265_FREE(tempBuf);
> }
> if (m_param->analysisLoadReuseLevel == 10)
> @@ -4814,19 +4841,26 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> return;
>
> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSizes = NULL;
> - int8_t *cuQPBuf = NULL;
> + int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;;
>
> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
> depthBuf = tempBuf;
> modeBuf = tempBuf + depthBytes;
> partSizes = tempBuf + 2 * depthBytes;
> if (m_param->rc.cuTree)
> + {
> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> + cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions /
> factor) * analysis->numCUsInFrame);
> + }
>
> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->depth);
> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->partSizes);
> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
> + if (m_param->rc.cuTree)
> + {
> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, intraPic->cuQPOff);
> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
> ((analysis->numPartitions / factor) * analysis->numCUsInFrame),
> m_analysisFileIn, intraPic->cuQPOffReuse);
> + }
>
> uint32_t count = 0;
> for (uint32_t d = 0; d < depthBytes; d++)
> @@ -4869,7 +4903,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> }
> X265_FREE(tempLumaBuf);
> if (m_param->rc.cuTree)
> + {
> X265_FREE(cuQPBuf);
> + X265_FREE(cuQPReuseBuf);
> + }
> X265_FREE(tempBuf);
> consumedBytes += frameRecordSize;
> }
> @@ -4886,7 +4923,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
> MV* mv[2];
> int8_t* refIdx[2];
> - int8_t* cuQPBuf = NULL;
> + int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;
>
> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
> bool bIntraInInter = false;
> @@ -4901,11 +4938,18 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> depthBuf = tempBuf;
> modeBuf = tempBuf + depthBytes;
> if (m_param->rc.cuTree)
> + {
> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
> + cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions /
> factor) * analysis->numCUsInFrame);
> + }
>
> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->depth);
> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->modes);
> - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
> + if (m_param->rc.cuTree)
> + {
> + X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
> m_analysisFileIn, interPic->cuQPOff);
> + X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
> (analysis->numPartitions / factor) * analysis->numCUsInFrame,
> m_analysisFileIn, interPic->cuQPOffReuse);
> + }
> if (m_param->analysisLoadReuseLevel > 4)
> {
> partSize = modeBuf + depthBytes;
> @@ -5017,7 +5061,16 @@ void Encoder::readAnalysisFile(x265_analysis_data*
> analysis, int curPoc, const x
> }
>
> if (m_param->rc.cuTree)
> + {
> + for (uint32_t i = 0; i < ((analysis->numPartitions / factor)
> * analysis->numCUsInFrame); i++)
> + memset(&(analysis->interData)->cuQPOffReuse[i],
> cuQPReuseBuf[i], sizeof(int8_t));
> + }
> +
> + if (m_param->rc.cuTree)
> + {
> X265_FREE(cuQPBuf);
> + X265_FREE(cuQPReuseBuf);
> + }
> X265_FREE(tempBuf);
>
> if (m_param->analysisLoadReuseLevel == 10)
> @@ -5540,6 +5593,12 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
> intraDataCTU->cuQPOff[depthBytes] =
> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
> absPartIdx += ctu->m_numPartitions >> (depth * 2);
> }
> +
> + if (m_param->rc.cuTree)
> + {
> + for (uint32_t i = (cuAddr * ctu->m_numPartitions), j
> = 0; j < ctu->m_numPartitions; i++, j++)
> + intraDataCTU->cuQPOffReuse[i] =
> (int8_t)(ctu->m_qpreuse[j] - baseQP);
> + }
> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
> ctu->m_numPartitions);
> }
> }
> @@ -5599,13 +5658,20 @@ void
> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
> }
> absPartIdx += ctu->m_numPartitions >> (depth * 2);
> }
> +
> + if (m_param->rc.cuTree)
> + {
> + for (uint32_t i = (cuAddr * ctu->m_numPartitions), j
> = 0; j < ctu->m_numPartitions; i++, j++)
> + interDataCTU->cuQPOffReuse[i] =
> (int8_t)(ctu->m_qpreuse[j] - baseQP);
> + }
> +
> if (m_param->analysisSaveReuseLevel == 10 &&
> bIntraInInter)
> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
> ctu->m_numPartitions);
> }
> }
>
> if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
> == X265_TYPE_I) && m_param->rc.cuTree)
> - analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
> (sizeof(int8_t) * depthBytes);
> + analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
> (sizeof(int8_t) * depthBytes) + (sizeof(int8_t) * analysis->numPartitions
> * analysis->numCUsInFrame);
> else if (analysis->sliceType == X265_TYPE_IDR ||
> analysis->sliceType == X265_TYPE_I)
> analysis->frameRecordSize += sizeof(uint8_t)*
> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
> else
> @@ -5613,7 +5679,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */
> analysis->frameRecordSize += depthBytes * 2;
> if (m_param->rc.cuTree)
> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
> + {
> + analysis->frameRecordSize += (sizeof(int8_t) *
> depthBytes);
> + analysis->frameRecordSize += (sizeof(int8_t) *
> analysis->numPartitions * analysis->numCUsInFrame);
> + }
> if (m_param->analysisSaveReuseLevel > 4)
> analysis->frameRecordSize += (depthBytes * 2);
>
> @@ -5669,7 +5738,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
> X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
> X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
> depthBytes, m_analysisFileOut);
> if (m_param->rc.cuTree)
> + {
> X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
> depthBytes, m_analysisFileOut);
> + X265_FWRITE((analysis->intraData)->cuQPOffReuse,
> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
> m_analysisFileOut);
> + }
> X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
> }
> else
> @@ -5677,7 +5749,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
> analysis, FrameData &curEncD
> X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
> X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
> if (m_param->rc.cuTree)
> + {
> X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
> depthBytes, m_analysisFileOut);
> + X265_FWRITE((analysis->interData)->cuQPOffReuse,
> sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
> m_analysisFileOut);
> + }
> if (m_param->analysisSaveReuseLevel > 4)
> {
> X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t),
> depthBytes, m_analysisFileOut);
> diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
> index 0adb0d0db..3bc01268b 100644
> --- a/source/encoder/slicetype.cpp
> +++ b/source/encoder/slicetype.cpp
> @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
> bool bKeyframe)
>
> if (!framecnt)
> {
> - if (m_param->rc.cuTree)
> + if (m_param->rc.cuTree && !m_param->analysisLoad)
> cuTree(frames, 0, bKeyframe);
> return;
> }
> diff --git a/source/x265.h b/source/x265.h
> index f44040ba7..d6a828539 100644
> --- a/source/x265.h
> +++ b/source/x265.h
> @@ -145,6 +145,7 @@ typedef struct x265_analysis_intra_data
> char* partSizes;
> uint8_t* chromaModes;
> int8_t* cuQPOff;
> + int8_t* cuQPOffReuse;
> }x265_analysis_intra_data;
>
> typedef struct x265_analysis_MV
> @@ -170,6 +171,7 @@ typedef struct x265_analysis_inter_data
> x265_analysis_MV* mv[2];
> int64_t* sadCost;
> int8_t* cuQPOff;
> + int8_t* cuQPOffReuse;
> }x265_analysis_inter_data;
>
> typedef struct x265_weight_param
> --
> 2.23.0.windows.1
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Regards,
Kavitha
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20201120/df2ef79d/attachment-0001.html>
More information about the x265-devel
mailing list