<div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Mon, Jan 11, 2021 at 8:08 PM Srikanth Kurapati <<a href="mailto:srikanth.kurapati@multicorewareinc.com">srikanth.kurapati@multicorewareinc.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001<br>From: Srikanth Kurapati <<a href="mailto:srikanth.kurapati@multicorewareinc.com" target="_blank">srikanth.kurapati@multicorewareinc.com</a>><br>Date: Wed, 30 Dec 2020 17:00:08 +0530<br>Subject: [PATCH] fix: corrects output mismatch for cutree enabled analysis<br> save/load enodes with reuse-levels in between 1 to 10 for similar encoder<br> settings.<br><br>---<br> source/abrEncApp.cpp | 14 +++-<br> source/common/common.h | 3 +-<br> source/common/cudata.h | 2 +-<br> source/encoder/analysis.cpp | 31 ++++++++-<br> source/encoder/analysis.h | 1 +<br> source/encoder/api.cpp | 28 +++++++-<br> source/encoder/encoder.cpp | 123 ++++++++++++++++++++++++++---------<br> source/encoder/slicetype.cpp | 2 +-<br> source/x265.h | 4 +-<br> 9 files changed, 166 insertions(+), 42 deletions(-)<br><br>diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp<br>index fa62ebf63..ea255e3f6 100644<br>--- a/source/abrEncApp.cpp<br>+++ b/source/abrEncApp.cpp<br>@@ -340,7 +340,12 @@ namespace X265_NS {<br> memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char) * src->depthBytes);<br> memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes);<br> if (m_param->rc.cuTree)<br>- memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);<br>+ {<br>+ if (m_param->analysisSaveReuseLevel == 10)<br>+ memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);<br>+ else<br>+ memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * (src->numCUsInFrame * 
MAX_NUM_CU_GEOMS));<br>+ }<br> }<br> else<br> {<br>@@ -355,7 +360,12 @@ namespace X265_NS {<br> memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * src->depthBytes);<br> memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * src->depthBytes);<br> if (m_param->rc.cuTree)<br>- memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);<br>+ {<br>+ if (m_param->analysisReuseLevel == 10)<br>+ memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);<br>+ else<br>+ memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));<br>+ }<br> if (m_param->analysisSaveReuseLevel > 4)<br> {<br> memcpy(interDst->partSize, interSrc->partSize, sizeof(uint8_t) * src->depthBytes);<br>diff --git a/source/common/common.h b/source/common/common.h<br>index 8c06cd79e..0ffbf17eb 100644<br>--- a/source/common/common.h<br>+++ b/source/common/common.h<br>@@ -326,7 +326,8 @@ typedef int16_t coeff_t; // transform coefficient<br> <br> #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)<br> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)<br>-#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8<br>+#define MAX_NUM_CU_GEOMS 85<br>+#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8<br> <br> #define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // Maximum number of transform coefficients, for a 32x32 transform<br> #define MAX_NUM_TR_CATEGORIES 16 // 32, 16, 8, 4 transform categories each for luma and chroma<br>diff --git a/source/common/cudata.h b/source/common/cudata.h<br>index 8397f0568..c7d9a1972 100644<br>--- a/source/common/cudata.h<br>+++ b/source/common/cudata.h<br>@@ -371,7 +371,7 @@ struct CUDataMemPool<br> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) * numInstances);<br> }<br> else<br>- { <br>+ {<br> uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));<br> CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances);<br> }<br>diff --git 
a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp<br>index aabf386ca..22a4ba74f 100644<br>--- a/source/encoder/analysis.cpp<br>+++ b/source/encoder/analysis.cpp<br>@@ -220,6 +220,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, con<br> if (m_param->analysisSave && !m_param->analysisLoad)<br> for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir; i++)<br> m_reuseRef[i] = -1;<br>+<br>+ if (m_param->rc.cuTree)<br>+ m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr * MAX_NUM_CU_GEOMS];<br> }<br> ProfileCUScope(ctu, totalCTUTime, totalCTUs);<br> <br>@@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, con<br> memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) * numPartition);<br> memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);<br> }<br>+ if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10)<br>+ m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr * MAX_NUM_CU_GEOMS];<br> compressIntraCU(ctu, cuGeom, qp);<br> }<br> else<br>@@ -520,6 +525,9 @@ uint64_t Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom<br> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);<br> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);<br> <br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10)<br>+ m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;<br></div></blockquote><div></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">+<br> bool bAlreadyDecided = m_param->intraRefine != 4 && parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX && !(m_param->bAnalysisType == HEVC_INFO);<br> bool bDecidedDepth = m_param->intraRefine != 4 && parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;<br> int split = 0;<br>@@ -870,6 
+878,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& c<br> uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU, cuGeom) : 0;<br> uint32_t splitRefs[4] = { 0, 0, 0, 0 };<br> <br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10)<br>+ m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;<br>+<br> X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");<br> <br> PMODE pmode(*this, cuGeom);<br>@@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom&<br> uint32_t cuAddr = parentCTU.m_cuAddr;<br> ModeDepth& md = m_modeDepth[depth];<br> <br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10)<br>+ m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;<br> <br> if (m_param->searchMethod == X265_SEA)<br> {<br>@@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&<br> ModeDepth& md = m_modeDepth[depth];<br> md.bestMode = NULL;<br> <br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10)<br>+ m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;<br>+<br> if (m_param->searchMethod == X265_SEA)<br> {<br> int numPredDir = m_slice->isInterP() ? 
1 : 2;<br>@@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom, int3<br> <br> if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)<br> {<br>- int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + cuGeom.absPartIdx;<br>+ int cuIdx;<br>+ int8_t cuQPOffSet = 0;<br>+<br>+ if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel == 10)<br>+ cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + cuGeom.absPartIdx;<br>+ else<br>+ cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) + cuGeom.geomRecurId;<br>+<br> if (ctu.m_slice->m_sliceType == I_SLICE)<br>- return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));<br>+ cuQPOffSet = ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx];<br> else<br>- return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));<br>+ cuQPOffSet = ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx];<br>+<br>+ return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + cuQPOffSet));<br> }<br> if (m_param->rc.hevcAq)<br> {<br>diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h<br>index 3bcb56bc3..8d76d5c5e 100644<br>--- a/source/encoder/analysis.h<br>+++ b/source/encoder/analysis.h<br>@@ -126,6 +126,7 @@ protected:<br> int32_t* m_reuseRef;<br> uint8_t* m_reuseDepth;<br> uint8_t* m_reuseModes;<br>+ int8_t * m_reuseQP; // array of QP values for analysis reuse at reuse levels > 1 and < 10 when cutree is enabled<br> uint8_t* m_reusePartSize;<br> uint8_t* m_reuseMergeFlag;<br> x265_analysis_MV* m_reuseMv[2];<br>diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp<br>index a986355e0..2c90fe8f2 100644<br>--- a/source/encoder/api.cpp<br>+++ b/source/encoder/api.cpp<br>@@ -825,7 +825,16 @@ void 
x265_alloc_analysis_data(x265_param *param, x265_analysis_data* analysis)<br> CHECKED_MALLOC_ZERO(intraData->partSizes, char, analysis->numPartitions * analysis->numCUsInFrame);<br> CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br> if (param->rc.cuTree)<br>- CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>+ {<br>+ if (maxReuseLevel == 10)<br>+ {<br>+ CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>+ }<br>+ else<br>+ {<br>+ CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);<br>+ }<br>+ }<br> }<br> analysis->intraData = intraData;<br> <br>@@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param, x265_analysis_data* analysis)<br> CHECKED_MALLOC_ZERO(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br> <br> if (param->rc.cuTree && !isMultiPassOpt)<br>- CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>+ {<br>+ if (maxReuseLevel == 10)<br>+ {<br>+ CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>+ }<br>+ else<br>+ {<br>+ CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);<br></div></blockquote><div>
[AM] Can't we share the lowres cutree stats, which are generated at qg size granularity, instead? Why store offsets for all MAX_NUM_CU_GEOMS CU geometries of every CTU?</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">+ }<br>+ }<br> CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br> CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br> CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, analysis->numPartitions * analysis->numCUsInFrame);<br>@@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param, x265_analysis_data* analysis)<br> X265_FREE((analysis->intraData)->partSizes);<br> X265_FREE((analysis->intraData)->chromaModes);<br> if (param->rc.cuTree)<br>- X265_FREE((analysis->intraData)->cuQPOff);<br>+ {<br>+ X265_FREE_ZERO((analysis->intraData)->cuQPOff);<br>+ }<br> }<br> X265_FREE(analysis->intraData);<br> analysis->intraData = NULL;<br>@@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param, x265_analysis_data* analysis)<br> X265_FREE((analysis->interData)->depth);<br> X265_FREE((analysis->interData)->modes);<br> if (!isMultiPassOpt && param->rc.cuTree)<br>+ {<br> X265_FREE((analysis->interData)->cuQPOff);<br>+ }<br> X265_FREE((analysis->interData)->mvpIdx[0]);<br> X265_FREE((analysis->interData)->mvpIdx[1]);<br> X265_FREE((analysis->interData)->mv[0]);<br>diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp<br>index 1f710e1ce..5eb123d31 100644<br>--- a/source/encoder/encoder.cpp<br>+++ b/source/encoder/encoder.cpp<br>@@ -4444,6 +4444,26 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> }<br> }<br> }<br>+<br>+ int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL;<br>+ uint32_t reuseBufSize = 0;<br>+<br>+ if (m_param->rc.cuTree)<br>+ {<br>+ if (m_param->analysisLoadReuseLevel == 10)<br>+ reuseBufSize = depthBytes;<br>+ else if 
(m_param->analysisLoadReuseLevel > 1)<br>+ reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;<br>+ cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);<br>+ if (!m_param->bUseAnalysisFile)<br>+ {<br>+ if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>+ cuQPOffSets = intraPic->cuQPOff;<br>+ else<br>+ cuQPOffSets = interPic->cuQPOff;<br>+ }<br>+ }<br>+<br> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br> {<br> if (m_param->bAnalysisType == HEVC_INFO)<br>@@ -4452,19 +4472,21 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> return;<br> <br> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSizes = NULL;<br>- int8_t *cuQPBuf = NULL;<br> <br> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);<br> depthBuf = tempBuf;<br> modeBuf = tempBuf + depthBytes;<br> partSizes = tempBuf + 2 * depthBytes;<br>- if (m_param->rc.cuTree)<br>- cuQPBuf = X265_MALLOC(int8_t, depthBytes);<br> <br> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->depth);<br> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes);<br> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->partSizes);<br>- if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, intraPic->cuQPOff); }<br>+ if (m_param->rc.cuTree)<br>+ {<br>+ X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets);<br>+ if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10)<br>+ memcpy(analysis->intraData->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize);<br>+ }<br> <br> size_t count = 0;<br> for (uint32_t d = 0; d < depthBytes; d++)<br>@@ -4480,7 +4502,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> memset(&(analysis->intraData)->depth[count], depthBuf[d], bytes);<br> 
memset(&(analysis->intraData)->chromaModes[count], modeBuf[d], bytes);<br> memset(&(analysis->intraData)->partSizes[count], partSizes[d], bytes);<br>- if (m_param->rc.cuTree)<br>+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)<br> memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d], bytes);<br> count += bytes;<br> }<br>@@ -4515,7 +4537,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];<br> MV* mv[2];<br> int8_t* refIdx[2];<br>- int8_t* cuQPBuf = NULL;<br> <br> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;<br> bool bIntraInInter = false;<br>@@ -4535,12 +4556,15 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);<br> depthBuf = tempBuf;<br> modeBuf = tempBuf + depthBytes;<br>- if (m_param->rc.cuTree)<br>- cuQPBuf = X265_MALLOC(int8_t, depthBytes);<br> <br> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->depth);<br> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->modes);<br>- if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }<br>+ if (m_param->rc.cuTree)<br>+ {<br>+ X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets);<br>+ if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10)<br>+ memcpy(analysis->interData->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize);<br>+ }<br> <br> if (m_param->analysisLoadReuseLevel > 4)<br> {<br>@@ -4578,7 +4602,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> depthBuf[d] = 1;<br> memset(&(analysis->interData)->depth[count], depthBuf[d], bytes);<br> memset(&(analysis->interData)->modes[count], modeBuf[d], bytes);<br>- if (m_param->rc.cuTree)<br>+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 
10)<br> memset(&(analysis->interData)->cuQPOff[count], cuQPBuf[d], bytes);<br> if (m_param->analysisLoadReuseLevel > 4)<br> {<br>@@ -4736,7 +4760,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> int numPartitions = analysis->numPartitions;<br> int numCUsInFrame = analysis->numCUsInFrame;<br> int numCuInHeight = analysis->numCuInHeight;<br>- /* Allocate memory for scaled resoultion's numPartitions and numCUsInFrame*/<br>+ /* Allocate memory for scaled resolution's numPartitions and numCUsInFrame */<br> analysis->numPartitions = m_param->num4x4Partitions;<br> analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;<br> analysis->numCuInHeight = cuLoc.heightInCU;<br>@@ -4808,25 +4832,40 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> X265_FREE(vbvCostBuf);<br> }<br> <br>+ uint32_t reuseBufSize = 0;<br>+ int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL;<br>+ if (m_param->rc.cuTree)<br>+ {<br>+ if (m_param->analysisLoadReuseLevel == 10)<br>+ reuseBufSize = depthBytes;<br>+ else if (m_param->analysisLoadReuseLevel > 1)<br>+ reuseBufSize = (MAX_NUM_CU_GEOMS / factor) * (analysis->numCUsInFrame);<br>+ cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);<br>+ if (!m_param->bUseAnalysisFile)<br>+ {<br>+ if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>+ cuQPOffSets = intraPic->cuQPOff;<br>+ else<br>+ cuQPOffSets = interPic->cuQPOff;<br>+ }<br>+ }<br>+<br> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br> {<br> if (m_param->analysisLoadReuseLevel < 2)<br> return;<br> <br> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSizes = NULL;<br>- int8_t *cuQPBuf = NULL;<br> <br> tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);<br> depthBuf = tempBuf;<br> modeBuf = tempBuf + depthBytes;<br> partSizes = tempBuf + 2 * depthBytes;<br>- if (m_param->rc.cuTree)<br>- cuQPBuf = X265_MALLOC(int8_t, depthBytes);<br> <br> 
X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->depth);<br> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes);<br> X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->partSizes);<br>- if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, intraPic->cuQPOff); }<br>+ if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets); }<br> <br> uint32_t count = 0;<br> for (uint32_t d = 0; d < depthBytes; d++)<br>@@ -4848,7 +4887,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> memset(&(analysis->intraData)->depth[count], depthBuf[d], bytes);<br> memset(&(analysis->intraData)->chromaModes[count], modeBuf[d], bytes);<br> memset(&(analysis->intraData)->partSizes[count], partSizes[d], bytes);<br>- if (m_param->rc.cuTree)<br>+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)<br> memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d], bytes);<br> count += bytes;<br> d += getCUIndex(&cuLoc, &count, bytes, 1);<br>@@ -4886,7 +4925,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];<br> MV* mv[2];<br> int8_t* refIdx[2];<br>- int8_t* cuQPBuf = NULL;<br> <br> int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2;<br> bool bIntraInInter = false;<br>@@ -4900,12 +4938,16 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);<br> depthBuf = tempBuf;<br> modeBuf = tempBuf + depthBytes;<br>- if (m_param->rc.cuTree)<br>- cuQPBuf = X265_MALLOC(int8_t, depthBytes);<br> <br> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->depth);<br> X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->modes);<br>- if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }<br>+ if (m_param->rc.cuTree)<br>+ {<br>+ X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets);<br>+ if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10)<br>+ memcpy(&(analysis->interData)->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize);<br>+ }<br>+<br> if (m_param->analysisLoadReuseLevel > 4)<br> {<br> partSize = modeBuf + depthBytes;<br>@@ -4954,7 +4996,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> {<br> memset(&(analysis->interData)->depth[count], writeDepth, bytes);<br> memset(&(analysis->interData)->modes[count], modeBuf[d], bytes);<br>- if (m_param->rc.cuTree)<br>+ if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)<br> memset(&(analysis->interData)->cuQPOff[count], cuQPBuf[d], bytes);<br> if (m_param->analysisLoadReuseLevel == 10 && bIntraInInter)<br> memset(&(analysis->intraData)->chromaModes[count], chromaDir[d], bytes);<br>@@ -5046,7 +5088,9 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br> }<br> }<br> else<br>+ {<br> X265_FREAD((analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref);<br>+ }<br> <br> consumedBytes += frameRecordSize;<br> if (numDir == 1)<br>@@ -5510,9 +5554,10 @@ void 
Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br> analysis->frameRecordSize += analysis->numCUsInFrame * sizeof(sse_t);<br> }<br> <br>+ uint32_t reuseQPBufsize = 0;<br> if (m_param->analysisSaveReuseLevel > 1)<br> {<br>-<br>+ reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;<br> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br> {<br> for (uint32_t cuAddr = 0; cuAddr < analysis->numCUsInFrame; cuAddr++)<br>@@ -5536,12 +5581,21 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br> partSize = ctu->m_partSize[absPartIdx];<br> intraDataCTU->partSizes[depthBytes] = partSize;<br> <br>- if (m_param->rc.cuTree)<br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)<br> intraDataCTU->cuQPOff[depthBytes] = (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);<br> absPartIdx += ctu->m_numPartitions >> (depth * 2);<br> }<br>+<br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel < 10)<br>+ {<br>+ uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS;<br>+ for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < nextCuIdx; i++)<br>+ intraDataCTU->cuQPOff[i] = (int8_t)(intraDataCTU->cuQPOff[i] - baseQP);<br>+ }<br> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* ctu->m_numPartitions);<br> }<br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)<br>+ reuseQPBufsize = depthBytes;<br> }<br> else<br> {<br>@@ -5567,7 +5621,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br> predMode = 4; // used as indicator if the block is coded as bidir<br> <br> interDataCTU->modes[depthBytes] = predMode;<br>- if (m_param->rc.cuTree)<br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)<br> interDataCTU->cuQPOff[depthBytes] = (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);<br> <br> if (m_param->analysisSaveReuseLevel > 4)<br>@@ -5599,13 +5653,23 @@ void 
Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br> }<br> absPartIdx += ctu->m_numPartitions >> (depth * 2);<br> }<br>+<br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel < 10)<br>+ {<br>+ uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS;<br>+ for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < nextCuIdx ; i++)<br>+ interDataCTU->cuQPOff[i] = (int8_t)(interDataCTU->cuQPOff[i] - baseQP);<br>+ }<br>+<br> if (m_param->analysisSaveReuseLevel == 10 && bIntraInInter)<br> memcpy(&intraDataCTU->modes[ctu->m_cuAddr * ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* ctu->m_numPartitions);<br> }<br>+ if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)<br>+ reuseQPBufsize = depthBytes;<br> }<br> <br> if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree)<br>- analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + (sizeof(int8_t) * depthBytes);<br>+ analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + (sizeof(int8_t) * reuseQPBufsize);<br> else if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br> analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;<br> else<br>@@ -5613,7 +5677,8 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */<br> analysis->frameRecordSize += depthBytes * 2;<br> if (m_param->rc.cuTree)<br>- analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);<br>+ analysis->frameRecordSize += (sizeof(int8_t) * reuseQPBufsize);<br>+<br> if (m_param->analysisSaveReuseLevel > 4)<br> analysis->frameRecordSize += (depthBytes * 2);<br> <br>@@ -5669,7 +5734,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br> 
X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t), depthBytes, m_analysisFileOut);<br> X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), depthBytes, m_analysisFileOut);<br> if (m_param->rc.cuTree)<br>- X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), depthBytes, m_analysisFileOut);<br>+ X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), reuseQPBufsize, m_analysisFileOut);<br> X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);<br> }<br> else<br>@@ -5677,7 +5742,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br> X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), depthBytes, m_analysisFileOut);<br> X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), depthBytes, m_analysisFileOut);<br> if (m_param->rc.cuTree)<br>- X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), depthBytes, m_analysisFileOut);<br>+ X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), reuseQPBufsize, m_analysisFileOut);<br> if (m_param->analysisSaveReuseLevel > 4)<br> {<br> X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t), depthBytes, m_analysisFileOut);<br>@@ -5762,7 +5827,7 @@ void Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c<br> interData->mv[1][depthBytes].word = ctu->m_mv[1][absPartIdx].word;<br> interData->mvpIdx[1][depthBytes] = ctu->m_mvpIdx[1][absPartIdx];<br> ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx];<br>- predMode = 4; // used as indiacator if the block is coded as bidir<br>+ predMode = 4; // used as indicator if the block is coded as bidir<br> }<br> interData->modes[depthBytes] = predMode;<br> <br>diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp<br>index 0adb0d0db..3bc01268b 100644<br>--- a/source/encoder/slicetype.cpp<br>+++ b/source/encoder/slicetype.cpp<br>@@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres 
**frames, bool bKeyframe)<br> <br> if (!framecnt)<br> {<br>- if (m_param->rc.cuTree)<br>+ if (m_param->rc.cuTree && !m_param->analysisLoad)<br></div></blockquote><div>[AM] Won't this implicitly turn off cutree when analysis is loaded at reuse-level 1?</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"> cuTree(frames, 0, bKeyframe);<br> return;<br> }<br>diff --git a/source/x265.h b/source/x265.h<br>index f44040ba7..8d7a75826 100644<br>--- a/source/x265.h<br>+++ b/source/x265.h<br>@@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data<br> uint8_t* modes;<br> char* partSizes;<br> uint8_t* chromaModes;<br>- int8_t* cuQPOff;<br>+ int8_t* cuQPOff;<br> }x265_analysis_intra_data;<br> <br> typedef struct x265_analysis_MV<br>@@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data<br> uint8_t* interDir;<br> uint8_t* mvpIdx[2];<br> int8_t* refIdx[2];<br>- x265_analysis_MV* mv[2];<br>+ x265_analysis_MV* mv[2];<br> int64_t* sadCost;<br> int8_t* cuQPOff;<br> }x265_analysis_inter_data;<br>-- <br>2.20.1.windows.1<br><br><div><br></div>-- <br><div dir="ltr"><div dir="ltr"><b style="background-color:rgb(255,255,255)"><font color="#0b5394">With Regards,</font></b><div><b style="background-color:rgb(255,255,255)"><font color="#0b5394">Srikanth Kurapati.</font></b></div></div></div></div>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br clear="all"><div><br></div>-- <br><div dir="ltr" class="gmail_signature"><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><font face="georgia, serif">Regards,</font><div><b><font face="georgia, serif">Aruna Matheswaran,</font></b></div><div><font face="georgia, serif">Video Codec Engineer,</font></div><div><font face="georgia, serif">Media & AI analytics BU,</font></div><div><span><span style="font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;vertical-align:baseline;white-space:pre-wrap"><span style="border:none;display:inline-block;overflow:hidden;width:153px;height:58px"><img src="https://lh5.googleusercontent.com/gjX5cPNIZgwUrhfqkTwQUZWztIKmmo0qs3kbwvkS5H-bDVE2ftte9pMTVnFLSjOcjYWLtfc6_OGpxW4vraLg2r5QAIf1Q3MpldFDgWtzK_gXi8ptw5B3joIbsGL6mxj-JRdjHzT5" width="96" height="36" style="margin-left: 0px; margin-top: 0px;"></span></span></span><font face="georgia, serif"><br></font></div><div><span><span style="font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;vertical-align:baseline;white-space:pre-wrap"><span style="border:none;display:inline-block;overflow:hidden;width:153px;height:58px"><img src="https://lh5.googleusercontent.com/gjX5cPNIZgwUrhfqkTwQUZWztIKmmo0qs3kbwvkS5H-bDVE2ftte9pMTVnFLSjOcjYWLtfc6_OGpxW4vraLg2r5QAIf1Q3MpldFDgWtzK_gXi8ptw5B3joIbsGL6mxj-JRdjHzT5" style="margin-left: 0px; margin-top: 0px;"></span></span></span><font face="georgia, serif"><br></font></div><div><font face="georgia, serif"><br></font></div></div></div></div></div></div></div></div></div></div>