<div dir="ltr"><div dir="ltr"><br></div><div dir="ltr">[AM] Can't we share lowres cutree stats generated at qg size granularity? Why MAX_NUM_CU_GEOMS combinations? </div><div><br></div><div>[KS] If we share the stats that way, we will have to calculate the dQP per CU in the analysis phase, just as in the save encode, and we will not get the savings in CPU cycles there. Currently we store the final dQP derived from the lowres MV costs at QG-size granularity, taken as the difference between the final QP and the base QP per slice. </div><div><br></div><div>MAX_NUM_CU_GEOMS is 85 = (1 + 4 + 16 + 64); this is the maximum number of partitions at which a QP can be computed and used in a CTU.</div><div dir="ltr"><br></div><div dir="ltr">[AM] Won't this implicitly turn OFF cutree at reuse-level 1?   <br></div><div dir="ltr"><br></div><div>[KS]  Agreed and addressed.</div><div dir="ltr"> <br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Jan 19, 2021 at 11:12 PM Aruna Matheswaran <<a href="mailto:aruna@multicorewareinc.com">aruna@multicorewareinc.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Mon, Jan 11, 2021 at 8:08 PM Srikanth Kurapati <<a href="mailto:srikanth.kurapati@multicorewareinc.com" target="_blank">srikanth.kurapati@multicorewareinc.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001<br>From: Srikanth Kurapati <<a href="mailto:srikanth.kurapati@multicorewareinc.com" target="_blank">srikanth.kurapati@multicorewareinc.com</a>><br>Date: Wed, 30 Dec 2020 17:00:08 +0530<br>Subject: [PATCH] fix: corrects output mismatch for cutree enabled analysis<br> save/load encodes with 
reuse-levels in between 1 to 10 for similar encoder<br> settings.<br><br>---<br> source/abrEncApp.cpp         |  14 +++-<br> source/common/common.h       |   3 +-<br> source/common/cudata.h       |   2 +-<br> source/encoder/analysis.cpp  |  31 ++++++++-<br> source/encoder/analysis.h    |   1 +<br> source/encoder/api.cpp       |  28 +++++++-<br> source/encoder/encoder.cpp   | 123 ++++++++++++++++++++++++++---------<br> source/encoder/slicetype.cpp |   2 +-<br> source/x265.h                |   4 +-<br> 9 files changed, 166 insertions(+), 42 deletions(-)<br><br>diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp<br>index fa62ebf63..ea255e3f6 100644<br>--- a/source/abrEncApp.cpp<br>+++ b/source/abrEncApp.cpp<br>@@ -340,7 +340,12 @@ namespace X265_NS {<br>             memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char) * src->depthBytes);<br>             memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes);<br>             if (m_param->rc.cuTree)<br>-                memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);<br>+            {<br>+                if (m_param->analysisSaveReuseLevel == 10)<br>+                    memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);<br>+                else<br>+                    memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));<br>+            }<br>         }<br>         else<br>         {<br>@@ -355,7 +360,12 @@ namespace X265_NS {<br>             memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * src->depthBytes);<br>             memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * src->depthBytes);<br>             if (m_param->rc.cuTree)<br>-                memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);<br>+            {<br>+                if (m_param->analysisReuseLevel == 10)<br>+                    
memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes);<br>+                else<br>+                    memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS));<br>+            }<br>             if (m_param->analysisSaveReuseLevel > 4)<br>             {<br>                 memcpy(interDst->partSize, interSrc->partSize, sizeof(uint8_t) * src->depthBytes);<br>diff --git a/source/common/common.h b/source/common/common.h<br>index 8c06cd79e..0ffbf17eb 100644<br>--- a/source/common/common.h<br>+++ b/source/common/common.h<br>@@ -326,7 +326,8 @@ typedef int16_t  coeff_t;      // transform coefficient<br> <br> #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422)<br> #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)<br>-#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8<br>+#define MAX_NUM_CU_GEOMS 85<br>+#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8<br> <br> #define MAX_NUM_TR_COEFFS           MAX_TR_SIZE * MAX_TR_SIZE // Maximum number of transform coefficients, for a 32x32 transform<br> #define MAX_NUM_TR_CATEGORIES       16                        // 32, 16, 8, 4 transform categories each for luma and chroma<br>diff --git a/source/common/cudata.h b/source/common/cudata.h<br>index 8397f0568..c7d9a1972 100644<br>--- a/source/common/cudata.h<br>+++ b/source/common/cudata.h<br>@@ -371,7 +371,7 @@ struct CUDataMemPool<br>             CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) * numInstances);<br>         }<br>         else<br>-        {            <br>+        {<br>             uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp));<br>             CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances);<br>         }<br>diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp<br>index aabf386ca..22a4ba74f 100644<br>--- a/source/encoder/analysis.cpp<br>+++ b/source/encoder/analysis.cpp<br>@@ -220,6 +220,9 @@ Mode& 
Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, con<br>         if (m_param->analysisSave && !m_param->analysisLoad)<br>             for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir; i++)<br>                 m_reuseRef[i] = -1;<br>+<br>+        if (m_param->rc.cuTree)<br>+            m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr * MAX_NUM_CU_GEOMS];<br>     }<br>     ProfileCUScope(ctu, totalCTUTime, totalCTUs);<br> <br>@@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, con<br>             memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) * numPartition);<br>             memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);<br>         }<br>+        if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10)<br>+            m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr * MAX_NUM_CU_GEOMS];<br>         compressIntraCU(ctu, cuGeom, qp);<br>     }<br>     else<br>@@ -520,6 +525,9 @@ uint64_t Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom<br>     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);<br>     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);<br> <br>+    if (m_param->rc.cuTree  && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10)<br>+        m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;<br></div></blockquote><div></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">+<br>     bool bAlreadyDecided = m_param->intraRefine != 4 && parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX && !(m_param->bAnalysisType == HEVC_INFO);<br>     bool bDecidedDepth = m_param->intraRefine != 4 && parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;<br>     int split = 0;<br>@@ -870,6 +878,9 @@ uint32_t 
Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& c<br>     uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU, cuGeom) : 0;<br>     uint32_t splitRefs[4] = { 0, 0, 0, 0 };<br> <br>+    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10)<br>+        m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;<br>+<br>     X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");<br> <br>     PMODE pmode(*this, cuGeom);<br>@@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom&<br>     uint32_t cuAddr = parentCTU.m_cuAddr;<br>     ModeDepth& md = m_modeDepth[depth];<br> <br>+    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10)<br>+        m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;<br> <br>     if (m_param->searchMethod == X265_SEA)<br>     {<br>@@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&<br>     ModeDepth& md = m_modeDepth[depth];<br>     md.bestMode = NULL;<br> <br>+    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10)<br>+        m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp;<br>+<br>     if (m_param->searchMethod == X265_SEA)<br>     {<br>         int numPredDir = m_slice->isInterP() ? 
1 : 2;<br>@@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom, int3<br> <br>     if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)<br>     {<br>-        int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + cuGeom.absPartIdx;<br>+        int cuIdx;<br>+        int8_t cuQPOffSet = 0;<br>+<br>+        if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel == 10)<br>+            cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + cuGeom.absPartIdx;<br>+        else<br>+            cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) + cuGeom.geomRecurId;<br>+<br>         if (ctu.m_slice->m_sliceType == I_SLICE)<br>-            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));<br>+            cuQPOffSet = ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx];<br>         else<br>-            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));<br>+            cuQPOffSet = ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx];<br>+<br>+        return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + cuQPOffSet));<br>     }<br>     if (m_param->rc.hevcAq)<br>     {<br>diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h<br>index 3bcb56bc3..8d76d5c5e 100644<br>--- a/source/encoder/analysis.h<br>+++ b/source/encoder/analysis.h<br>@@ -126,6 +126,7 @@ protected:<br>     int32_t*                   m_reuseRef;<br>     uint8_t*                   m_reuseDepth;<br>     uint8_t*                   m_reuseModes;<br>+    int8_t *                   m_reuseQP; // array of QP values for analysis reuse at reuse levels > 1 and < 10 when cutree is enabled<br>     uint8_t*                   m_reusePartSize;<br>     uint8_t*                   
m_reuseMergeFlag;<br>     x265_analysis_MV*          m_reuseMv[2];<br>diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp<br>index a986355e0..2c90fe8f2 100644<br>--- a/source/encoder/api.cpp<br>+++ b/source/encoder/api.cpp<br>@@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param, x265_analysis_data* analysis)<br>         CHECKED_MALLOC_ZERO(intraData->partSizes, char, analysis->numPartitions * analysis->numCUsInFrame);<br>         CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>         if (param->rc.cuTree)<br>-            CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>+        {<br>+            if (maxReuseLevel == 10)<br>+            {<br>+                CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>+            }<br>+            else<br>+            {<br>+                CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);<br>+            }<br>+        }<br>     }<br>     analysis->intraData = intraData;<br> <br>@@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param, x265_analysis_data* analysis)<br>         CHECKED_MALLOC_ZERO(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br> <br>         if (param->rc.cuTree && !isMultiPassOpt)<br>-            CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>+        {<br>+            if (maxReuseLevel == 10)<br>+            {<br>+                CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>+            }<br>+            else<br>+            {<br>+                CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, MAX_NUM_CU_GEOMS * analysis->numCUsInFrame);<br></div></blockquote><div>

[AM] Can't we share lowres cutree stats generated at qg size granularity? Why MAX_NUM_CU_GEOMS combinations?</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">+            }<br>+        }<br>         CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>         CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, analysis->numPartitions * analysis->numCUsInFrame);<br>         CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, analysis->numPartitions * analysis->numCUsInFrame);<br>@@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param, x265_analysis_data* analysis)<br>             X265_FREE((analysis->intraData)->partSizes);<br>             X265_FREE((analysis->intraData)->chromaModes);<br>             if (param->rc.cuTree)<br>-                X265_FREE((analysis->intraData)->cuQPOff);<br>+            {<br>+                X265_FREE_ZERO((analysis->intraData)->cuQPOff);<br>+            }<br>         }<br>         X265_FREE(analysis->intraData);<br>         analysis->intraData = NULL;<br>@@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param, x265_analysis_data* analysis)<br>         X265_FREE((analysis->interData)->depth);<br>         X265_FREE((analysis->interData)->modes);<br>         if (!isMultiPassOpt && param->rc.cuTree)<br>+        {<br>             X265_FREE((analysis->interData)->cuQPOff);<br>+        }<br>         X265_FREE((analysis->interData)->mvpIdx[0]);<br>         X265_FREE((analysis->interData)->mvpIdx[1]);<br>         X265_FREE((analysis->interData)->mv[0]);<br>diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp<br>index 1f710e1ce..5eb123d31 100644<br>--- a/source/encoder/encoder.cpp<br>+++ b/source/encoder/encoder.cpp<br>@@ -4444,6 +4444,26 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>             }<br>         }<br>     
}<br>+<br>+    int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL;<br>+    uint32_t reuseBufSize = 0;<br>+<br>+    if (m_param->rc.cuTree)<br>+    {<br>+        if (m_param->analysisLoadReuseLevel == 10)<br>+            reuseBufSize = depthBytes;<br>+        else if (m_param->analysisLoadReuseLevel > 1)<br>+            reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;<br>+        cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);<br>+        if (!m_param->bUseAnalysisFile)<br>+        {<br>+            if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>+                cuQPOffSets = intraPic->cuQPOff;<br>+            else<br>+                cuQPOffSets = interPic->cuQPOff;<br>+        }<br>+    }<br>+<br>     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>     {<br>         if (m_param->bAnalysisType == HEVC_INFO)<br>@@ -4452,19 +4472,21 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>             return;<br> <br>         uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSizes = NULL;<br>-        int8_t *cuQPBuf = NULL;<br> <br>         tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);<br>         depthBuf = tempBuf;<br>         modeBuf = tempBuf + depthBytes;<br>         partSizes = tempBuf + 2 * depthBytes;<br>-        if (m_param->rc.cuTree)<br>-            cuQPBuf = X265_MALLOC(int8_t, depthBytes);<br> <br>         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->depth);<br>         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes);<br>         X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->partSizes);<br>-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, intraPic->cuQPOff); }<br>+        if (m_param->rc.cuTree)<br>+        {<br>+            X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, 
m_analysisFileIn, cuQPOffSets);<br>+            if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10)<br>+                memcpy(analysis->intraData->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize);<br>+        }<br> <br>         size_t count = 0;<br>         for (uint32_t d = 0; d < depthBytes; d++)<br>@@ -4480,7 +4502,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>             memset(&(analysis->intraData)->depth[count], depthBuf[d], bytes);<br>             memset(&(analysis->intraData)->chromaModes[count], modeBuf[d], bytes);<br>             memset(&(analysis->intraData)->partSizes[count], partSizes[d], bytes);<br>-            if (m_param->rc.cuTree)<br>+            if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)<br>                 memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d], bytes);<br>             count += bytes;<br>         }<br>@@ -4515,7 +4537,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>         uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];<br>         MV* mv[2];<br>         int8_t* refIdx[2];<br>-        int8_t* cuQPBuf = NULL;<br> <br>         int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2;<br>         bool bIntraInInter = false;<br>@@ -4535,12 +4556,15 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>             tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);<br>             depthBuf = tempBuf;<br>             modeBuf = tempBuf + depthBytes;<br>-            if (m_param->rc.cuTree)<br>-                cuQPBuf = X265_MALLOC(int8_t, depthBytes);<br> <br>             X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->depth);<br>             X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->modes);<br>-            if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }<br>+            if (m_param->rc.cuTree)<br>+            {<br>+                X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets);<br>+                if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10)<br>+                    memcpy(analysis->interData->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize);<br>+            }<br> <br>             if (m_param->analysisLoadReuseLevel > 4)<br>             {<br>@@ -4578,7 +4602,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>                     depthBuf[d] = 1;<br>                 memset(&(analysis->interData)->depth[count], depthBuf[d], bytes);<br>                 memset(&(analysis->interData)->modes[count], modeBuf[d], bytes);<br>-                if (m_param->rc.cuTree)<br>+                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)<br>                     memset(&(analysis->interData)->cuQPOff[count], cuQPBuf[d], bytes);<br>                 if (m_param->analysisLoadReuseLevel > 4)<br>                 {<br>@@ -4736,7 +4760,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>     int numPartitions = analysis->numPartitions;<br>     
int numCUsInFrame = analysis->numCUsInFrame;<br>     int numCuInHeight = analysis->numCuInHeight;<br>-    /* Allocate memory for scaled resoultion's numPartitions and numCUsInFrame*/<br>+    /* Allocate memory for scaled resolution's numPartitions and numCUsInFrame */<br>     analysis->numPartitions = m_param->num4x4Partitions;<br>     analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;<br>     analysis->numCuInHeight = cuLoc.heightInCU;<br>@@ -4808,25 +4832,40 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>         X265_FREE(vbvCostBuf);<br>     }<br> <br>+    uint32_t reuseBufSize = 0;<br>+    int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL;<br>+    if (m_param->rc.cuTree)<br>+    {<br>+        if (m_param->analysisLoadReuseLevel == 10)<br>+            reuseBufSize = depthBytes;<br>+        else if (m_param->analysisLoadReuseLevel > 1)<br>+            reuseBufSize = (MAX_NUM_CU_GEOMS / factor) * (analysis->numCUsInFrame);<br>+        cuQPBuf = X265_MALLOC(int8_t, reuseBufSize);<br>+        if (!m_param->bUseAnalysisFile)<br>+        {<br>+            if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>+                cuQPOffSets = intraPic->cuQPOff;<br>+            else<br>+                cuQPOffSets = interPic->cuQPOff;<br>+        }<br>+    }<br>+<br>     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>     {<br>         if (m_param->analysisLoadReuseLevel < 2)<br>             return;<br> <br>         uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSizes = NULL;<br>-        int8_t *cuQPBuf = NULL;<br> <br>         tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);<br>         depthBuf = tempBuf;<br>         modeBuf = tempBuf + depthBytes;<br>         partSizes = tempBuf + 2 * depthBytes;<br>-        if (m_param->rc.cuTree)<br>-            cuQPBuf = X265_MALLOC(int8_t, depthBytes);<br> <br>         X265_FREAD(depthBuf, sizeof(uint8_t), 
depthBytes, m_analysisFileIn, intraPic->depth);<br>         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes);<br>         X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->partSizes);<br>-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, intraPic->cuQPOff); }<br>+        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets); }<br> <br>         uint32_t count = 0;<br>         for (uint32_t d = 0; d < depthBytes; d++)<br>@@ -4848,7 +4887,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>                 memset(&(analysis->intraData)->depth[count], depthBuf[d], bytes);<br>                 memset(&(analysis->intraData)->chromaModes[count], modeBuf[d], bytes);<br>                 memset(&(analysis->intraData)->partSizes[count], partSizes[d], bytes);<br>-                if (m_param->rc.cuTree)<br>+                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)<br>                     memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d], bytes);<br>                 count += bytes;<br>                 d += getCUIndex(&cuLoc, &count, bytes, 1);<br>@@ -4886,7 +4925,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>         uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];<br>         MV* mv[2];<br>         int8_t* refIdx[2];<br>-        int8_t* cuQPBuf = NULL;<br> <br>         int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2;<br>         bool bIntraInInter = false;<br>@@ -4900,12 +4938,16 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>         tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);<br>         depthBuf = tempBuf;<br>         modeBuf = tempBuf + depthBytes;<br>-        if (m_param->rc.cuTree)<br>-            cuQPBuf = X265_MALLOC(int8_t, depthBytes);<br> <br>         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->depth);<br>         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->modes);<br>-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }<br>+        if (m_param->rc.cuTree)<br>+        {<br>+            X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets);<br>+            if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10)<br>+                memcpy(&(analysis->interData)->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize);<br>+        }<br>+<br>         if (m_param->analysisLoadReuseLevel > 4)<br>         {<br>             partSize = modeBuf + depthBytes;<br>@@ -4954,7 +4996,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>             {<br>                 memset(&(analysis->interData)->depth[count], writeDepth, bytes);<br>                 memset(&(analysis->interData)->modes[count], modeBuf[d], bytes);<br>-                if (m_param->rc.cuTree)<br>+                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)<br>                     memset(&(analysis->interData)->cuQPOff[count], cuQPBuf[d], bytes);<br>                 if (m_param->analysisLoadReuseLevel == 10 && bIntraInInter)<br>                     memset(&(analysis->intraData)->chromaModes[count], chromaDir[d], bytes);<br>@@ -5046,7 +5088,9 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x<br>  
           }<br>         }<br>         else<br>+        {<br>             X265_FREAD((analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref);<br>+        }<br> <br>         consumedBytes += frameRecordSize;<br>         if (numDir == 1)<br>@@ -5510,9 +5554,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br>         analysis->frameRecordSize += analysis->numCUsInFrame * sizeof(sse_t);<br>     }<br> <br>+    uint32_t reuseQPBufsize = 0;<br>     if (m_param->analysisSaveReuseLevel > 1)<br>     {<br>-<br>+        reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame;<br>         if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>         {<br>             for (uint32_t cuAddr = 0; cuAddr < analysis->numCUsInFrame; cuAddr++)<br>@@ -5536,12 +5581,21 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br>                     partSize = ctu->m_partSize[absPartIdx];<br>                     intraDataCTU->partSizes[depthBytes] = partSize;<br> <br>-                    if (m_param->rc.cuTree)<br>+                    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)<br>                         intraDataCTU->cuQPOff[depthBytes] = (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);<br>                     absPartIdx += ctu->m_numPartitions >> (depth * 2);<br>                 }<br>+<br>+                if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel < 10)<br>+                {<br>+                    uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS;<br>+                    for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < nextCuIdx; i++)<br>+                        intraDataCTU->cuQPOff[i] = (int8_t)(intraDataCTU->cuQPOff[i] - baseQP);<br>+                }<br>                 memcpy(&intraDataCTU->modes[ctu->m_cuAddr * ctu->m_numPartitions], ctu->m_lumaIntraDir, 
sizeof(uint8_t)* ctu->m_numPartitions);<br>             }<br>+            if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)<br>+                reuseQPBufsize = depthBytes;<br>         }<br>         else<br>         {<br>@@ -5567,7 +5621,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br>                         predMode = 4; // used as indicator if the block is coded as bidir<br> <br>                     interDataCTU->modes[depthBytes] = predMode;<br>-                    if (m_param->rc.cuTree)<br>+                    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)<br>                         interDataCTU->cuQPOff[depthBytes] = (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);<br> <br>                     if (m_param->analysisSaveReuseLevel > 4)<br>@@ -5599,13 +5653,23 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br>                     }<br>                     absPartIdx += ctu->m_numPartitions >> (depth * 2);<br>                 }<br>+<br>+                if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel < 10)<br>+                {<br>+                    uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS;<br>+                    for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < nextCuIdx ; i++)<br>+                        interDataCTU->cuQPOff[i] = (int8_t)(interDataCTU->cuQPOff[i] - baseQP);<br>+                }<br>+<br>                 if (m_param->analysisSaveReuseLevel == 10 && bIntraInInter)<br>                     memcpy(&intraDataCTU->modes[ctu->m_cuAddr * ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* ctu->m_numPartitions);<br>             }<br>+            if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10)<br>+                reuseQPBufsize = depthBytes;<br>         }<br> <br>         if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree)<br>-            
analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + (sizeof(int8_t) * depthBytes);<br>+            analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + (sizeof(int8_t) * reuseQPBufsize);<br>         else if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>             analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;<br>         else<br>@@ -5613,7 +5677,8 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br>             /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */<br>             analysis->frameRecordSize += depthBytes * 2;<br>             if (m_param->rc.cuTree)<br>-            analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);<br>+                analysis->frameRecordSize += (sizeof(int8_t) * reuseQPBufsize);<br>+<br>             if (m_param->analysisSaveReuseLevel > 4)<br>                 analysis->frameRecordSize += (depthBytes * 2);<br> <br>@@ -5669,7 +5734,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br>         X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t), depthBytes, m_analysisFileOut);<br>         X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), depthBytes, m_analysisFileOut);<br>         if (m_param->rc.cuTree)<br>-            X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), depthBytes, m_analysisFileOut);<br>+            X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), reuseQPBufsize, m_analysisFileOut);<br>         X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);<br>     }<br>     else<br>@@ -5677,7 +5742,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD<br>         
X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), depthBytes, m_analysisFileOut);<br>         X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), depthBytes, m_analysisFileOut);<br>         if (m_param->rc.cuTree)<br>-            X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), depthBytes, m_analysisFileOut);<br>+            X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), reuseQPBufsize, m_analysisFileOut);<br>         if (m_param->analysisSaveReuseLevel > 4)<br>         {<br>             X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t), depthBytes, m_analysisFileOut);<br>@@ -5762,7 +5827,7 @@ void Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c<br>                     interData->mv[1][depthBytes].word = ctu->m_mv[1][absPartIdx].word;<br>                     interData->mvpIdx[1][depthBytes] = ctu->m_mvpIdx[1][absPartIdx];<br>                     ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx];<br>-                    predMode = 4; // used as indiacator if the block is coded as bidir<br>+                    predMode = 4; // used as indicator if the block is coded as bidir<br>                 }<br>                 interData->modes[depthBytes] = predMode;<br> <br>diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp<br>index 0adb0d0db..3bc01268b 100644<br>--- a/source/encoder/slicetype.cpp<br>+++ b/source/encoder/slicetype.cpp<br>@@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, bool bKeyframe)<br> <br>     if (!framecnt)<br>     {<br>-        if (m_param->rc.cuTree)<br>+        if (m_param->rc.cuTree && !m_param->analysisLoad)<br></div></blockquote><div>[AM] Won't this implicitly turn OFF cutree at reuse-level 1? 
</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr">             cuTree(frames, 0, bKeyframe);<br>         return;<br>     }<br>diff --git a/source/x265.h b/source/x265.h<br>index f44040ba7..8d7a75826 100644<br>--- a/source/x265.h<br>+++ b/source/x265.h<br>@@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data<br>     uint8_t*  modes;<br>     char*     partSizes;<br>     uint8_t*  chromaModes;<br>-    int8_t*    cuQPOff;<br>+    int8_t*   cuQPOff;<br> }x265_analysis_intra_data;<br> <br> typedef struct x265_analysis_MV<br>@@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data<br>     uint8_t*    interDir;<br>     uint8_t*    mvpIdx[2];<br>     int8_t*     refIdx[2];<br>-    x265_analysis_MV*         mv[2];<br>+    x265_analysis_MV* mv[2];<br>     int64_t*     sadCost;<br>     int8_t*    cuQPOff;<br> }x265_analysis_inter_data;<br>-- <br>2.20.1.windows.1<br><br><div><br></div>-- <br><div dir="ltr"><div dir="ltr"><b style="background-color:rgb(255,255,255)"><font color="#0b5394">With Regards,</font></b><div><b style="background-color:rgb(255,255,255)"><font color="#0b5394">Srikanth Kurapati.</font></b></div></div></div></div>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br clear="all"><div><br></div>-- <br><div dir="ltr"><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><font face="georgia, serif">Regards,</font><div><b><font face="georgia, serif">Aruna Matheswaran,</font></b></div><div><font face="georgia, serif">Video Codec Engineer,</font></div><div><font face="georgia, serif">Media &amp; AI analytics BU,</font></div><div><span><span style="font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;vertical-align:baseline;white-space:pre-wrap"><span style="border:none;display:inline-block;overflow:hidden;width:153px;height:58px"><img src="https://lh5.googleusercontent.com/gjX5cPNIZgwUrhfqkTwQUZWztIKmmo0qs3kbwvkS5H-bDVE2ftte9pMTVnFLSjOcjYWLtfc6_OGpxW4vraLg2r5QAIf1Q3MpldFDgWtzK_gXi8ptw5B3joIbsGL6mxj-JRdjHzT5" width="96" height="36" style="margin-left: 0px; margin-top: 0px;"></span></span></span><font face="georgia, serif"><br></font></div><div><span><span style="font-size:11pt;font-family:Arial;color:rgb(0,0,0);background-color:transparent;vertical-align:baseline;white-space:pre-wrap"><span style="border:none;display:inline-block;overflow:hidden;width:153px;height:58px"><img src="https://lh5.googleusercontent.com/gjX5cPNIZgwUrhfqkTwQUZWztIKmmo0qs3kbwvkS5H-bDVE2ftte9pMTVnFLSjOcjYWLtfc6_OGpxW4vraLg2r5QAIf1Q3MpldFDgWtzK_gXi8ptw5B3joIbsGL6mxj-JRdjHzT5" style="margin-left: 0px; margin-top: 0px;"></span></span></span><font face="georgia, serif"><br></font></div><div><font face="georgia, serif"><br></font></div></div></div></div></div></div></div></div></div></div>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br clear="all"><div><br></div>-- <br><div dir="ltr" class="gmail_signature"><div dir="ltr"><b style="background-color:rgb(255,255,255)"><font color="#0b5394">With Regards,</font></b><div><b style="background-color:rgb(255,255,255)"><font color="#0b5394">Srikanth Kurapati.</font></b></div></div></div></div>