[x265] [PATCH] [Release_3.5] correct reusing cutree qp offsets in load encode for reuse-level > 1 and < 10 for same resolution

Mahesh Pittala mahesh at multicorewareinc.com
Tue Nov 17 03:52:16 CET 2020


>From 787ae5da7431b5d113ea033cf6502ac1cc1e7572 Mon Sep 17 00:00:00 2001
From: maheshpittala <mahesh at multicorewareinc.com>
Date: Sun, 1 Nov 2020 10:09:28 +0530
Subject: [PATCH] correct reusing cutree qp offsets in load encode for
 reuse-level > 1 and < 10 for same resolution

Earlier, the save encode dumped only the best mode's analysis data for each
CTU into the file after encoding, not the analysis of each split CU. So in
analysis load, the encoder read the same best mode's QP value even for split
CUs (whereas the split CUs' QPs would be different in the save encode) and
redid the analysis with it.

Now, since cuGeom.geomRecurId holds a unique ID for each CU, including
parent CUs, the cutree QP offset is stored per geomRecurId in save encode
and the same offset is loaded in load encode.
---
 source/abrEncApp.cpp         |  6 +++
 source/common/cudata.cpp     |  6 ++-
 source/common/cudata.h       |  3 +-
 source/encoder/analysis.cpp  | 32 ++++++++++--
 source/encoder/api.cpp       | 12 +++++
 source/encoder/encoder.cpp   | 97 ++++++++++++++++++++++++++++++++----
 source/encoder/slicetype.cpp |  2 +-
 source/x265.h                |  2 +
 8 files changed, 140 insertions(+), 20 deletions(-)

diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
index cd85154f1..3550d8b11 100644
--- a/source/abrEncApp.cpp
+++ b/source/abrEncApp.cpp
@@ -342,7 +342,10 @@ namespace X265_NS {
             memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char)
* src->depthBytes);
             memcpy(intraDst->chromaModes, intraSrc->chromaModes,
sizeof(uint8_t) * src->depthBytes);
             if (m_param->rc.cuTree)
+            {
                 memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+                memcpy(intraDst->cuQPOffReuse, intraSrc->cuQPOffReuse,
sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
+            }
         }
         else
         {
@@ -357,7 +360,10 @@ namespace X265_NS {
             memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
src->depthBytes);
             memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
src->depthBytes);
             if (m_param->rc.cuTree)
+            {
                 memcpy(interDst->cuQPOff, interSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+                memcpy(interDst->cuQPOffReuse, interSrc->cuQPOffReuse,
sizeof(int8_t) * (src->numCUsInFrame * src->numPartitions));
+            }
             if (m_param->analysisSaveReuseLevel > 4)
             {
                 memcpy(interDst->partSize, interSrc->partSize,
sizeof(uint8_t) * src->depthBytes);
diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp
index 19281dee2..08cdff11a 100644
--- a/source/common/cudata.cpp
+++ b/source/common/cudata.cpp
@@ -194,6 +194,7 @@ void CUData::initialize(const CUDataMemPool& dataPool,
uint32_t depth, const x26

         m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
         m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_qpreuse    = (int8_t*)charBuf; charBuf += m_numPartitions;
         m_log2CUSize         = charBuf; charBuf += m_numPartitions;
         m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
         m_tqBypass           = charBuf; charBuf += m_numPartitions;
@@ -235,6 +236,7 @@ void CUData::initialize(const CUDataMemPool& dataPool,
uint32_t depth, const x26

         m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
         m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_qpreuse =    (int8_t*)charBuf; charBuf += m_numPartitions;
         m_log2CUSize         = charBuf; charBuf += m_numPartitions;
         m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
         m_tqBypass           = charBuf; charBuf += m_numPartitions;
@@ -307,7 +309,7 @@ void CUData::initCTU(const Frame& frame, uint32_t
cuAddr, int qp, uint32_t first
     X265_CHECK(!(frame.m_encData->m_param->bLossless &&
!m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without
TQbypass in PPS\n");

     /* initialize the remaining CU data in one memset */
-    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
+    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ?
BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);

     for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
         m_refTuDepth[i] = -1;
@@ -358,7 +360,7 @@ void CUData::initSubCU(const CUData& ctu, const CUGeom&
cuGeom, int qp)
     m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);

     /* initialize the remaining CU data in one memset */
-    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
+    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ?
BytesPerPartition - 14 : BytesPerPartition - 10) * m_numPartitions);
     memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
 }

diff --git a/source/common/cudata.h b/source/common/cudata.h
index 8397f0568..d58f53e39 100644
--- a/source/common/cudata.h
+++ b/source/common/cudata.h
@@ -192,6 +192,7 @@ public:
     /* Per-part data, stored contiguously */
     int8_t*       m_qp;               // array of QP values
     int8_t*       m_qpAnalysis;       // array of QP values for analysis
reuse
+    int8_t*       m_qpreuse;          // array of QP values for analysis
reuse for reuse levels > 1 and < 10
     uint8_t*      m_log2CUSize;       // array of cu log2Size TODO: seems
redundant to depth
     uint8_t*      m_lumaIntraDir;     // array of intra directions (luma)
     uint8_t*      m_tqBypass;         // array of CU lossless flags
@@ -207,7 +208,7 @@ public:
     uint8_t*      m_transformSkip[3]; // array of transform skipping flags
per plane
     uint8_t*      m_cbf[3];           // array of coded block flags (CBF)
per plane
     uint8_t*      m_chromaIntraDir;   // array of intra directions (chroma)
-    enum { BytesPerPartition = 24 };  // combined sizeof() of all per-part
data
+    enum { BytesPerPartition = 25 };  // combined sizeof() of all per-part
data

     sse_t*        m_distortion;
     coeff_t*      m_trCoeff[3];       // transformed coefficient buffer
per plane
diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
index aabf386ca..b1d7e3ad1 100644
--- a/source/encoder/analysis.cpp
+++ b/source/encoder/analysis.cpp
@@ -520,6 +520,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
parentCTU, const CUGeom& cuGeom
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);

+    if (m_param->rc.cuTree)
+        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
+
     bool bAlreadyDecided = m_param->intraRefine != 4 &&
parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
!(m_param->bAnalysisType == HEVC_INFO);
     bool bDecidedDepth = m_param->intraRefine != 4 &&
parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
     int split = 0;
@@ -870,6 +873,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
parentCTU, const CUGeom& c
     uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU,
cuGeom) : 0;
     uint32_t splitRefs[4] = { 0, 0, 0, 0 };

+    if (m_param->rc.cuTree)
+        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
+
     X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
support RD 0 or 1\n");

     PMODE pmode(*this, cuGeom);
@@ -1152,6 +1158,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
CUData& parentCTU, const CUGeom&
     uint32_t cuAddr = parentCTU.m_cuAddr;
     ModeDepth& md = m_modeDepth[depth];

+    if (m_param->rc.cuTree)
+        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;

     if (m_param->searchMethod == X265_SEA)
     {
@@ -1856,6 +1864,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
     ModeDepth& md = m_modeDepth[depth];
     md.bestMode = NULL;

+    if (m_param->rc.cuTree)
+        parentCTU.m_qpreuse[cuGeom.geomRecurId] = (int8_t)qp;
+
     if (m_param->searchMethod == X265_SEA)
     {
         int numPredDir = m_slice->isInterP() ? 1 : 2;
@@ -3643,15 +3654,26 @@ int Analysis::calculateQpforCuSize(const CUData&
ctu, const CUGeom& cuGeom, int3
         if ((distortionData->threshold[ctu.m_cuAddr] < 0.9 ||
distortionData->threshold[ctu.m_cuAddr] > 1.1)
             && distortionData->highDistortionCtuCount &&
distortionData->lowDistortionCtuCount)
             qp += distortionData->offset[ctu.m_cuAddr];
-    }
+ }

     if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
     {
-        int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
cuGeom.absPartIdx;
-        if (ctu.m_slice->m_sliceType == I_SLICE)
-            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
+        if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
== 10)
+        {
+            int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
cuGeom.absPartIdx;
+            if (ctu.m_slice->m_sliceType == I_SLICE)
+                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
+            else
+                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
+        }
         else
-            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
+        {
+            int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
cuGeom.geomRecurId;
+            if (ctu.m_slice->m_sliceType == I_SLICE)
+                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOffReuse[cuIdx]));
+            else
+                return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOffReuse[cuIdx]));
+        }
     }
     if (m_param->rc.hevcAq)
     {
diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
index a986355e0..0f266d328 100644
--- a/source/encoder/api.cpp
+++ b/source/encoder/api.cpp
@@ -825,7 +825,10 @@ void x265_alloc_analysis_data(x265_param *param,
x265_analysis_data* analysis)
         CHECKED_MALLOC_ZERO(intraData->partSizes, char,
analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
         if (param->rc.cuTree)
+        {
             CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+            CHECKED_MALLOC_ZERO(intraData->cuQPOffReuse, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+        }
     }
     analysis->intraData = intraData;

@@ -837,7 +840,10 @@ void x265_alloc_analysis_data(x265_param *param,
x265_analysis_data* analysis)
         CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);

         if (param->rc.cuTree && !isMultiPassOpt)
+        {
             CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+            CHECKED_MALLOC_ZERO(interData->cuQPOffReuse, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+        }
         CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
analysis->numPartitions * analysis->numCUsInFrame);
@@ -919,7 +925,10 @@ void x265_free_analysis_data(x265_param *param,
x265_analysis_data* analysis)
             X265_FREE((analysis->intraData)->partSizes);
             X265_FREE((analysis->intraData)->chromaModes);
             if (param->rc.cuTree)
+            {
                 X265_FREE((analysis->intraData)->cuQPOff);
+                X265_FREE((analysis->intraData)->cuQPOffReuse);
+            }
         }
         X265_FREE(analysis->intraData);
         analysis->intraData = NULL;
@@ -931,7 +940,10 @@ void x265_free_analysis_data(x265_param *param,
x265_analysis_data* analysis)
         X265_FREE((analysis->interData)->depth);
         X265_FREE((analysis->interData)->modes);
         if (!isMultiPassOpt && param->rc.cuTree)
+        {
             X265_FREE((analysis->interData)->cuQPOff);
+            X265_FREE((analysis->interData)->cuQPOffReuse);
+        }
         X265_FREE((analysis->interData)->mvpIdx[0]);
         X265_FREE((analysis->interData)->mvpIdx[1]);
         X265_FREE((analysis->interData)->mv[0]);
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index 1f710e1ce..9666744f3 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -4452,19 +4452,25 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             return;

         uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
*partSizes = NULL;
-        int8_t *cuQPBuf = NULL;
+        int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;

         tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
         partSizes = tempBuf + 2 * depthBytes;
         if (m_param->rc.cuTree)
+        {
             cuQPBuf = X265_MALLOC(int8_t, depthBytes);
+ cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
analysis->numCUsInFrame);
+        }

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
intraPic->chromaModes);
         X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->partSizes);
-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
+        if (m_param->rc.cuTree) {
+            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, intraPic->cuQPOff);
+            X265_FREAD(cuQPReuseBuf, sizeof(int8_t), (scaledNumPartition *
analysis->numCUsInFrame), m_analysisFileIn, intraPic->cuQPOffReuse);
+        }

         size_t count = 0;
         for (uint32_t d = 0; d < depthBytes; d++)
@@ -4484,7 +4490,11 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
                 memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d],
bytes);
             count += bytes;
         }
-
+        if (m_param->rc.cuTree)
+        {
+ for (uint32_t i = 0; i < (scaledNumPartition * analysis->numCUsInFrame);
i++)
+                memset(&(analysis->intraData)->cuQPOffReuse[i],
cuQPReuseBuf[i], sizeof(int8_t));
+        }
         if (!m_param->scaleFactor)
         {
             X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t),
numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
@@ -4498,7 +4508,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             X265_FREE(tempLumaBuf);
         }
         if (m_param->rc.cuTree)
+        {
             X265_FREE(cuQPBuf);
+            X265_FREE(cuQPReuseBuf);
+        }
         X265_FREE(tempBuf);
         consumedBytes += frameRecordSize;
     }
@@ -4515,7 +4528,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
         MV* mv[2];
         int8_t* refIdx[2];
-        int8_t* cuQPBuf = NULL;
+        int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;

         int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
         bool bIntraInInter = false;
@@ -4536,11 +4549,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             depthBuf = tempBuf;
             modeBuf = tempBuf + depthBytes;
             if (m_param->rc.cuTree)
+            {
                 cuQPBuf = X265_MALLOC(int8_t, depthBytes);
+ cuQPReuseBuf = X265_MALLOC(int8_t, scaledNumPartition *
analysis->numCUsInFrame);
+            }

             X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->depth);
             X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->modes);
-            if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, interPic->cuQPOff); }
+            if (m_param->rc.cuTree) {
+                X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, interPic->cuQPOff);
+                X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
(scaledNumPartition * analysis->numCUsInFrame), m_analysisFileIn,
interPic->cuQPOffReuse);
+            }

             if (m_param->analysisLoadReuseLevel > 4)
             {
@@ -4611,9 +4630,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
                 }
                 count += bytes;
             }
+            if (m_param->rc.cuTree)
+            {
+ for (uint32_t i = 0; i < (scaledNumPartition * analysis->numCUsInFrame);
i++)
+                    memset(&(analysis->interData)->cuQPOffReuse[i],
cuQPReuseBuf[i], sizeof(int8_t));
+            }

             if (m_param->rc.cuTree)
+            {
                 X265_FREE(cuQPBuf);
+                X265_FREE(cuQPReuseBuf);
+            }
             X265_FREE(tempBuf);
         }
         if (m_param->analysisLoadReuseLevel == 10)
@@ -4814,19 +4841,26 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             return;

         uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
*partSizes = NULL;
-        int8_t *cuQPBuf = NULL;
+        int8_t *cuQPBuf = NULL, *cuQPReuseBuf = NULL;;

         tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
         partSizes = tempBuf + 2 * depthBytes;
         if (m_param->rc.cuTree)
+        {
             cuQPBuf = X265_MALLOC(int8_t, depthBytes);
+            cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions /
factor) * analysis->numCUsInFrame);
+        }

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
intraPic->chromaModes);
         X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->partSizes);
-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
+        if (m_param->rc.cuTree)
+        {
+            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, intraPic->cuQPOff);
+            X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
((analysis->numPartitions / factor) * analysis->numCUsInFrame),
m_analysisFileIn, intraPic->cuQPOffReuse);
+        }

         uint32_t count = 0;
         for (uint32_t d = 0; d < depthBytes; d++)
@@ -4869,7 +4903,10 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         }
         X265_FREE(tempLumaBuf);
         if (m_param->rc.cuTree)
+        {
             X265_FREE(cuQPBuf);
+            X265_FREE(cuQPReuseBuf);
+        }
         X265_FREE(tempBuf);
         consumedBytes += frameRecordSize;
     }
@@ -4886,7 +4923,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
         MV* mv[2];
         int8_t* refIdx[2];
-        int8_t* cuQPBuf = NULL;
+        int8_t* cuQPBuf = NULL, *cuQPReuseBuf = NULL;

         int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
         bool bIntraInInter = false;
@@ -4901,11 +4938,18 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
         if (m_param->rc.cuTree)
+        {
             cuQPBuf = X265_MALLOC(int8_t, depthBytes);
+            cuQPReuseBuf = X265_MALLOC(int8_t, (analysis->numPartitions /
factor) * analysis->numCUsInFrame);
+        }

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
interPic->modes);
-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, interPic->cuQPOff); }
+        if (m_param->rc.cuTree)
+        {
+            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, interPic->cuQPOff);
+            X265_FREAD(cuQPReuseBuf, sizeof(int8_t),
(analysis->numPartitions / factor) * analysis->numCUsInFrame,
m_analysisFileIn, interPic->cuQPOffReuse);
+        }
         if (m_param->analysisLoadReuseLevel > 4)
         {
             partSize = modeBuf + depthBytes;
@@ -5017,7 +5061,16 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         }

         if (m_param->rc.cuTree)
+        {
+            for (uint32_t i = 0; i < ((analysis->numPartitions / factor) *
analysis->numCUsInFrame); i++)
+                memset(&(analysis->interData)->cuQPOffReuse[i],
cuQPReuseBuf[i], sizeof(int8_t));
+        }
+
+        if (m_param->rc.cuTree)
+        {
             X265_FREE(cuQPBuf);
+            X265_FREE(cuQPReuseBuf);
+        }
         X265_FREE(tempBuf);

         if (m_param->analysisLoadReuseLevel == 10)
@@ -5540,6 +5593,12 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
                         intraDataCTU->cuQPOff[depthBytes] =
(int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
                     absPartIdx += ctu->m_numPartitions >> (depth * 2);
                 }
+
+                if (m_param->rc.cuTree)
+                {
+                    for (uint32_t i = (cuAddr * ctu->m_numPartitions), j =
0; j < ctu->m_numPartitions; i++, j++)
+                        intraDataCTU->cuQPOffReuse[i] =
(int8_t)(ctu->m_qpreuse[j] - baseQP);
+                }
                 memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
ctu->m_numPartitions);
             }
         }
@@ -5599,13 +5658,20 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
                     }
                     absPartIdx += ctu->m_numPartitions >> (depth * 2);
                 }
+
+                if (m_param->rc.cuTree)
+                {
+                    for (uint32_t i = (cuAddr * ctu->m_numPartitions), j =
0; j < ctu->m_numPartitions; i++, j++)
+                        interDataCTU->cuQPOffReuse[i] =
(int8_t)(ctu->m_qpreuse[j] - baseQP);
+                }
+
                 if (m_param->analysisSaveReuseLevel == 10 && bIntraInInter)
                     memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
ctu->m_numPartitions);
             }
         }

         if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
== X265_TYPE_I) && m_param->rc.cuTree)
-            analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
(sizeof(int8_t) * depthBytes);
+            analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
(sizeof(int8_t) * depthBytes) + (sizeof(int8_t) * analysis->numPartitions
 * analysis->numCUsInFrame);
         else if (analysis->sliceType == X265_TYPE_IDR ||
analysis->sliceType == X265_TYPE_I)
             analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
         else
@@ -5613,7 +5679,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
             /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */
             analysis->frameRecordSize += depthBytes * 2;
             if (m_param->rc.cuTree)
-            analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
+            {
+                analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
+                analysis->frameRecordSize += (sizeof(int8_t) *
analysis->numPartitions * analysis->numCUsInFrame);
+            }
             if (m_param->analysisSaveReuseLevel > 4)
                 analysis->frameRecordSize += (depthBytes * 2);

@@ -5669,7 +5738,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
         X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
depthBytes, m_analysisFileOut);
         if (m_param->rc.cuTree)
+        {
             X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
depthBytes, m_analysisFileOut);
+            X265_FWRITE((analysis->intraData)->cuQPOffReuse,
sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
m_analysisFileOut);
+        }
         X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
     }
     else
@@ -5677,7 +5749,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
         X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         if (m_param->rc.cuTree)
+        {
             X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
depthBytes, m_analysisFileOut);
+            X265_FWRITE((analysis->interData)->cuQPOffReuse,
sizeof(int8_t), (analysis->numCUsInFrame * analysis->numPartitions),
m_analysisFileOut);
+        }
         if (m_param->analysisSaveReuseLevel > 4)
         {
             X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index 0adb0d0db..3bc01268b 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames,
bool bKeyframe)

     if (!framecnt)
     {
-        if (m_param->rc.cuTree)
+        if (m_param->rc.cuTree && !m_param->analysisLoad)
             cuTree(frames, 0, bKeyframe);
         return;
     }
diff --git a/source/x265.h b/source/x265.h
index f44040ba7..d6a828539 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -145,6 +145,7 @@ typedef struct x265_analysis_intra_data
     char*     partSizes;
     uint8_t*  chromaModes;
     int8_t*    cuQPOff;
+    int8_t*   cuQPOffReuse;
 }x265_analysis_intra_data;

 typedef struct x265_analysis_MV
@@ -170,6 +171,7 @@ typedef struct x265_analysis_inter_data
     x265_analysis_MV*         mv[2];
     int64_t*     sadCost;
     int8_t*    cuQPOff;
+    int8_t*    cuQPOffReuse;
 }x265_analysis_inter_data;

 typedef struct x265_weight_param
-- 
2.23.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20201117/637ef75e/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: correct_reusing_cutree_qpoffsets.diff
Type: application/octet-stream
Size: 27434 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20201117/637ef75e/attachment-0001.obj>


More information about the x265-devel mailing list