[x265] [X265] [Patch] [Master, Release 3.5] fix: corrects output mismatch for cutree-enabled analysis save/load encodes with reuse levels between 1 and 10 for similar encoder settings.

Srikanth Kurapati srikanth.kurapati at multicorewareinc.com
Mon Feb 1 14:21:04 UTC 2021


From cd1f18eb201c50a63bf15a5ac1d972ae1ca8ceb4 Mon Sep 17 00:00:00 2001
From: Srikanth Kurapati <srikanth.kurapati at multicorewareinc.com>
Date: Wed, 30 Dec 2020 17:00:08 +0530
Subject: [PATCH] fix: corrects output mismatch for cutree-enabled analysis
 save/load encodes with reuse levels between 1 and 10 for similar encoder
 settings.

- updates the documentation for the analysis save/load options.
---
 doc/reST/cli.rst             |  34 ++++-----
 source/abrEncApp.cpp         |  16 ++++-
 source/common/common.h       |   4 ++
 source/common/cudata.h       |   2 +-
 source/encoder/analysis.cpp  |  32 ++++++++-
 source/encoder/analysis.h    |   1 +
 source/encoder/api.cpp       |  28 +++++++-
 source/encoder/encoder.cpp   | 130 ++++++++++++++++++++++++++---------
 source/encoder/slicetype.cpp |   2 +-
 source/x265.h                |   4 +-
 10 files changed, 190 insertions(+), 63 deletions(-)

diff --git a/doc/reST/cli.rst b/doc/reST/cli.rst
index 94c2a5175..bb1396e8a 100755
--- a/doc/reST/cli.rst
+++ b/doc/reST/cli.rst
@@ -934,14 +934,14 @@ will not reuse analysis if slice type parameters do
not match.
 .. option:: --analysis-save <filename>

  Encoder outputs analysis information of each frame. Analysis data from
save mode is
- written to the file specified. Requires cutree, pmode to be off. Default
disabled.
+ written to the file specified. Requires pmode to be off. Default disabled.

  The amount of analysis data stored is determined by
:option:`--analysis-save-reuse-level`.

 .. option:: --analysis-load <filename>

  Encoder reuses analysis information from the file specified. By reading
the analysis data written by
- an earlier encode of the same sequence, substantial redundant work may be
avoided. Requires cutree, pmode
+ an earlier encode of the same sequence, substantial redundant work may be
avoided. Requires pmode
  to be off. Default disabled.

  The amount of analysis data reused is determined by
:option:`--analysis-load-reuse-level`.
@@ -961,21 +961,21 @@ will not reuse analysis if slice type parameters do
not match.
  Note that :option:`--analysis-save-reuse-level` and
:option:`--analysis-load-reuse-level` must be paired
  with :option:`--analysis-save` and :option:`--analysis-load` respectively.

- +--------------+------------------------------------------+
- | Level        | Description                              |
- +==============+==========================================+
- | 1            | Lookahead information                    |
- +--------------+------------------------------------------+
- | 2 to 4       | Level 1 + intra/inter modes, ref's       |
- +--------------+------------------------------------------+
- | 5 and 6      | Level 2 + rect-amp                       |
- +--------------+------------------------------------------+
- | 7            | Level 5 + AVC size CU refinement         |
- +--------------+------------------------------------------+
- | 8 and 9      | Level 5 + AVC size Full CU analysis-info |
- +--------------+------------------------------------------+
- | 10           | Level 5 + Full CU analysis-info          |
- +--------------+------------------------------------------+
+ +--------------+----------------------------------------------------+
+ | Level        | Description                                        |
+ +==============+====================================================+
+ | 1            | Lookahead information                              |
+ +--------------+----------------------------------------------------+
+ | 2 to 4       | Level 1 + intra/inter modes, ref's, cutree offsets |
+ +--------------+----------------------------------------------------+
+ | 5 and 6      | Level 2 + rect-amp                                 |
+ +--------------+----------------------------------------------------+
+ | 7            | Level 5 + AVC size CU refinement                   |
+ +--------------+----------------------------------------------------+
+ | 8 and 9      | Level 5 + AVC size Full CU analysis-info           |
+ +--------------+----------------------------------------------------+
+ | 10           | Level 5 + Full CU analysis-info                    |
+ +--------------+----------------------------------------------------+

 .. option:: --refine-mv-type <string>

diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
index fa62ebf63..4c6964cfb 100644
--- a/source/abrEncApp.cpp
+++ b/source/abrEncApp.cpp
@@ -99,6 +99,8 @@ namespace X265_NS {
             }

             m_analysisBuffer[pass] = X265_MALLOC(x265_analysis_data,
m_queueSize);
+            if (m_analysisBuffer[pass])
+                memset(m_analysisBuffer[pass], 0,
sizeof(x265_analysis_data) * m_queueSize);
             m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize];
             m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize];
             m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize];
@@ -340,7 +342,12 @@ namespace X265_NS {
             memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char)
* src->depthBytes);
             memcpy(intraDst->chromaModes, intraSrc->chromaModes,
sizeof(uint8_t) * src->depthBytes);
             if (m_param->rc.cuTree)
-                memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+            {
+                if (m_param->analysisSaveReuseLevel == 10)
+                    memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+                else
+                    memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
sizeof(int8_t) * src->numCUsInFrame *
X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize));
+            }
         }
         else
         {
@@ -355,7 +362,12 @@ namespace X265_NS {
             memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
src->depthBytes);
             memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
src->depthBytes);
             if (m_param->rc.cuTree)
-                memcpy(interDst->cuQPOff, interSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+            {
+                if (m_param->analysisSaveReuseLevel == 10)
+                    memcpy(interDst->cuQPOff, interSrc->cuQPOff,
sizeof(int8_t) * src->depthBytes);
+                else
+                    memcpy(interDst->cuQPOff, interSrc->cuQPOff,
sizeof(int8_t) * src->numCUsInFrame *
X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize));
+            }
             if (m_param->analysisSaveReuseLevel > 4)
             {
                 memcpy(interDst->partSize, interSrc->partSize,
sizeof(uint8_t) * src->depthBytes);
diff --git a/source/common/common.h b/source/common/common.h
index 8c06cd79e..a8f3ae71a 100644
--- a/source/common/common.h
+++ b/source/common/common.h
@@ -343,6 +343,10 @@ typedef int16_t  coeff_t;      // transform coefficient

 namespace X265_NS {

+const uint8_t g_maxCtuSplits[MIN_LOG2_CU_SIZE + 1] = { 1, 5, 21, 85 }; /*
max ctu partitions as per min max cu configurations */
+
+#define X265_MAX_CTU_SPLITS(maxcusize, mincusize)
(g_maxCtuSplits[g_log2Size[maxcusize] - g_log2Size[mincusize]])
+
 enum { SAO_NUM_OFFSET = 4 };

 enum SaoMergeMode
diff --git a/source/common/cudata.h b/source/common/cudata.h
index 8397f0568..c7d9a1972 100644
--- a/source/common/cudata.h
+++ b/source/common/cudata.h
@@ -371,7 +371,7 @@ struct CUDataMemPool
             CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) *
numInstances);
         }
         else
-        {
+        {
             uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) +
CHROMA_V_SHIFT(csp));
             CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) *
numInstances);
         }
diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
index aabf386ca..6e1d5c730 100644
--- a/source/encoder/analysis.cpp
+++ b/source/encoder/analysis.cpp
@@ -74,6 +74,7 @@ Analysis::Analysis()
 {
     m_reuseInterDataCTU = NULL;
     m_reuseRef = NULL;
+    m_reuseQPOff = NULL;
     m_bHD = false;
     m_modeFlag[0] = false;
     m_modeFlag[1] = false;
@@ -220,6 +221,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
const CUGeom& cuGeom, con
         if (m_param->analysisSave && !m_param->analysisLoad)
             for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir;
i++)
                 m_reuseRef[i] = -1;
+
+        if (m_param->rc.cuTree)
+            m_reuseQPOff = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr *
X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize)];
     }
     ProfileCUScope(ctu, totalCTUTime, totalCTUs);

@@ -233,6 +237,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame,
const CUGeom& cuGeom, con
             memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr *
numPartition], sizeof(char) * numPartition);
             memcpy(ctu.m_chromaIntraDir,
&intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
numPartition);
         }
+        if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10)
+            m_reuseQPOff = &intraDataCTU->cuQPOff[ctu.m_cuAddr *
X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize)];
         compressIntraCU(ctu, cuGeom, qp);
     }
     else
@@ -520,6 +526,9 @@ uint64_t Analysis::compressIntraCU(const CUData&
parentCTU, const CUGeom& cuGeom
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);

+    if (m_param->rc.cuTree  && m_param->analysisSaveReuseLevel > 1 &&
m_param->analysisSaveReuseLevel < 10)
+        m_reuseQPOff[cuGeom.geomRecurId] = (int8_t)(qp -
(int32_t)(m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp + 0.5));
+
     bool bAlreadyDecided = m_param->intraRefine != 4 &&
parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX &&
!(m_param->bAnalysisType == HEVC_INFO);
     bool bDecidedDepth = m_param->intraRefine != 4 &&
parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
     int split = 0;
@@ -870,6 +879,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData&
parentCTU, const CUGeom& c
     uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU,
cuGeom) : 0;
     uint32_t splitRefs[4] = { 0, 0, 0, 0 };

+    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
m_param->analysisSaveReuseLevel < 10)
+        m_reuseQPOff[cuGeom.geomRecurId] = (int8_t)(qp -
(int32_t)(m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp + 0.5));
+
     X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not
support RD 0 or 1\n");

     PMODE pmode(*this, cuGeom);
@@ -1152,6 +1164,8 @@ SplitData Analysis::compressInterCU_rd0_4(const
CUData& parentCTU, const CUGeom&
     uint32_t cuAddr = parentCTU.m_cuAddr;
     ModeDepth& md = m_modeDepth[depth];

+    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
m_param->analysisSaveReuseLevel < 10)
+        m_reuseQPOff[cuGeom.geomRecurId] = (int8_t)(qp -
(int32_t)(m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp + 0.5));

     if (m_param->searchMethod == X265_SEA)
     {
@@ -1856,6 +1870,9 @@ SplitData Analysis::compressInterCU_rd5_6(const
CUData& parentCTU, const CUGeom&
     ModeDepth& md = m_modeDepth[depth];
     md.bestMode = NULL;

+    if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 &&
m_param->analysisSaveReuseLevel < 10)
+        m_reuseQPOff[cuGeom.geomRecurId] = (int8_t)(qp -
(int32_t)(m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp + 0.5));
+
     if (m_param->searchMethod == X265_SEA)
     {
         int numPredDir = m_slice->isInterP() ? 1 : 2;
@@ -3647,11 +3664,20 @@ int Analysis::calculateQpforCuSize(const CUData&
ctu, const CUGeom& cuGeom, int3

     if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree)
     {
-        int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
cuGeom.absPartIdx;
+        int cuIdx;
+        int8_t cuQPOffSet = 0;
+
+        if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel
== 10)
+            cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
cuGeom.absPartIdx;
+        else
+            cuIdx = (ctu.m_cuAddr *
X265_MAX_CTU_SPLITS(m_param->maxCUSize, m_param->minCUSize)) +
cuGeom.geomRecurId;
+
         if (ctu.m_slice->m_sliceType == I_SLICE)
-            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
+            cuQPOffSet =
((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx];
         else
-            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 +
((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
+            cuQPOffSet =
((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx];
+
+        return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
(int32_t)(qp + 0.5 + cuQPOffSet));
     }
     if (m_param->rc.hevcAq)
     {
diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h
index 3bcb56bc3..de01f9789 100644
--- a/source/encoder/analysis.h
+++ b/source/encoder/analysis.h
@@ -126,6 +126,7 @@ protected:
     int32_t*                   m_reuseRef;
     uint8_t*                   m_reuseDepth;
     uint8_t*                   m_reuseModes;
+    int8_t*                    m_reuseQPOff; // array of QP values for
analysis reuse at reuse levels > 1 and < 10 when cutree is enabled
     uint8_t*                   m_reusePartSize;
     uint8_t*                   m_reuseMergeFlag;
     x265_analysis_MV*          m_reuseMv[2];
diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
index a986355e0..c263e2a87 100644
--- a/source/encoder/api.cpp
+++ b/source/encoder/api.cpp
@@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param,
x265_analysis_data* analysis)
         CHECKED_MALLOC_ZERO(intraData->partSizes, char,
analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
         if (param->rc.cuTree)
-            CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+        {
+            if (maxReuseLevel == 10)
+            {
+                CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+            }
+            else
+            {
+                CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
X265_MAX_CTU_SPLITS(param->maxCUSize, param->minCUSize) *
analysis->numCUsInFrame);
+            }
+        }
     }
     analysis->intraData = intraData;

@@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param,
x265_analysis_data* analysis)
         CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);

         if (param->rc.cuTree && !isMultiPassOpt)
-            CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+        {
+            if (maxReuseLevel == 10)
+            {
+                CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+            }
+            else
+            {
+                CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
X265_MAX_CTU_SPLITS(param->maxCUSize, param->minCUSize) *
analysis->numCUsInFrame);
+            }
+        }
         CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
         CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
analysis->numPartitions * analysis->numCUsInFrame);
@@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param,
x265_analysis_data* analysis)
             X265_FREE((analysis->intraData)->partSizes);
             X265_FREE((analysis->intraData)->chromaModes);
             if (param->rc.cuTree)
-                X265_FREE((analysis->intraData)->cuQPOff);
+            {
+                X265_FREE_ZERO((analysis->intraData)->cuQPOff);
+            }
         }
         X265_FREE(analysis->intraData);
         analysis->intraData = NULL;
@@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param,
x265_analysis_data* analysis)
         X265_FREE((analysis->interData)->depth);
         X265_FREE((analysis->interData)->modes);
         if (!isMultiPassOpt && param->rc.cuTree)
+        {
             X265_FREE((analysis->interData)->cuQPOff);
+        }
         X265_FREE((analysis->interData)->mvpIdx[0]);
         X265_FREE((analysis->interData)->mvpIdx[1]);
         X265_FREE((analysis->interData)->mv[0]);
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index 1f710e1ce..d64b07848 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -4444,6 +4444,17 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             }
         }
     }
+
+    int8_t *cuQPBuf = NULL;
+    uint32_t reuseBufSize = 0;
+    if (m_param->rc.cuTree)
+    {
+        if (m_param->analysisLoadReuseLevel == 10)
+            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
+        else if (m_param->analysisLoadReuseLevel > 1)
+            reuseBufSize = X265_MAX_CTU_SPLITS(m_param->maxCUSize,
m_param->minCUSize) * analysis->numCUsInFrame;
+    }
+
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
X265_TYPE_I)
     {
         if (m_param->bAnalysisType == HEVC_INFO)
@@ -4452,19 +4463,26 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             return;

         uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
*partSizes = NULL;
-        int8_t *cuQPBuf = NULL;

         tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
         partSizes = tempBuf + 2 * depthBytes;
-        if (m_param->rc.cuTree)
-            cuQPBuf = X265_MALLOC(int8_t, depthBytes);

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
intraPic->chromaModes);
         X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->partSizes);
-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
+        if (m_param->rc.cuTree)
+        {
+            if (m_param->analysisLoadReuseLevel == 10)
+            {
+                X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, intraPic->cuQPOff);
+            }
+            else if (m_param->analysisLoadReuseLevel > 1)
+            {
+                X265_FREAD(analysis->intraData->cuQPOff, sizeof(int8_t),
reuseBufSize, m_analysisFileIn, intraPic->cuQPOff);
+            }
+        }

         size_t count = 0;
         for (uint32_t d = 0; d < depthBytes; d++)
@@ -4480,7 +4498,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             memset(&(analysis->intraData)->depth[count], depthBuf[d],
bytes);
             memset(&(analysis->intraData)->chromaModes[count], modeBuf[d],
bytes);
             memset(&(analysis->intraData)->partSizes[count], partSizes[d],
bytes);
-            if (m_param->rc.cuTree)
+            if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel ==
10)
                 memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d],
bytes);
             count += bytes;
         }
@@ -4497,7 +4515,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
                 memset(&(analysis->intraData)->modes[cnt],
tempLumaBuf[ctu32Idx], factor);
             X265_FREE(tempLumaBuf);
         }
-        if (m_param->rc.cuTree)
+        if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)
             X265_FREE(cuQPBuf);
         X265_FREE(tempBuf);
         consumedBytes += frameRecordSize;
@@ -4515,7 +4533,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
         MV* mv[2];
         int8_t* refIdx[2];
-        int8_t* cuQPBuf = NULL;

         int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
         bool bIntraInInter = false;
@@ -4535,12 +4552,20 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
             depthBuf = tempBuf;
             modeBuf = tempBuf + depthBytes;
-            if (m_param->rc.cuTree)
-                cuQPBuf = X265_MALLOC(int8_t, depthBytes);

             X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->depth);
             X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->modes);
-            if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, interPic->cuQPOff); }
+            if (m_param->rc.cuTree)
+            {
+                if (m_param->analysisLoadReuseLevel == 10)
+                {
+                    X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, interPic->cuQPOff);
+                }
+                else if (m_param->analysisLoadReuseLevel > 1)
+                {
+                    X265_FREAD(analysis->interData->cuQPOff,
sizeof(int8_t), reuseBufSize, m_analysisFileIn, interPic->cuQPOff);
+                }
+            }

             if (m_param->analysisLoadReuseLevel > 4)
             {
@@ -4578,7 +4603,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
                     depthBuf[d] = 1;
                 memset(&(analysis->interData)->depth[count], depthBuf[d],
bytes);
                 memset(&(analysis->interData)->modes[count], modeBuf[d],
bytes);
-                if (m_param->rc.cuTree)
+                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
== 10)
                     memset(&(analysis->interData)->cuQPOff[count],
cuQPBuf[d], bytes);
                 if (m_param->analysisLoadReuseLevel > 4)
                 {
@@ -4612,7 +4637,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
                 count += bytes;
             }

-            if (m_param->rc.cuTree)
+            if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel ==
10)
                 X265_FREE(cuQPBuf);
             X265_FREE(tempBuf);
         }
@@ -4736,7 +4761,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
     int numPartitions = analysis->numPartitions;
     int numCUsInFrame = analysis->numCUsInFrame;
     int numCuInHeight = analysis->numCuInHeight;
-    /* Allocate memory for scaled resoultion's numPartitions and
numCUsInFrame*/
+    /* Allocate memory for scaled resolution's numPartitions and
numCUsInFrame */
     analysis->numPartitions = m_param->num4x4Partitions;
     analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
     analysis->numCuInHeight = cuLoc.heightInCU;
@@ -4808,25 +4833,42 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         X265_FREE(vbvCostBuf);
     }

+    uint32_t reuseBufSize = 0;
+    int8_t *cuQPBuf = NULL;
+    if (m_param->rc.cuTree)
+    {
+        if (m_param->analysisLoadReuseLevel == 10)
+            cuQPBuf = X265_MALLOC(int8_t, depthBytes);
+        else if (m_param->analysisLoadReuseLevel > 1)
+            reuseBufSize = (X265_MAX_CTU_SPLITS(m_param->maxCUSize,
m_param->minCUSize) / factor) * (analysis->numCUsInFrame);
+    }
+
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
X265_TYPE_I)
     {
         if (m_param->analysisLoadReuseLevel < 2)
             return;

         uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
*partSizes = NULL;
-        int8_t *cuQPBuf = NULL;

         tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
         partSizes = tempBuf + 2 * depthBytes;
-        if (m_param->rc.cuTree)
-            cuQPBuf = X265_MALLOC(int8_t, depthBytes);

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
intraPic->chromaModes);
         X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->partSizes);
-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
+        if (m_param->rc.cuTree)
+        {
+            if (m_param->analysisLoadReuseLevel == 10)
+            {
+                X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, intraPic->cuQPOff);
+            }
+            else if (m_param->analysisLoadReuseLevel > 1)
+            {
+                X265_FREAD(analysis->intraData->cuQPOff, sizeof(int8_t),
reuseBufSize, m_analysisFileIn, intraPic->cuQPOff);
+            }
+        }

         uint32_t count = 0;
         for (uint32_t d = 0; d < depthBytes; d++)
@@ -4848,7 +4890,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
                 memset(&(analysis->intraData)->depth[count], depthBuf[d],
bytes);
                 memset(&(analysis->intraData)->chromaModes[count],
modeBuf[d], bytes);
                 memset(&(analysis->intraData)->partSizes[count],
partSizes[d], bytes);
-                if (m_param->rc.cuTree)
+                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
== 10)
                     memset(&(analysis->intraData)->cuQPOff[count],
cuQPBuf[d], bytes);
                 count += bytes;
                 d += getCUIndex(&cuLoc, &count, bytes, 1);
@@ -4868,7 +4910,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
         }
         X265_FREE(tempLumaBuf);
-        if (m_param->rc.cuTree)
+        if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)
             X265_FREE(cuQPBuf);
         X265_FREE(tempBuf);
         consumedBytes += frameRecordSize;
@@ -4886,7 +4928,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
         MV* mv[2];
         int8_t* refIdx[2];
-        int8_t* cuQPBuf = NULL;

         int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2;
         bool bIntraInInter = false;
@@ -4900,12 +4941,21 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
         tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
-        if (m_param->rc.cuTree)
-            cuQPBuf = X265_MALLOC(int8_t, depthBytes);

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
interPic->modes);
-        if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
depthBytes, m_analysisFileIn, interPic->cuQPOff); }
+        if (m_param->rc.cuTree)
+        {
+            if (m_param->analysisLoadReuseLevel == 10)
+            {
+                X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, interPic->cuQPOff);
+            }
+            else if (m_param->analysisLoadReuseLevel > 1)
+            {
+                X265_FREAD(analysis->interData->cuQPOff, sizeof(int8_t),
reuseBufSize, m_analysisFileIn, interPic->cuQPOff);
+            }
+        }
+
         if (m_param->analysisLoadReuseLevel > 4)
         {
             partSize = modeBuf + depthBytes;
@@ -4954,7 +5004,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             {
                 memset(&(analysis->interData)->depth[count], writeDepth,
bytes);
                 memset(&(analysis->interData)->modes[count], modeBuf[d],
bytes);
-                if (m_param->rc.cuTree)
+                if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel
== 10)
                     memset(&(analysis->interData)->cuQPOff[count],
cuQPBuf[d], bytes);
                 if (m_param->analysisLoadReuseLevel == 10 && bIntraInInter)
                     memset(&(analysis->intraData)->chromaModes[count],
chromaDir[d], bytes);
@@ -5016,7 +5066,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             }
         }

-        if (m_param->rc.cuTree)
+        if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10)
             X265_FREE(cuQPBuf);
         X265_FREE(tempBuf);

@@ -5046,7 +5096,9 @@ void Encoder::readAnalysisFile(x265_analysis_data*
analysis, int curPoc, const x
             }
         }
         else
+        {
             X265_FREAD((analysis->interData)->ref, sizeof(int32_t),
analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir,
m_analysisFileIn, interPic->ref);
+        }

         consumedBytes += frameRecordSize;
         if (numDir == 1)
@@ -5510,8 +5562,11 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
         analysis->frameRecordSize += analysis->numCUsInFrame *
sizeof(sse_t);
     }

+    uint32_t reuseQPBufsize = 0;
     if (m_param->analysisSaveReuseLevel > 1)
     {
+        if (m_param->rc.cuTree)
+            reuseQPBufsize = X265_MAX_CTU_SPLITS(m_param->maxCUSize,
m_param->minCUSize) * analysis->numCUsInFrame;

         if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
X265_TYPE_I)
         {
@@ -5536,12 +5591,15 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
                     partSize = ctu->m_partSize[absPartIdx];
                     intraDataCTU->partSizes[depthBytes] = partSize;

-                    if (m_param->rc.cuTree)
+                    if (m_param->rc.cuTree &&
m_param->analysisSaveReuseLevel == 10)
                         intraDataCTU->cuQPOff[depthBytes] =
(int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
                     absPartIdx += ctu->m_numPartitions >> (depth * 2);
                 }
+
                 memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
ctu->m_numPartitions);
             }
+            if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel ==
10)
+                reuseQPBufsize = depthBytes;
         }
         else
         {
@@ -5567,7 +5625,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data*
analysis, FrameData &curEncD
                         predMode = 4; // used as indicator if the block is
coded as bidir

                     interDataCTU->modes[depthBytes] = predMode;
-                    if (m_param->rc.cuTree)
+                    if (m_param->rc.cuTree &&
m_param->analysisSaveReuseLevel == 10)
                         interDataCTU->cuQPOff[depthBytes] =
(int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);

                     if (m_param->analysisSaveReuseLevel > 4)
@@ -5599,21 +5657,25 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
                     }
                     absPartIdx += ctu->m_numPartitions >> (depth * 2);
                 }
+
                 if (m_param->analysisSaveReuseLevel == 10 && bIntraInInter)
                     memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
ctu->m_numPartitions);
             }
+            if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel ==
10)
+                reuseQPBufsize = depthBytes;
         }

-        if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
== X265_TYPE_I) && m_param->rc.cuTree)
-            analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
(sizeof(int8_t) * depthBytes);
+        if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
== X265_TYPE_I) && (m_param->rc.cuTree && m_param->analysisSaveReuseLevel >
1))
+            analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
(sizeof(int8_t) * reuseQPBufsize);
         else if (analysis->sliceType == X265_TYPE_IDR ||
analysis->sliceType == X265_TYPE_I)
             analysis->frameRecordSize += sizeof(uint8_t)*
analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
         else
         {
             /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */
             analysis->frameRecordSize += depthBytes * 2;
-            if (m_param->rc.cuTree)
-            analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
+            if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1)
+                analysis->frameRecordSize += (sizeof(int8_t) *
reuseQPBufsize);
+
             if (m_param->analysisSaveReuseLevel > 4)
                 analysis->frameRecordSize += (depthBytes * 2);

@@ -5669,7 +5731,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
         X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
depthBytes, m_analysisFileOut);
         if (m_param->rc.cuTree)
-            X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
depthBytes, m_analysisFileOut);
+            X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
reuseQPBufsize, m_analysisFileOut);
         X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
     }
     else
@@ -5677,7 +5739,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
         X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         if (m_param->rc.cuTree)
-            X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
depthBytes, m_analysisFileOut);
+            X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
reuseQPBufsize, m_analysisFileOut);
         if (m_param->analysisSaveReuseLevel > 4)
         {
             X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
@@ -5762,7 +5824,7 @@ void Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c
                     interData->mv[1][depthBytes].word =
ctu->m_mv[1][absPartIdx].word;
                     interData->mvpIdx[1][depthBytes] =
ctu->m_mvpIdx[1][absPartIdx];
                     ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx];
-                    predMode = 4; // used as indiacator if the block is
coded as bidir
+                    predMode = 4; // used as indicator if the block is
coded as bidir
                 }
                 interData->modes[depthBytes] = predMode;

diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index 0adb0d0db..9bee58192 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, bool bKeyframe)

     if (!framecnt)
     {
-        if (m_param->rc.cuTree)
+        if (m_param->rc.cuTree && (!m_param->analysisLoad ||
(m_param->analysisLoad && m_param->analysisLoadReuseLevel == 1)))
             cuTree(frames, 0, bKeyframe);
         return;
     }
diff --git a/source/x265.h b/source/x265.h
index f44040ba7..8d7a75826 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data
     uint8_t*  modes;
     char*     partSizes;
     uint8_t*  chromaModes;
-    int8_t*    cuQPOff;
+    int8_t*   cuQPOff;
 }x265_analysis_intra_data;

 typedef struct x265_analysis_MV
@@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data
     uint8_t*    interDir;
     uint8_t*    mvpIdx[2];
     int8_t*     refIdx[2];
-    x265_analysis_MV*         mv[2];
+    x265_analysis_MV* mv[2];
     int64_t*     sadCost;
     int8_t*    cuQPOff;
 }x265_analysis_inter_data;
-- 
2.20.1.windows.1


-- 
*With Regards,*
*Srikanth Kurapati.*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210201/2c100c44/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-fix-corrects-output-mismatch-for-cutree-enabled-anal.patch
Type: application/octet-stream
Size: 36517 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20210201/2c100c44/attachment-0001.obj>


More information about the x265-devel mailing list