[x265] [PATCH] x265 - cuTree offset analysis reuse

Akil akil at multicorewareinc.com
Mon Dec 10 06:47:00 CET 2018


# HG changeset patch
# User Akil <akil at multicorewareinc.com>
# Date 1539754718 -19800
#      Wed Oct 17 11:08:38 2018 +0530
# Node ID 4eda86eadc691bfb9d86425d2bc7621dee06525e
# Parent  f74003e88622dafc62f6c3c50720872df4d928bc
Cutree offset for analysis reuse

diff -r f74003e88622 -r 4eda86eadc69 source/common/cudata.cpp
--- a/source/common/cudata.cpp Thu Nov 22 15:02:08 2018 +0530
+++ b/source/common/cudata.cpp Wed Oct 17 11:08:38 2018 +0530
@@ -193,6 +193,7 @@
         uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions *
(BytesPerPartition - 4)) * instance;

         m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
         m_log2CUSize         = charBuf; charBuf += m_numPartitions;
         m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
         m_tqBypass           = charBuf; charBuf += m_numPartitions;
@@ -233,6 +234,7 @@
         uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions *
BytesPerPartition) * instance;

         m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
+        m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
         m_log2CUSize         = charBuf; charBuf += m_numPartitions;
         m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
         m_tqBypass           = charBuf; charBuf += m_numPartitions;
@@ -291,6 +293,7 @@

     /* sequential memsets */
     m_partSet((uint8_t*)m_qp, (uint8_t)qp);
+    m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp);
     m_partSet(m_log2CUSize,   (uint8_t)m_slice->m_param->maxLog2CUSize);
     m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
     m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
@@ -304,7 +307,7 @@
     X265_CHECK(!(frame.m_encData->m_param->bLossless &&
!m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without
TQbypass in PPS\n");

     /* initialize the remaining CU data in one memset */
-    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions);
+    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);

     for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
         m_refTuDepth[i] = -1;
@@ -344,6 +347,7 @@
     X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size
mismatch\n");

     m_partSet((uint8_t*)m_qp, (uint8_t)qp);
+    m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp);

     m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
     m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
@@ -354,7 +358,7 @@
     m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);

     /* initialize the remaining CU data in one memset */
-    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
+    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
     memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
 }

@@ -369,6 +373,7 @@
     m_bLastCuInSlice = subCU.m_bLastCuInSlice;

     m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
+    m_subPartCopy((uint8_t*)m_qpAnalysis + offset, (uint8_t*)subCU.m_qpAnalysis);
     m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
     m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
     m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
@@ -469,6 +474,7 @@
     CUData& ctu = *m_encData->getPicCTU(m_cuAddr);

     m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
+    m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis);
     m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
     m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
     m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
@@ -523,7 +529,11 @@
     m_numPartitions = cuGeom.numPartitions;

     /* copy out all prediction info for this part */
-    if (copyQp) m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
+    if (copyQp)
+    {
+        m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
+        m_partCopy((uint8_t*)m_qpAnalysis, (uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU);
+    }

     m_partCopy(m_log2CUSize,   ctu.m_log2CUSize + m_absIdxInCTU);
     m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
@@ -566,6 +576,7 @@
     CUData& ctu = *m_encData->getPicCTU(m_cuAddr);

     m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
+    m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis);
     m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
     m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
     m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
diff -r f74003e88622 -r 4eda86eadc69 source/common/cudata.h
--- a/source/common/cudata.h Thu Nov 22 15:02:08 2018 +0530
+++ b/source/common/cudata.h Wed Oct 17 11:08:38 2018 +0530
@@ -191,6 +191,7 @@

     /* Per-part data, stored contiguously */
     int8_t*       m_qp;               // array of QP values
+    int8_t*       m_qpAnalysis;       // array of QP values for analysis reuse
     uint8_t*      m_log2CUSize;       // array of cu log2Size TODO: seems
redundant to depth
     uint8_t*      m_lumaIntraDir;     // array of intra directions (luma)
     uint8_t*      m_tqBypass;         // array of CU lossless flags
@@ -206,7 +207,7 @@
     uint8_t*      m_transformSkip[3]; // array of transform skipping flags
per plane
     uint8_t*      m_cbf[3];           // array of coded block flags (CBF)
per plane
     uint8_t*      m_chromaIntraDir;   // array of intra directions (chroma)
-    enum { BytesPerPartition = 23 };  // combined sizeof() of all per-part data
+    enum { BytesPerPartition = 24 };  // combined sizeof() of all per-part data

     sse_t*        m_distortion;
     coeff_t*      m_trCoeff[3];       // transformed coefficient buffer
per plane
diff -r f74003e88622 -r 4eda86eadc69 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Thu Nov 22 15:02:08 2018 +0530
+++ b/source/encoder/analysis.cpp Wed Oct 17 11:08:38 2018 +0530
@@ -3568,6 +3568,14 @@
             qp += distortionData->offset[ctu.m_cuAddr];
     }

+    if (m_param->analysisLoad && m_param->analysisReuseLevel == 10 && m_param->rc.cuTree)
+    {
+        int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + cuGeom.absPartIdx;
+        if (ctu.m_slice->m_sliceType == I_SLICE)
+            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]));
+        else
+            return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
+    }
     int loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;

     /* Use cuTree offsets if cuTree enabled and frame is referenced, else
use AQ offsets */
diff -r f74003e88622 -r 4eda86eadc69 source/encoder/api.cpp
--- a/source/encoder/api.cpp Thu Nov 22 15:02:08 2018 +0530
+++ b/source/encoder/api.cpp Wed Oct 17 11:08:38 2018 +0530
@@ -453,6 +453,8 @@
     CHECKED_MALLOC(intraData->modes, uint8_t, analysis->numPartitions *
analysis->numCUsInFrame);
     CHECKED_MALLOC(intraData->partSizes, char, analysis->numPartitions *
analysis->numCUsInFrame);
     CHECKED_MALLOC(intraData->chromaModes, uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
+    if (param->rc.cuTree)
+        CHECKED_MALLOC(intraData->cuQPOff, int8_t, analysis->numPartitions
* analysis->numCUsInFrame);
     analysis->intraData = intraData;

     //Allocate memory for interData pointer based on ReuseLevels
@@ -460,6 +462,8 @@
     CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions *
analysis->numCUsInFrame);
     CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions *
analysis->numCUsInFrame);

+    if (param->rc.cuTree)
+        CHECKED_MALLOC(interData->cuQPOff, int8_t, analysis->numPartitions
* analysis->numCUsInFrame);
     CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
     CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
analysis->numPartitions * analysis->numCUsInFrame);
     CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
analysis->numPartitions * analysis->numCUsInFrame);
@@ -537,6 +541,8 @@
         X265_FREE((analysis->intraData)->modes);
         X265_FREE((analysis->intraData)->partSizes);
         X265_FREE((analysis->intraData)->chromaModes);
+        if (param->rc.cuTree)
+            X265_FREE((analysis->intraData)->cuQPOff);
         X265_FREE(analysis->intraData);
         analysis->intraData = NULL;
     }
@@ -546,6 +552,8 @@
     {
         X265_FREE((analysis->interData)->depth);
         X265_FREE((analysis->interData)->modes);
+        if (param->rc.cuTree)
+            X265_FREE((analysis->interData)->cuQPOff);
         X265_FREE((analysis->interData)->mvpIdx[0]);
         X265_FREE((analysis->interData)->mvpIdx[1]);
         X265_FREE((analysis->interData)->mv[0]);
diff -r f74003e88622 -r 4eda86eadc69 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Nov 22 15:02:08 2018 +0530
+++ b/source/encoder/encoder.cpp Wed Oct 17 11:08:38 2018 +0530
@@ -2745,12 +2745,6 @@
         p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
     }

-    if ((p->analysisLoad || p->analysisSave) && p->rc.cuTree)
-    {
-        x265_log(p, X265_LOG_WARNING, "Analysis load/save options works
only with cu-tree off, Disabling cu-tree\n");
-        p->rc.cuTree = 0;
-    }
-
     if ((p->analysisLoad || p->analysisSave) &&
(p->analysisMultiPassRefine || p->analysisMultiPassDistortion))
     {
         x265_log(p, X265_LOG_WARNING, "Cannot use Analysis load/save
option and multi-pass-opt-analysis/multi-pass-opt-distortion together,"
@@ -3287,15 +3281,20 @@
             return;

         uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
*partSizes = NULL;
+        int8_t *cuQPBuf = NULL;

         tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
         partSizes = tempBuf + 2 * depthBytes;
+        if (m_param->rc.cuTree)
+            cuQPBuf = X265_MALLOC(int8_t, depthBytes);

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
intraPic->chromaModes);
         X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->partSizes);
+        if (m_param->rc.cuTree)
+            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, intraPic->cuQPOff);

         size_t count = 0;
         for (uint32_t d = 0; d < depthBytes; d++)
@@ -3311,6 +3310,8 @@
             memset(&(analysis->intraData)->depth[count], depthBuf[d],
bytes);
             memset(&(analysis->intraData)->chromaModes[count], modeBuf[d],
bytes);
             memset(&(analysis->intraData)->partSizes[count], partSizes[d],
bytes);
+            if (m_param->rc.cuTree)
+                memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d],
bytes);
             count += bytes;
         }

@@ -3326,6 +3327,8 @@
                 memset(&(analysis->intraData)->modes[cnt],
tempLumaBuf[ctu32Idx], factor);
             X265_FREE(tempLumaBuf);
         }
+        if (m_param->rc.cuTree)
+            X265_FREE(cuQPBuf);
         X265_FREE(tempBuf);
         consumedBytes += frameRecordSize;
     }
@@ -3342,6 +3345,7 @@
         uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
         MV* mv[2];
         int8_t* refIdx[2];
+        int8_t* cuQPBuf = NULL;

         int numBuf = m_param->analysisReuseLevel > 4 ? 4 : 2;
         bool bIntraInInter = false;
@@ -3355,9 +3359,13 @@
         tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
+        if (m_param->rc.cuTree)
+            cuQPBuf = X265_MALLOC(int8_t, depthBytes);

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
interPic->modes);
+        if (m_param->rc.cuTree)
+            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, interPic->cuQPOff);

         if (m_param->analysisReuseLevel > 4)
         {
@@ -3395,6 +3403,8 @@
                 depthBuf[d] = 1;
             memset(&(analysis->interData)->depth[count], depthBuf[d],
bytes);
             memset(&(analysis->interData)->modes[count], modeBuf[d],
bytes);
+            if (m_param->rc.cuTree)
+                memset(&(analysis->interData)->cuQPOff[count], cuQPBuf[d],
bytes);
             if (m_param->analysisReuseLevel > 4)
             {
                 if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA &&
partSize[d] == SIZE_NxN)
@@ -3427,6 +3437,8 @@
             count += bytes;
         }

+        if (m_param->rc.cuTree)
+            X265_FREE(cuQPBuf);
         X265_FREE(tempBuf);

         if (m_param->analysisReuseLevel == 10)
@@ -3605,15 +3617,20 @@
             return;

         uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
*partSizes = NULL;
+        int8_t *cuQPBuf = NULL;

         tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
         partSizes = tempBuf + 2 * depthBytes;
+        if (m_param->rc.cuTree)
+            cuQPBuf = X265_MALLOC(int8_t, depthBytes);

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
intraPic->chromaModes);
         X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
m_analysisFileIn, intraPic->partSizes);
+        if (m_param->rc.cuTree)
+            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, intraPic->cuQPOff);

         uint32_t count = 0;
         for (uint32_t d = 0; d < depthBytes; d++)
@@ -3635,6 +3652,8 @@
                 memset(&(analysis->intraData)->depth[count], depthBuf[d],
bytes);
                 memset(&(analysis->intraData)->chromaModes[count],
modeBuf[d], bytes);
                 memset(&(analysis->intraData)->partSizes[count],
partSizes[d], bytes);
+                if (m_param->rc.cuTree)
+                    memset(&(analysis->intraData)->cuQPOff[count],
cuQPBuf[d], bytes);
                 count += bytes;
                 d += getCUIndex(&cuLoc, &count, bytes, 1);
             }
@@ -3653,6 +3672,8 @@
             ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
         }
         X265_FREE(tempLumaBuf);
+        if (m_param->rc.cuTree)
+            X265_FREE(cuQPBuf);
         X265_FREE(tempBuf);
         consumedBytes += frameRecordSize;
     }
@@ -3669,6 +3690,7 @@
         uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
         MV* mv[2];
         int8_t* refIdx[2];
+        int8_t* cuQPBuf = NULL;

         int numBuf = m_param->analysisReuseLevel > 4 ? 4 : 2;
         bool bIntraInInter = false;
@@ -3682,9 +3704,13 @@
         tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
         depthBuf = tempBuf;
         modeBuf = tempBuf + depthBytes;
+        if (m_param->rc.cuTree)
+            cuQPBuf = X265_MALLOC(int8_t, depthBytes);

         X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
m_analysisFileIn, interPic->depth);
         X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn,
interPic->modes);
+        if (m_param->rc.cuTree)
+            X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes,
m_analysisFileIn, interPic->cuQPOff);
         if (m_param->analysisReuseLevel > 4)
         {
             partSize = modeBuf + depthBytes;
@@ -3733,6 +3759,8 @@
             {
                 memset(&(analysis->interData)->depth[count], writeDepth,
bytes);
                 memset(&(analysis->interData)->modes[count], modeBuf[d],
bytes);
+                if (m_param->rc.cuTree)
+                    memset(&(analysis->interData)->cuQPOff[count],
cuQPBuf[d], bytes);
                 if (m_param->analysisReuseLevel == 10 && bIntraInInter)
                     memset(&(analysis->intraData)->chromaModes[count],
chromaDir[d], bytes);

@@ -3793,6 +3821,8 @@
             }
         }

+        if (m_param->rc.cuTree)
+            X265_FREE(cuQPBuf);
         X265_FREE(tempBuf);

         if (m_param->analysisReuseLevel == 10)
@@ -3894,6 +3924,7 @@
     X265_PARAM_VALIDATE(saveParam->lookaheadDepth, sizeof(int), 1,
&m_param->lookaheadDepth, rc - lookahead);
     X265_PARAM_VALIDATE(saveParam->chunkStart, sizeof(int), 1,
&m_param->chunkStart, chunk-start);
     X265_PARAM_VALIDATE(saveParam->chunkEnd, sizeof(int), 1,
&m_param->chunkEnd, chunk-end);
+    X265_PARAM_VALIDATE(saveParam->cuTree,sizeof(int),1,&m_param->rc.cuTree, cutree - offset);

     int sourceHeight, sourceWidth;
     if (writeFlag)
@@ -4223,6 +4254,7 @@

                 CUData* ctu = curEncData.getPicCTU(cuAddr);
                 x265_analysis_intra_data* intraDataCTU =
analysis->intraData;
+                int baseQP = (int)(ctu->m_encData->m_cuStat[cuAddr].baseQp
+ 0.5);

                 for (uint32_t absPartIdx = 0; absPartIdx <
ctu->m_numPartitions; depthBytes++)
                 {
@@ -4235,6 +4267,8 @@
                     partSize = ctu->m_partSize[absPartIdx];
                     intraDataCTU->partSizes[depthBytes] = partSize;

+                    if (m_param->rc.cuTree)
+                        intraDataCTU->cuQPOff[depthBytes] = (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
                     absPartIdx += ctu->m_numPartitions >> (depth * 2);
                 }
                 memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
ctu->m_numPartitions);
@@ -4252,6 +4286,7 @@
                 CUData* ctu = curEncData.getPicCTU(cuAddr);
                 x265_analysis_inter_data* interDataCTU =
analysis->interData;
                 x265_analysis_intra_data* intraDataCTU =
analysis->intraData;
+                int baseQP = (int)(ctu->m_encData->m_cuStat[cuAddr].baseQp
+ 0.5);

                 for (uint32_t absPartIdx = 0; absPartIdx <
ctu->m_numPartitions; depthBytes++)
                 {
@@ -4260,9 +4295,11 @@

                     predMode = ctu->m_predMode[absPartIdx];
                     if (m_param->analysisReuseLevel != 10 &&
ctu->m_refIdx[1][absPartIdx] != -1)
-                        predMode = 4; // used as indiacator if the block is coded as bidir
+                        predMode = 4; // used as indicator if the block is coded as bidir

                     interDataCTU->modes[depthBytes] = predMode;
+                    if (m_param->rc.cuTree)
+                        interDataCTU->cuQPOff[depthBytes] = (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);

                     if (m_param->analysisReuseLevel > 4)
                     {
@@ -4298,12 +4335,16 @@
             }
         }

-        if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)
+        if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree)
+            analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + (sizeof(int8_t) * depthBytes);
+        else if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)
             analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
         else
         {
-            /* Add sizeof depth, modes, partSize, mergeFlag */
+            /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */
             analysis->frameRecordSize += depthBytes * 2;
+            if (m_param->rc.cuTree)
+            analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
             if (m_param->analysisReuseLevel > 4)
                 analysis->frameRecordSize += (depthBytes * 2);

@@ -4345,12 +4386,16 @@
         X265_FWRITE((analysis->intraData)->depth, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
depthBytes, m_analysisFileOut);
+        if (m_param->rc.cuTree)
+            X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
depthBytes, m_analysisFileOut);
         X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
     }
     else
     {
         X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
         X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
+        if (m_param->rc.cuTree)
+            X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
depthBytes, m_analysisFileOut);
         if (m_param->analysisReuseLevel > 4)
         {
             X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t),
depthBytes, m_analysisFileOut);
diff -r f74003e88622 -r 4eda86eadc69 source/x265.h
--- a/source/x265.h Thu Nov 22 15:02:08 2018 +0530
+++ b/source/x265.h Wed Oct 17 11:08:38 2018 +0530
@@ -129,6 +129,7 @@
     int     lookaheadDepth;
     int     chunkStart;
     int     chunkEnd;
+    int     cuTree;
 }x265_analysis_validate;

 /* Stores intra analysis data for a single frame. This struct needs better
packing */
@@ -138,6 +139,7 @@
     uint8_t*  modes;
     char*     partSizes;
     uint8_t*  chromaModes;
+    int8_t*    cuQPOff;
 }x265_analysis_intra_data;

 typedef struct x265_analysis_MV
@@ -162,6 +164,7 @@
     int8_t*     refIdx[2];
     x265_analysis_MV*         mv[2];
     int64_t*     sadCost;
+    int8_t*    cuQPOff;
 }x265_analysis_inter_data;

 typedef struct x265_weight_param
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20181210/7b868074/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265- cuTree offset reuse final.patch
Type: application/octet-stream
Size: 22173 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20181210/7b868074/attachment-0001.obj>


More information about the x265-devel mailing list