[x265] [PATCH 3 of 7] add maxCUDepth to param to replace global g_maxCUDepth

Wed Jun 21 08:44:53 CEST 2017

# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1496121350 -19800
#      Tue May 30 10:45:50 2017 +0530
# Node ID 3b0883b24cd21a2aea3fc4bf82467df717c2e39c
# Parent  624374a3b21cba624bee8002bcbdb4fa0917a6fe
add maxCUDepth to param to replace global g_maxCUDepth

diff -r 624374a3b21c -r 3b0883b24cd2 source/CMakeLists.txt

--- a/source/CMakeLists.txt	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/CMakeLists.txt	Tue May 30 10:45:50 2017 +0530
@@ -29,7 +29,7 @@
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 123)
+set(X265_BUILD 124)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/analysis.cpp	Tue May 30 10:45:50 2017 +0530
@@ -93,7 +93,7 @@
     uint32_t cuSize = m_param->maxCUSize;
 
     bool ok = true;
-    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++, cuSize >>= 1)
+    for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++, cuSize >>= 1)
     {
         ModeDepth &md = m_modeDepth[depth];
 
@@ -116,7 +116,7 @@
 
 void Analysis::destroy()
 {
-    for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+    for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
     {
         m_modeDepth[i].cuMemPool.destroy();
         m_modeDepth[i].fencYuv.destroy();
@@ -296,7 +296,7 @@
         depth = ctu.m_cuDepth[absPartIdx];
         partSize = ctu.m_partSize[absPartIdx];
         uint32_t numPU = nbPartsTable[(int)partSize];
-        int shift = 2 * (g_maxCUDepth + 1 - depth);
+        int shift = 2 * (m_param->maxCUDepth + 1 - depth);
         for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
         {
             PredictionUnit pu(ctu, cuGeom, puIdx);
@@ -2937,7 +2937,7 @@
 
 void Analysis::encodeResidue(const CUData& ctu, const CUGeom& cuGeom)
 {
-    if (cuGeom.depth < ctu.m_cuDepth[cuGeom.absPartIdx] && cuGeom.depth < g_maxCUDepth)
+    if (cuGeom.depth < ctu.m_cuDepth[cuGeom.absPartIdx] && cuGeom.depth < ctu.m_encData->m_param->maxCUDepth)
     {
         for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
         {
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/encoder.cpp	Tue May 30 10:45:50 2017 +0530
@@ -1424,7 +1424,7 @@
     /* Summarize stats from all frame encoders */
     CUStats cuStats;
     for (int i = 0; i < m_param->frameNumThreads; i++)
-        cuStats.accumulate(m_frameEncoder[i]->m_cuStats);
+        cuStats.accumulate(m_frameEncoder[i]->m_cuStats, m_param);
 
     if (!cuStats.totalCTUTime)
         return;
@@ -1445,7 +1445,7 @@
 
     int64_t interRDOTotalTime = 0, intraRDOTotalTime = 0;
     uint64_t interRDOTotalCount = 0, intraRDOTotalCount = 0;
-    for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+    for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
     {
         interRDOTotalTime += cuStats.interRDOElapsedTime[i];
         intraRDOTotalTime += cuStats.intraRDOElapsedTime[i];
@@ -1771,7 +1771,7 @@
         frameStats->minChromaVLevel = curFrame->m_fencPic->m_minChromaVLevel;
         frameStats->avgChromaVLevel = curFrame->m_fencPic->m_avgChromaVLevel;
 
-        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
         {
             frameStats->cuStats.percentSkipCu[depth]  = curFrame->m_encData->m_frameStats.percentSkipCu[depth];
             frameStats->cuStats.percentMergeCu[depth] = curFrame->m_encData->m_frameStats.percentMergeCu[depth];
@@ -1786,7 +1786,7 @@
             frameStats->puStats.percentNxN = 0;
         else
             frameStats->puStats.percentNxN = (double)(curFrame->m_encData->m_frameStats.cnt4x4 / (double)curFrame->m_encData->m_frameStats.totalPu[4]) * 100;
-        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
         {
             if (curFrame->m_encData->m_frameStats.totalPu[depth] == 0)
             {
@@ -1988,8 +1988,8 @@
     sps->numPartitions = NUM_4x4_PARTITIONS;
     sps->numPartInCUSize = 1 << g_unitSizeDepth;
 
-    sps->log2MinCodingBlockSize = m_param->maxLog2CUSize - g_maxCUDepth;
-    sps->log2DiffMaxMinCodingBlockSize = g_maxCUDepth;
+    sps->log2MinCodingBlockSize = m_param->maxLog2CUSize - m_param->maxCUDepth;
+    sps->log2DiffMaxMinCodingBlockSize = m_param->maxCUDepth;
     uint32_t maxLog2TUSize = (uint32_t)g_log2Size[m_param->maxTUSize];
     sps->quadtreeTULog2MaxSize = X265_MIN((uint32_t)m_param->maxLog2CUSize, maxLog2TUSize);
     sps->quadtreeTULog2MinSize = 2;
@@ -1999,7 +1999,7 @@
     sps->bUseSAO = m_param->bEnableSAO;
 
     sps->bUseAMP = m_param->bEnableAMP;
-    sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
+    sps->maxAMPDepth = m_param->bEnableAMP ? m_param->maxCUDepth : 0;
 
     sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
     sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
@@ -2643,6 +2643,7 @@
         }
     }
     p->maxLog2CUSize = g_log2Size[p->maxCUSize];
+    p->maxCUDepth    = p->maxLog2CUSize - g_log2Size[p->minCUSize];
 }
 
 void Encoder::allocAnalysis(x265_analysis_data* analysis)
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/entropy.cpp	Tue May 30 10:45:50 2017 +0530
@@ -783,7 +783,7 @@
     if (cuSplitFlag) 
         codeSplitFlag(ctu, absPartIdx, depth);
 
-    if (depth < ctu.m_cuDepth[absPartIdx] && depth < g_maxCUDepth)
+    if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
     {
         uint32_t qNumParts = cuGeom.numPartitions >> 2;
         if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
@@ -863,7 +863,7 @@
     case SIZE_nRx2N:
         bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
         bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
-        if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
+        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
             bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
         if (cu.m_slice->m_sps->maxAMPDepth > depth)
         {
@@ -1512,7 +1512,7 @@
 
     if (cu.isIntra(absPartIdx))
     {
-        if (depth == g_maxCUDepth)
+        if (depth == cu.m_encData->m_param->maxCUDepth)
             encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
         return;
     }
@@ -1541,7 +1541,7 @@
     case SIZE_nRx2N:
         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
-        if (depth == g_maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
+        if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
             encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
         if (cu.m_slice->m_sps->maxAMPDepth > depth)
         {
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/frameencoder.cpp	Tue May 30 10:45:50 2017 +0530
@@ -887,7 +887,7 @@
         m_frame->m_encData->m_frameStats.psyEnergy        += m_rows[i].rowStats.psyEnergy;
         m_frame->m_encData->m_frameStats.ssimEnergy       += m_rows[i].rowStats.ssimEnergy;
         m_frame->m_encData->m_frameStats.resEnergy        += m_rows[i].rowStats.resEnergy;
-        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
         {
             m_frame->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
             m_frame->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
@@ -903,7 +903,7 @@
     m_frame->m_encData->m_frameStats.avgSsimEnergy       = (double)(m_frame->m_encData->m_frameStats.ssimEnergy) / m_frame->m_encData->m_frameStats.totalCtu;
     m_frame->m_encData->m_frameStats.avgResEnergy        = (double)(m_frame->m_encData->m_frameStats.resEnergy) / m_frame->m_encData->m_frameStats.totalCtu;
     m_frame->m_encData->m_frameStats.percentIntraNxN     = (double)(m_frame->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame->m_encData->m_frameStats.totalCu;
-    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+    for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
     {
         m_frame->m_encData->m_frameStats.percentSkipCu[depth]  = (double)(m_frame->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
         m_frame->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
@@ -1101,7 +1101,7 @@
     /* Accumulate CU statistics from each worker thread, we could report
      * per-frame stats here, but currently we do not. */
     for (int i = 0; i < numTLD; i++)
-        m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId]);
+        m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId], m_param);
 #endif
 
     m_endFrameTime = x265_mdate();
@@ -1213,7 +1213,6 @@
     const uint32_t row = (uint32_t)intRow;
     CTURow& curRow = m_rows[row];
 
-    tld.analysis.m_param = m_param;
     if (m_param->bEnableWavefront)
     {
         ScopedLock self(curRow.lock);
@@ -1478,10 +1477,10 @@
             curRow.rowStats.coeffBits += best.coeffBits;
             curRow.rowStats.miscBits  += best.totalBits - (best.mvBits + best.coeffBits);
 
-            for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+            for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
             {
                 /* 1 << shift == number of 8x8 blocks at current depth */
-                int shift = 2 * (g_maxCUDepth - depth);
+                int shift = 2 * (m_param->maxCUDepth - depth);
                 int cuSize = m_param->maxCUSize >> depth;
 
                 if (cuSize == 8)
@@ -1501,7 +1500,7 @@
         curRow.rowStats.resEnergy        += best.resEnergy;
         curRow.rowStats.cntIntraNxN      += frameLog.cntIntraNxN;
         curRow.rowStats.totalCu          += frameLog.totalCu;
-        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+        for (uint32_t depth = 0; depth <= m_param->maxCUDepth; depth++)
         {
             curRow.rowStats.cntSkipCu[depth] += frameLog.cntSkipCu[depth];
             curRow.rowStats.cntMergeCu[depth] += frameLog.cntMergeCu[depth];
@@ -1734,7 +1733,6 @@
         }
     }
 
-    tld.analysis.m_param = NULL;
     curRow.busy = false;
 
     // CHECK_ME: Does it always FALSE condition?
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/search.cpp	Tue May 30 10:45:50 2017 +0530
@@ -136,7 +136,7 @@
     }
 
     /* the rest of these buffers are indexed per-depth */
-    for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+    for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
     {
         int cuSize = param.maxCUSize >> i;
         ok &= m_rqt[i].tmpResiYuv.create(cuSize, param.internalCsp);
@@ -186,7 +186,7 @@
         m_rqt[i].resiQtYuv.destroy();
     }
 
-    for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+    for (uint32_t i = 0; i <= m_param->maxCUDepth; i++)
     {
         m_rqt[i].tmpResiYuv.destroy();
         m_rqt[i].tmpPredYuv.destroy();
diff -r 624374a3b21c -r 3b0883b24cd2 source/encoder/search.h
--- a/source/encoder/search.h	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/encoder/search.h	Tue May 30 10:45:50 2017 +0530
@@ -204,9 +204,9 @@
         memset(this, 0, sizeof(*this));
     }
 
-    void accumulate(CUStats& other)
+    void accumulate(CUStats& other, const x265_param* param)
     {
-        for (uint32_t i = 0; i <= g_maxCUDepth; i++)
+        for (uint32_t i = 0; i <= param->maxCUDepth; i++)
         {
             intraRDOElapsedTime[i] += other.intraRDOElapsedTime[i];
             interRDOElapsedTime[i] += other.interRDOElapsedTime[i];
diff -r 624374a3b21c -r 3b0883b24cd2 source/x265-extras.cpp
--- a/source/x265-extras.cpp	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/x265-extras.cpp	Tue May 30 10:45:50 2017 +0530
@@ -69,7 +69,7 @@
                 fprintf(csvfp, "Latency, ");
                 fprintf(csvfp, "List 0, List 1");
                 uint32_t size = param.maxCUSize;
-                for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+                for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
                 {
                     fprintf(csvfp, ", Intra %dx%d DC, Intra %dx%d Planar, Intra %dx%d Ang", size, size, size, size, size, size);
                     size /= 2;
@@ -78,7 +78,7 @@
                 size = param.maxCUSize;
                 if (param.bEnableRectInter)
                 {
-                    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+                    for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
                     {
                         fprintf(csvfp, ", Inter %dx%d, Inter %dx%d (Rect)", size, size, size, size);
                         if (param.bEnableAMP)
@@ -88,20 +88,20 @@
                 }
                 else
                 {
-                    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+                    for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
                     {
                         fprintf(csvfp, ", Inter %dx%d", size, size);
                         size /= 2;
                     }
                 }
                 size = param.maxCUSize;
-                for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+                for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
                 {
                     fprintf(csvfp, ", Skip %dx%d", size, size);
                     size /= 2;
                 }
                 size = param.maxCUSize;
-                for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+                for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
                 {
                     fprintf(csvfp, ", Merge %dx%d", size, size);
                     size /= 2;
@@ -184,14 +184,14 @@
         else
             fputs(" -,", csvfp);
     }
-    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+    for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
         fprintf(csvfp, "%5.2lf%%, %5.2lf%%, %5.2lf%%,", frameStats->cuStats.percentIntraDistribution[depth][0],
                                                         frameStats->cuStats.percentIntraDistribution[depth][1],
                                                         frameStats->cuStats.percentIntraDistribution[depth][2]);
     fprintf(csvfp, "%5.2lf%%", frameStats->cuStats.percentIntraNxN);
     if (param.bEnableRectInter)
     {
-        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+        for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
         {
             fprintf(csvfp, ", %5.2lf%%, %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][0],
                                                    frameStats->cuStats.percentInterDistribution[depth][1]);
@@ -201,12 +201,12 @@
     }
     else
     {
-        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+        for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
             fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentInterDistribution[depth][0]);
     }
-    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+    for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
         fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentSkipCu[depth]);
-    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
+    for (uint32_t depth = 0; depth <= param.maxCUDepth; depth++)
         fprintf(csvfp, ", %5.2lf%%", frameStats->cuStats.percentMergeCu[depth]);
 
     fprintf(csvfp, ", %.2lf, %.2lf, %.2lf, %.2lf ", frameStats->avgLumaDistortion,
diff -r 624374a3b21c -r 3b0883b24cd2 source/x265.h
--- a/source/x265.h	Mon Jun 12 17:21:57 2017 +0530
+++ b/source/x265.h	Tue May 30 10:45:50 2017 +0530
@@ -1461,6 +1461,9 @@
 
     /* Log of maximum CTU size */
     uint32_t  maxLog2CUSize;
+
+    /* Maximum CU depth, derived by the encoder as maxLog2CUSize - log2(minCUSize) */
+    uint32_t  maxCUDepth;
 } x265_param;
 
 /* x265_param_alloc: