[x265] [PATCH 2 of 4] limitTU : use spatial and temporal CUs' TU depth to limit recursion

bhavna at multicorewareinc.com bhavna at multicorewareinc.com
Fri Nov 18 13:06:27 CET 2016


# HG changeset patch
# User Bhavna Hariharan <bhavna at multicorewareinc.com>
# Date 1479450968 -19800
#      Fri Nov 18 12:06:08 2016 +0530
# Node ID 40a0a322b26fc0516a72d4de9a941e18b5bb97b9
# Parent  c5295126f248411481a8361acfd2bc8b0636cedc
limitTU : use spatial and temporal CUs' TU depth to limit recursion

diff -r c5295126f248 -r 40a0a322b26f doc/reST/cli.rst
--- a/doc/reST/cli.rst	Fri Nov 18 11:49:05 2016 +0530
+++ b/doc/reST/cli.rst	Fri Nov 18 12:06:08 2016 +0530
@@ -869,13 +869,15 @@
 	partitions, in which case a TU split is implied and thus the
 	residual quad-tree begins one layer below the CU quad-tree.
 
-.. option:: --limit-tu <0|1|2>
+.. option:: --limit-tu <0..3>
 
 	Enables early exit from TU depth recursion, for inter coded blocks.
 	Level 1 - decides to recurse to next higher depth based on cost 
 	comparison of full size TU and split TU.
 	Level 2 - based on first split subTU's depth, limits recursion of
 	other split subTUs.
+	Level 3 - based on the average TU depth of the co-located and neighbor
+	CUs, limits recursion of the current CU.
 
 	Default: 0
 
diff -r c5295126f248 -r 40a0a322b26f source/common/cudata.cpp
--- a/source/common/cudata.cpp	Fri Nov 18 11:49:05 2016 +0530
+++ b/source/common/cudata.cpp	Fri Nov 18 12:06:08 2016 +0530
@@ -296,6 +296,9 @@
     /* initialize the remaining CU data in one memset */
     memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions);
 
+    for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
+        m_refTuDepth[i] = -1;
+
     uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
     m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
     m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
diff -r c5295126f248 -r 40a0a322b26f source/common/cudata.h
--- a/source/common/cudata.h	Fri Nov 18 11:49:05 2016 +0530
+++ b/source/common/cudata.h	Fri Nov 18 12:06:08 2016 +0530
@@ -28,6 +28,8 @@
 #include "slice.h"
 #include "mv.h"
 
+#define NUM_TU_DEPTH 21
+
 namespace X265_NS {
 // private namespace
 
@@ -204,6 +206,7 @@
     enum { BytesPerPartition = 21 };  // combined sizeof() of all per-part data
 
     coeff_t*      m_trCoeff[3];       // transformed coefficient buffer per plane
+    int8_t        m_refTuDepth[NUM_TU_DEPTH];   // TU depth of CU at depths 0, 1 and 2
 
     MV*           m_mv[2];            // array of motion vectors per list
     MV*           m_mvd[2];           // array of coded motion vector deltas per list
diff -r c5295126f248 -r 40a0a322b26f source/common/param.cpp
--- a/source/common/param.cpp	Fri Nov 18 11:49:05 2016 +0530
+++ b/source/common/param.cpp	Fri Nov 18 12:06:08 2016 +0530
@@ -1126,7 +1126,7 @@
           "QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
     CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && param->maxTUSize != 8 && param->maxTUSize != 4),
           "max TU size must be 4, 8, 16, or 32");
-    CHECK(param->limitTU > 2, "Invalid limit-tu option, limit-TU must be 0, 1 or 2");
+    CHECK(param->limitTU > 3, "Invalid limit-tu option, limit-TU must be between 0 and 3");
     CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater.");
     CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller.");
 
diff -r c5295126f248 -r 40a0a322b26f source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Nov 18 11:49:05 2016 +0530
+++ b/source/encoder/analysis.cpp	Fri Nov 18 12:06:08 2016 +0530
@@ -203,6 +203,57 @@
     return *m_modeDepth[0].bestMode;
 }
 
+/* Predict the max TU depth of this CU: average m_refTuDepth[cuGeom.geomRecurId] over the
+ * co-located CTU in the first L0 ref (and first L1 ref for B slices) and the available above,
+ * above-left, above-right and left CTUs. Returns 0..3, or -1 when the average exceeds 2.5. */
+int32_t Analysis::loadTUDepth(CUGeom cuGeom, CUData parentCTU)
+{
+    float predDepth = 0;
+    uint8_t count = 0;
+    /* Index the reference's CTU array by the current CTU address so the co-located CTU is read, not CTU 0 */
+    const CUData* neighbourCU = &m_slice->m_refFrameList[0][0]->m_encData->m_picCTU[parentCTU.m_cuAddr];
+    predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId];
+    count++;
+    if (m_slice->isInterB())
+    {
+        neighbourCU = &m_slice->m_refFrameList[1][0]->m_encData->m_picCTU[parentCTU.m_cuAddr];
+        predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId];
+        count++;
+    }
+    if (parentCTU.m_cuAbove) /* diagonal neighbours are only consulted when the above CTU exists */
+    {
+        predDepth += parentCTU.m_cuAbove->m_refTuDepth[cuGeom.geomRecurId];
+        count++;
+        if (parentCTU.m_cuAboveLeft)
+        {
+            predDepth += parentCTU.m_cuAboveLeft->m_refTuDepth[cuGeom.geomRecurId];
+            count++;
+        }
+        if (parentCTU.m_cuAboveRight)
+        {
+            predDepth += parentCTU.m_cuAboveRight->m_refTuDepth[cuGeom.geomRecurId];
+            count++;
+        }
+    }
+    if (parentCTU.m_cuLeft)
+    {
+        predDepth += parentCTU.m_cuLeft->m_refTuDepth[cuGeom.geomRecurId];
+        count++;
+    }
+    predDepth /= count; /* count >= 1: the L0 candidate is always included */
+
+    int32_t maxTUDepth = -1; /* stays -1 when the average is above 2.5 */
+    if (predDepth == 0)
+        maxTUDepth = 0;
+    else if (predDepth < 1) /* also covers negative averages */
+        maxTUDepth = 1;
+    else if (predDepth <= 1.5)
+        maxTUDepth = 2;
+    else if (predDepth <= 2.5)
+        maxTUDepth = 3;
+    return maxTUDepth;
+}
+
 void Analysis::tryLossless(const CUGeom& cuGeom)
 {
     ModeDepth& md = m_modeDepth[cuGeom.depth];
@@ -326,6 +377,15 @@
             checkBestMode(md.pred[PRED_INTRA_NxN], depth);
         }
 
+        if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+        {
+            CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+            int8_t maxTUDepth = -1;
+            for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+                maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
+            ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
+        }
+
         if (m_bTryLossless)
             tryLossless(cuGeom);
 
@@ -894,6 +954,9 @@
     bool skipRectAmp = false;
     bool chooseMerge = false;
 
+    if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+        m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
+
     SplitData splitData[4];
     splitData[0].initSplitCUData();
     splitData[1].initSplitCUData();
@@ -1400,6 +1463,18 @@
     if (m_param->rdLevel)
         md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx);
 
+    if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+    {
+        if (mightNotSplit)
+        {
+            CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+            int8_t maxTUDepth = -1;
+            for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+                maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
+            ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
+        }
+    }
+
     return splitCUData;
 }
 
@@ -1424,6 +1499,9 @@
         md.pred[PRED_2Nx2N].rdCost = 0;
     }
 
+    if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+        m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
+
     SplitData splitData[4];
     splitData[0].initSplitCUData();
     splitData[1].initSplitCUData();
@@ -1751,6 +1829,18 @@
             addSplitFlagCost(*md.bestMode, cuGeom.depth);
     }
 
+    if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
+    {
+        if (mightNotSplit)
+        {
+            CUData* ctu = md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+            int8_t maxTUDepth = -1;
+            for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+                maxTUDepth = X265_MAX(maxTUDepth, md.bestMode->cu.m_tuDepth[i]);
+            ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
+        }
+    }
+
     /* compare split RD cost against best cost */
     if (mightSplit && !skipRecursion)
         checkBestMode(md.pred[PRED_SPLIT], depth);
diff -r c5295126f248 -r 40a0a322b26f source/encoder/analysis.h
--- a/source/encoder/analysis.h	Fri Nov 18 11:49:05 2016 +0530
+++ b/source/encoder/analysis.h	Fri Nov 18 12:06:08 2016 +0530
@@ -116,6 +116,7 @@
     void destroy();
 
     Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
+    int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU);
 
 protected:
     /* Analysis data for save/load mode, writes/reads data based on absPartIdx */
diff -r c5295126f248 -r 40a0a322b26f source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Nov 18 11:49:05 2016 +0530
+++ b/source/encoder/search.cpp	Fri Nov 18 12:06:08 2016 +0530
@@ -67,7 +67,7 @@
     m_param = NULL;
     m_slice = NULL;
     m_frame = NULL;
-    m_maxTUDepth = 0;
+    m_maxTUDepth = -1;
 }
 
 bool Search::initSearch(const x265_param& param, ScalingList& scalingList)
@@ -97,10 +97,12 @@
     m_limitTU = 0;
     if (m_param->limitTU)
     {
-        if (m_param->limitTU == 1)
-            m_limitTU = X265_TU_LIMIT_BFS;
-        else if (m_param->limitTU == 2)
+        if (m_param->limitTU == 1)
+            m_limitTU = X265_TU_LIMIT_BFS;
+        else if (m_param->limitTU == 2)
             m_limitTU = X265_TU_LIMIT_DFS;
+        else if (m_param->limitTU == 3)
+            m_limitTU = X265_TU_LIMIT_NEIGH;
     }
 
     /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32
@@ -2628,11 +2630,17 @@
     uint32_t tuDepthRange[2];
     cu.getInterTUQtDepthRange(tuDepthRange, 0);
 
+    if (m_limitTU & X265_TU_LIMIT_NEIGH)
+    {
+        int32_t maxLog2CUSize = g_log2Size[m_param->maxCUSize];
+        m_maxTUDepth = x265_clip3(maxLog2CUSize - (int32_t)tuDepthRange[1], maxLog2CUSize - (int32_t)tuDepthRange[0], m_maxTUDepth);
+    }
+
     m_entropyCoder.load(m_rqt[depth].cur);
 
-    if (m_param->limitTU & X265_TU_LIMIT_DFS)
-        m_maxTUDepth = 0;
-    else if (m_param->limitTU & X265_TU_LIMIT_BFS)
+    if (m_limitTU & X265_TU_LIMIT_DFS)
+        m_maxTUDepth = -1;
+    else if (m_limitTU & X265_TU_LIMIT_BFS)
         memset(&m_cacheTU, 0, sizeof(TUInfoCache));
 
     Cost costs;
@@ -2895,7 +2903,7 @@
     uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
     for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
     {
-        if ((m_param->limitTU & X265_TU_LIMIT_DFS) && tuDepth == 0 && qIdx == 1)
+        if ((m_limitTU & X265_TU_LIMIT_DFS) && tuDepth == 0 && qIdx == 1)
         {
             // Fetch maximum TU depth of first sub partition to limit recursion of others
             for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
@@ -2946,12 +2954,7 @@
     bool bSaveTUData = false, bLoadTUData = false;
     uint32_t idx = 0;
 
-    if ((m_param->limitTU & X265_TU_LIMIT_DFS) && m_maxTUDepth)
-    {
-        uint32_t log2MaxTrSize = cuGeom.log2CUSize - m_maxTUDepth;
-        bCheckSplit = log2TrSize > log2MaxTrSize;
-    }
-    else if ((m_param->limitTU & X265_TU_LIMIT_BFS) && splitMore >= 0)
+    if ((m_limitTU & X265_TU_LIMIT_BFS) && splitMore >= 0)
     {
         if (bCheckSplit && bCheckFull && tuDepth)
         {
@@ -2970,6 +2973,14 @@
             }
         }
     }
+    else if (m_limitTU & X265_TU_LIMIT_DFS || m_limitTU & X265_TU_LIMIT_NEIGH)
+    {
+        if (bCheckSplit && m_maxTUDepth >= 0)
+        {
+            uint32_t log2MaxTrSize = cuGeom.log2CUSize - m_maxTUDepth;
+            bCheckSplit = log2TrSize > log2MaxTrSize;
+        }
+    }
 
     bool bSplitPresentFlag = bCheckSplit && bCheckFull;
 
@@ -3497,7 +3508,7 @@
         {
             if (splitCost.rdcost < fullCost.rdcost)
             {
-                if (m_param->limitTU & X265_TU_LIMIT_BFS)
+                if (m_limitTU & X265_TU_LIMIT_BFS)
                 {
                     uint32_t nextlog2TrSize = cuGeom.log2CUSize - (tuDepth + 1);
                     bool nextSplit = nextlog2TrSize > depthRange[0];
diff -r c5295126f248 -r 40a0a322b26f source/encoder/search.h
--- a/source/encoder/search.h	Fri Nov 18 11:49:05 2016 +0530
+++ b/source/encoder/search.h	Fri Nov 18 12:06:08 2016 +0530
@@ -277,7 +277,7 @@
     uint32_t        m_numLayers;
     uint32_t        m_refLagPixels;
 
-    uint32_t        m_maxTUDepth;
+    int32_t         m_maxTUDepth;
     uint16_t        m_limitTU;
 
     int16_t         m_sliceMaxY;
diff -r c5295126f248 -r 40a0a322b26f source/x265.h
--- a/source/x265.h	Fri Nov 18 11:49:05 2016 +0530
+++ b/source/x265.h	Fri Nov 18 12:06:08 2016 +0530
@@ -357,6 +357,7 @@
 
 #define X265_TU_LIMIT_BFS       1
 #define X265_TU_LIMIT_DFS       2
+#define X265_TU_LIMIT_NEIGH     4
 
 #define X265_BFRAME_MAX         16
 #define X265_MAX_FRAME_THREADS  16


More information about the x265-devel mailing list