[x265] [PATCH] limitTU: cleanup

kavitha at multicorewareinc.com kavitha at multicorewareinc.com
Thu Oct 6 10:26:51 CEST 2016


# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1475666503 -19800
#      Wed Oct 05 16:51:43 2016 +0530
# Node ID b8dd640a7f1784802f6f9859d3a79420a00661fe
# Parent  3851e1e905ef75add638aa1da2be84840741a9e9
limitTU: cleanup

diff -r 3851e1e905ef -r b8dd640a7f17 doc/reST/cli.rst
--- a/doc/reST/cli.rst	Fri Sep 30 19:59:24 2016 +0530
+++ b/doc/reST/cli.rst	Wed Oct 05 16:51:43 2016 +0530
@@ -869,13 +869,13 @@
 	partitions, in which case a TU split is implied and thus the
 	residual quad-tree begins one layer below the CU quad-tree.
 
-.. option:: --limit-TU <0|1|2>
+.. option:: --limit-tu <0|1|2>
 
-	Enables early exit from TU depth recursion. It has 2 levels.
-	Level 1 - decides to recurse to next higher depth based on cost comparison of
-	full size TU and split TU.
-	Level 2 - based on first split subTU's depth, limits recursion of other split
-	subTUs.
+	Enables early exit from TU depth recursion for inter-coded blocks.
+	Level 1 - decides whether to recurse to the next higher depth based on
+	a cost comparison of the full-size TU and the split TU.
+	Level 2 - limits the recursion of the other split subTUs based on the
+	depth of the first split subTU.
 
 	Default: 0
 
diff -r 3851e1e905ef -r b8dd640a7f17 source/common/param.cpp
--- a/source/common/param.cpp	Fri Sep 30 19:59:24 2016 +0530
+++ b/source/common/param.cpp	Wed Oct 05 16:51:43 2016 +0530
@@ -909,7 +909,7 @@
         OPT("vui-timing-info") p->bEmitVUITimingInfo = atobool(value);
         OPT("vui-hrd-info") p->bEmitVUIHRDInfo = atobool(value);
         OPT("slices") p->maxSlices = atoi(value);
-        OPT("limit-TU") p->limitTU = atoi(value);
+        OPT("limit-tu") p->limitTU = atoi(value);
         else
             return X265_PARAM_BAD_NAME;
     }
@@ -1116,7 +1116,7 @@
           "QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
     CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && param->maxTUSize != 8 && param->maxTUSize != 4),
           "max TU size must be 4, 8, 16, or 32");
-    CHECK(param->limitTU > 2, "Invalid limit-TU option, limit-TU must be 0, 1 or 2");
+    CHECK(param->limitTU > 2, "Invalid limit-tu option, limit-TU must be 0, 1 or 2");
     CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater.");
     CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller.");
 
@@ -1390,7 +1390,7 @@
     TOOLVAL(param->noiseReductionInter, "nr-inter=%d");
     TOOLOPT(param->bEnableTSkipFast, "tskip-fast");
     TOOLOPT(!param->bEnableTSkipFast && param->bEnableTransformSkip, "tskip");
-    TOOLVAL(param->limitTU , "limitTU=%d");
+    TOOLVAL(param->limitTU , "limit-tu=%d");
     TOOLOPT(param->bCULossless, "cu-lossless");
     TOOLOPT(param->bEnableSignHiding, "signhide");
     TOOLOPT(param->bEnableTemporalMvp, "tmvp");
@@ -1482,7 +1482,7 @@
     s += sprintf(s, " rdoq-level=%d", p->rdoqLevel);
     s += sprintf(s, " psy-rdoq=%.2f", p->psyRdoq);
     s += sprintf(s, " log2-max-poc-lsb=%d", p->log2MaxPocLsb);
-    s += sprintf(s, " limit-TU=%d", p->limitTU);
+    s += sprintf(s, " limit-tu=%d", p->limitTU);
     BOOL(p->bEnableRdRefine, "rd-refine");
     BOOL(p->bEnableSignHiding, "signhide");
     BOOL(p->bEnableLoopFilter, "deblock");
diff -r 3851e1e905ef -r b8dd640a7f17 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Sep 30 19:59:24 2016 +0530
+++ b/source/encoder/encoder.cpp	Wed Oct 05 16:51:43 2016 +0530
@@ -1915,7 +1915,7 @@
     if (p->limitTU && p->tuQTMaxInterDepth < 2)
     {
         p->limitTU = 0;
-        x265_log(p, X265_LOG_WARNING, "limit TU disabled, requires tu-inter-depth > 1\n");
+        x265_log(p, X265_LOG_WARNING, "limit-tu disabled, requires tu-inter-depth > 1\n");
     }
     bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
     if (!m_param->bLossless && (m_param->rc.aqMode || bIsVbv))
diff -r 3851e1e905ef -r b8dd640a7f17 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Sep 30 19:59:24 2016 +0530
+++ b/source/encoder/search.cpp	Wed Oct 05 16:51:43 2016 +0530
@@ -2620,10 +2620,11 @@
 
     if (m_param->limitTU == X265_TU_LIMIT_DFS)
         m_maxTUDepth = 0;
-    cacheTUInfo cache;
+    else if (m_param->limitTU == X265_TU_LIMIT_BFS)
+        memset(&m_cacheTU, 0, sizeof(TUInfoCache));
 
     Cost costs;
-    estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange, cache);
+    estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
 
     uint32_t tqBypass = cu.m_tqBypass[0];
     if (!tqBypass)
@@ -2872,7 +2873,7 @@
         return m_rdCost.calcRdCost(dist, nullBits);
 }
 
-bool Search::splitTU(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& splitCost, const uint32_t depthRange[2], cacheTUInfo& cache, int32_t splitMore)
+bool Search::splitTU(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& splitCost, const uint32_t depthRange[2], int32_t splitMore)
 {
     CUData& cu = mode.cu;
     uint32_t depth = cuGeom.depth + tuDepth;
@@ -2888,7 +2889,7 @@
             for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
                 m_maxTUDepth = X265_MAX(m_maxTUDepth, cu.m_tuDepth[i]);
         }
-        estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, splitCost, depthRange, cache, splitMore);
+        estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, splitCost, depthRange, splitMore);
         ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA,     tuDepth + 1);
         if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
         {
@@ -2921,7 +2922,7 @@
     return ycbf || ucbf || vcbf;
 }
 
-void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& outCosts, const uint32_t depthRange[2], cacheTUInfo& cache, int32_t splitMore)
+void Search::estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& outCosts, const uint32_t depthRange[2], int32_t splitMore)
 {
     CUData& cu = mode.cu;
     uint32_t depth = cuGeom.depth + tuDepth;
@@ -3437,12 +3438,12 @@
             {
                 for(int part = 0; part < (m_csp == X265_CSP_I422) + 1; part++)
                 {
-                    cache.bestTransformMode[idx][plane][part] = bestTransformMode[plane][part];
-                    cache.cbfFlag[idx][plane][part] = cbfFlag[plane][part];
+                    m_cacheTU.bestTransformMode[idx][plane][part] = bestTransformMode[plane][part];
+                    m_cacheTU.cbfFlag[idx][plane][part] = cbfFlag[plane][part];
                 }
             }
-            cache.cost[idx] = fullCost;
-            m_entropyCoder.store(cache.rqtStore[idx]);
+            m_cacheTU.cost[idx] = fullCost;
+            m_entropyCoder.store(m_cacheTU.rqtStore[idx]);
         }
     }
     if (bLoadTUData)
@@ -3451,12 +3452,12 @@
         {
             for(int part = 0; part < (m_csp == X265_CSP_I422) + 1; part++)
             {
-                bestTransformMode[plane][part] = cache.bestTransformMode[idx][plane][part];
-                cbfFlag[plane][part] = cache.cbfFlag[idx][plane][part];
+                bestTransformMode[plane][part] = m_cacheTU.bestTransformMode[idx][plane][part];
+                cbfFlag[plane][part] = m_cacheTU.cbfFlag[idx][plane][part];
             }
         }
-        fullCost = cache.cost[idx];
-        m_entropyCoder.load(cache.rqtStore[idx]);
+        fullCost = m_cacheTU.cost[idx];
+        m_entropyCoder.load(m_cacheTU.rqtStore[idx]);
         bCheckFull = true;
     }
 
@@ -3478,7 +3479,7 @@
             splitCost.bits = m_entropyCoder.getNumberOfWrittenBits();
         }
 
-        bool yCbCrCbf = splitTU(mode, cuGeom, absPartIdx, tuDepth, resiYuv, splitCost, depthRange, cache, 0);
+        bool yCbCrCbf = splitTU(mode, cuGeom, absPartIdx, tuDepth, resiYuv, splitCost, depthRange, 0);
         if (yCbCrCbf || !bCheckFull)
         {
             if (splitCost.rdcost < fullCost.rdcost)
@@ -3498,7 +3499,7 @@
                             m_entropyCoder.codeTransformSubdivFlag(1, 5 - log2TrSize);
                             splitCost.bits = m_entropyCoder.getNumberOfWrittenBits();
                         }
-                        splitTU(mode, cuGeom, absPartIdx, tuDepth, resiYuv, splitCost, depthRange, cache, 1);
+                        splitTU(mode, cuGeom, absPartIdx, tuDepth, resiYuv, splitCost, depthRange, 1);
                     }
                 }
                 outCosts.distortion += splitCost.distortion;
diff -r 3851e1e905ef -r b8dd640a7f17 source/encoder/search.h
--- a/source/encoder/search.h	Fri Sep 30 19:59:24 2016 +0530
+++ b/source/encoder/search.h	Wed Oct 05 16:51:43 2016 +0530
@@ -380,17 +380,17 @@
         Cost() { rdcost = 0; bits = 0; distortion = 0; energy = 0; }
     };
 
-    struct cacheTUInfo
+    struct TUInfoCache
     {
         Cost cost[NUM_SUBPART];
         uint32_t bestTransformMode[NUM_SUBPART][MAX_NUM_COMPONENT][2];
         uint8_t cbfFlag[NUM_SUBPART][MAX_NUM_COMPONENT][2];
         Entropy rqtStore[NUM_SUBPART];
-    };
+    } m_cacheTU;
 
     uint64_t estimateNullCbfCost(sse_t dist, uint32_t psyEnergy, uint32_t tuDepth, TextType compId);
-    bool     splitTU(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& splitCost, const uint32_t depthRange[2], cacheTUInfo& cache, int32_t splitMore);
-    void     estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, const uint32_t depthRange[2], cacheTUInfo& cache, int32_t splitMore = -1);
+    bool     splitTU(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t tuDepth, ShortYuv& resiYuv, Cost& splitCost, const uint32_t depthRange[2], int32_t splitMore);
+    void     estimateResidualQT(Mode& mode, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, ShortYuv& resiYuv, Cost& costs, const uint32_t depthRange[2], int32_t splitMore = -1);
 
     // generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
     void     codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& costs, const uint32_t depthRange[2]);
diff -r 3851e1e905ef -r b8dd640a7f17 source/x265.h
--- a/source/x265.h	Fri Sep 30 19:59:24 2016 +0530
+++ b/source/x265.h	Wed Oct 05 16:51:43 2016 +0530
@@ -827,8 +827,8 @@
      * compressed by the DCT transforms, at the expense of much more compute */
     uint32_t  tuQTMaxIntraDepth;
 
-    /* Enable early exit decisions for TU to avoid recursing to higher depths.
-     * Default: 0 */
+    /* Enable early exit decisions for inter coded blocks to avoid recursing to
+     * higher TU depths. Default: 0 */
     uint32_t  limitTU;
 
     /* Set the amount of rate-distortion analysis to use within quant. 0 implies
diff -r 3851e1e905ef -r b8dd640a7f17 source/x265cli.h
--- a/source/x265cli.h	Fri Sep 30 19:59:24 2016 +0530
+++ b/source/x265cli.h	Wed Oct 05 16:51:43 2016 +0530
@@ -85,7 +85,7 @@
     { "max-tu-size",    required_argument, NULL, 0 },
     { "tu-intra-depth", required_argument, NULL, 0 },
     { "tu-inter-depth", required_argument, NULL, 0 },
-    { "limit-TU",       required_argument, NULL, 0 },
+    { "limit-tu",       required_argument, NULL, 0 },
     { "me",             required_argument, NULL, 0 },
     { "subme",          required_argument, NULL, 'm' },
     { "merange",        required_argument, NULL, 0 },
@@ -320,7 +320,7 @@
     H0("   --max-tu-size <32|16|8|4>     Maximum TU size (WxH). Default %d\n", param->maxTUSize);
     H0("   --tu-intra-depth <integer>    Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth);
     H0("   --tu-inter-depth <integer>    Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth);
-    H0("   --limit-TU <integer>          Enable early exit from TU recursion. Default %d\n", param->limitTU);
+    H0("   --limit-tu <integer>          Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU);
     H0("\nAnalysis:\n");
     H0("   --rd <1..6>                   Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel);
     H0("   --[no-]psy-rd <0..5.0>        Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd);


More information about the x265-devel mailing list