[x265] [PATCH] level: add --high-tier and auto-configure VBV if --crf N --level M (refs #61)

Steve Borho steve at borho.org
Tue Jul 22 04:06:17 CEST 2014


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1405990159 18000
#      Mon Jul 21 19:49:19 2014 -0500
# Node ID 14ecc7996a6c344945eee7877fc2f8abf505ba42
# Parent  5835fa3a82811e227c95906760292844be367ed9
level: add --high-tier and auto-configure VBV if --crf N --level M (refs #61)

This patch is a major overhaul of the level enforcement logic. The first obvious
difference is that the user may now specify the tier (--high-tier). The second
difference is that, when a minimum decoder level is specified, x265 will no
longer run any configuration that might generate a bitstream which is
non-compliant with that level.

Any of these conditions will cause the encode to abort if a minimum decoder
level was specified:

* picture size is too large
* frame rate is too high
* constant QP rate control is configured
* the specified level does not exist

Further, if CRF rate control is used together with a specified level and VBV
was not fully configured by the user, we now enable VBV using the maximum CPB
size and bitrate of the specified level/tier (and issue a warning that the
output may now be non-deterministic).

Note that the encoder will still signal the minimum decoder level which covers
the encode parameters.  So even if you specify --level 5.1, we may signal the
stream as Level 4 (High tier) if the stream should be decodable at that level.

This further fixes the CLI to allow --level-idc or the shortened --level, just
as it now also supports --high-tier or the shortened --high.

diff -r 5835fa3a8281 -r 14ecc7996a6c source/common/param.cpp
--- a/source/common/param.cpp	Mon Jul 21 20:03:32 2014 -0500
+++ b/source/common/param.cpp	Mon Jul 21 19:49:19 2014 -0500
@@ -112,7 +112,7 @@
     /* Source specifications */
     param->internalBitDepth = x265_max_bit_depth;
     param->internalCsp = X265_CSP_I420;
-    param->levelIdc = -1;
+    param->levelIdc = 0;
 
     /* CU definitions */
     param->maxCUSize = 64;
@@ -575,6 +575,7 @@
         else
             p->levelIdc = atoi(value);
     }
+    OPT("high-tier") p->bHighTier = atobool(value);
     OPT2("log-level", "log")
     {
         p->logLevel = atoi(value);
diff -r 5835fa3a8281 -r 14ecc7996a6c source/encoder/api.cpp
--- a/source/encoder/api.cpp	Mon Jul 21 20:03:32 2014 -0500
+++ b/source/encoder/api.cpp	Mon Jul 21 19:49:19 2014 -0500
@@ -55,9 +55,17 @@
     Encoder *encoder = new Encoder;
     if (encoder)
     {
-        // these may change params for auto-detect, etc
+        // may change params for auto-detect, etc
         encoder->configure(param);
-        enforceLevel(*param, encoder->m_vps);
+        
+        // may change rate control and CPB params
+        if (!enforceLevel(*param, encoder->m_vps))
+        {
+            delete encoder;
+            return NULL;
+        }
+
+        // will detect and set profile/tier/level in VPS
         determineLevel(*param, encoder->m_vps);
 
         x265_print_params(param);
diff -r 5835fa3a8281 -r 14ecc7996a6c source/encoder/level.cpp
--- a/source/encoder/level.cpp	Mon Jul 21 20:03:32 2014 -0500
+++ b/source/encoder/level.cpp	Mon Jul 21 19:49:19 2014 -0500
@@ -32,6 +32,8 @@
     uint32_t maxLumaSamplesPerSecond;
     uint32_t maxBitrateMain;
     uint32_t maxBitrateHigh;
+    uint32_t maxCpbSizeMain;
+    uint32_t maxCpbSizeHigh;
     uint32_t minCompressionRatio;
     Level::Name levelEnum;
     const char* name;
@@ -40,19 +42,19 @@
 
 LevelSpec levels[] =
 {
-    { 36864,    552960,     128,      MAX_UINT, 2, Level::LEVEL1,   "1",   10 },
-    { 122880,   3686400,    1500,     MAX_UINT, 2, Level::LEVEL2,   "2",   20 },
-    { 245760,   7372800,    3000,     MAX_UINT, 2, Level::LEVEL2_1, "2.1", 21 },
-    { 552960,   16588800,   6000,     MAX_UINT, 2, Level::LEVEL3,   "3",   30 },
-    { 983040,   33177600,   10000,    MAX_UINT, 2, Level::LEVEL3_1, "3.1", 31 },
-    { 2228224,  66846720,   12000,    30000,    4, Level::LEVEL4,   "4",   40 },
-    { 2228224,  133693440,  20000,    50000,    4, Level::LEVEL4_1, "4.1", 41 },
-    { 8912896,  267386880,  25000,    100000,   6, Level::LEVEL5,   "5",   50 },
-    { 8912896,  534773760,  40000,    160000,   8, Level::LEVEL5_1, "5.1", 51 },
-    { 8912896,  1069547520, 60000,    240000,   8, Level::LEVEL5_2, "5.2", 52 },
-    { 35651584, 1069547520, 60000,    240000,   8, Level::LEVEL6,   "6",   60 },
-    { 35651584, 2139095040, 120000,   480000,   8, Level::LEVEL6_1, "6.1", 61 },
-    { 35651584, 4278190080U, 240000,  800000,   6, Level::LEVEL6_2, "6.2", 62 },
+    { 36864,    552960,     128,      MAX_UINT, 350,    MAX_UINT, 2, Level::LEVEL1,   "1",   10 },
+    { 122880,   3686400,    1500,     MAX_UINT, 1500,   MAX_UINT, 2, Level::LEVEL2,   "2",   20 },
+    { 245760,   7372800,    3000,     MAX_UINT, 3000,   MAX_UINT, 2, Level::LEVEL2_1, "2.1", 21 },
+    { 552960,   16588800,   6000,     MAX_UINT, 6000,   MAX_UINT, 2, Level::LEVEL3,   "3",   30 },
+    { 983040,   33177600,   10000,    MAX_UINT, 10000,  MAX_UINT, 2, Level::LEVEL3_1, "3.1", 31 },
+    { 2228224,  66846720,   12000,    30000,    12000,  30000,    4, Level::LEVEL4,   "4",   40 },
+    { 2228224,  133693440,  20000,    50000,    20000,  50000,    4, Level::LEVEL4_1, "4.1", 41 },
+    { 8912896,  267386880,  25000,    100000,   25000,  100000,   6, Level::LEVEL5,   "5",   50 },
+    { 8912896,  534773760,  40000,    160000,   40000,  160000,   8, Level::LEVEL5_1, "5.1", 51 },
+    { 8912896,  1069547520, 60000,    240000,   60000,  240000,   8, Level::LEVEL5_2, "5.2", 52 },
+    { 35651584, 1069547520, 60000,    240000,   60000,  240000,   8, Level::LEVEL6,   "6",   60 },
+    { 35651584, 2139095040, 120000,   480000,   120000, 480000,   8, Level::LEVEL6_1, "6.1", 61 },
+    { 35651584, 4278190080U, 240000,  800000,   240000, 800000,   6, Level::LEVEL6_2, "6.2", 62 },
 };
 
 /* determine minimum decoder level required to decode the described video */
@@ -69,7 +71,6 @@
          * Technically, Mainstillpicture implies one picture per bitstream but
          * we do not enforce this limit. We do repeat SPS, PPS, and VPS each
          * frame */
-
         if (param.internalBitDepth == 8 && param.keyframeMax == 1)
             vps.ptl.profileIdc = Profile::MAINSTILLPICTURE;
         if (param.internalBitDepth == 8)
@@ -81,7 +82,7 @@
 
     uint32_t lumaSamples = param.sourceWidth * param.sourceHeight;
     uint32_t samplesPerSec = (uint32_t)(lumaSamples * ((double)param.fpsNum / param.fpsDenom));
-    uint32_t bitrate = param.rc.bitrate ? param.rc.bitrate : param.rc.vbvMaxBitrate;
+    uint32_t bitrate = param.rc.vbvMaxBitrate ? param.rc.vbvMaxBitrate : param.rc.bitrate;
 
     const uint32_t MaxDpbPicBuf = 6;
     vps.ptl.levelIdc = Level::NONE;
@@ -95,6 +96,8 @@
             continue;
         else if (samplesPerSec > levels[i].maxLumaSamplesPerSecond)
             continue;
+        else if (bitrate > levels[i].maxBitrateMain && levels[i].maxBitrateHigh == MAX_UINT)
+            continue;
         else if (bitrate > levels[i].maxBitrateHigh)
             continue;
         else if (param.sourceWidth > sqrt(levels[i].maxLumaSamples * 8.0f))
@@ -117,7 +120,12 @@
 
         /* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
         if (levels[i].levelEnum >= Level::LEVEL5 && param.maxCUSize < 32)
-            continue;
+            x265_log(&param, X265_LOG_WARNING, "CTU size is too small, stream will be non-compliant for level %s\n", levels[i].name);
+
+        /* The value of NumPocTotalCurr shall be less than or equal to 8 */
+        int numPocTotalCurr = param.maxNumReferences + !!param.bframes;
+        if (numPocTotalCurr > 8)
+            x265_log(&param, X265_LOG_WARNING, "Too many reference frames, stream will be non-compliant for level %s\n", levels[i].name);
 
         vps.ptl.levelIdc = levels[i].levelEnum;
         if (bitrate > levels[i].maxBitrateMain && bitrate <= levels[i].maxBitrateHigh &&
@@ -125,20 +133,9 @@
             vps.ptl.tierFlag = Level::HIGH;
         else
             vps.ptl.tierFlag = Level::MAIN;
-        /* TODO: The value of NumPocTotalCurr shall be less than or equal to 8 */
         break;
     }
 
-    /* if the user supplied no bitrate, but supplied a level which is higher
-     * than the current detected level, assume the user knows that the bitrate
-     * will be high and use their specified level */
-    if (!param.rc.bitrate && i + 1 < NumLevels && levels[i].levelIdc < param.levelIdc)
-    {
-        while (i + 1 < NumLevels && levels[i].levelIdc < param.levelIdc)
-            i++;
-        vps.ptl.levelIdc = levels[i].levelEnum;
-    }
-
     static const char *profiles[] = { "None", "Main", "Main10", "Mainstillpicture" };
     static const char *tiers[]    = { "Main", "High" };
     x265_log(&param, X265_LOG_INFO, "%s profile, Level-%s (%s tier)\n",
@@ -151,31 +148,35 @@
  * decoder meeting this level of requirement.  Some parameters (resolution and
  * frame rate) are non-negotiable and thus this function may fail. In those
  * circumstances it will be quite noisy */
-void enforceLevel(x265_param& param, VPS& vps)
+bool enforceLevel(x265_param& param, VPS& vps)
 {
     vps.numReorderPics = (param.bBPyramid && param.bframes > 1) ? 2 : 1;
     vps.maxDecPicBuffering = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics + 1, (uint32_t)param.maxNumReferences) + vps.numReorderPics);
 
-    if (param.levelIdc < 0)
-        return;
+    /* no level specified by user, just auto-detect from the configuration */
+    if (param.levelIdc <= 0)
+        return true;
+
+    int level = 0;
+    while (levels[level].levelIdc != param.levelIdc && level + 1 < sizeof(levels) / sizeof(levels[0]))
+        level++;
+    if (levels[level].levelIdc != param.levelIdc)
+    {
+        x265_log(&param, X265_LOG_WARNING, "specified level %d does not exist\n", param.levelIdc);
+        return false;
+    }
+
+    LevelSpec& l = levels[level];
+    bool highTier = !!param.bHighTier;
+    if (highTier && l.maxBitrateHigh == MAX_UINT)
+    {
+        highTier = false;
+        x265_log(&param, X265_LOG_WARNING, "Level %s has no High tier, using Main tier\n", l.name);
+    }
 
     uint32_t lumaSamples = param.sourceWidth * param.sourceHeight;
     uint32_t samplesPerSec = (uint32_t)(lumaSamples * ((double)param.fpsNum / param.fpsDenom));
-    int level = 1;
-    while (levels[level].levelIdc < param.levelIdc && levels[level].levelIdc)
-        level++;
-    LevelSpec& l = levels[level];
-
-    if (!l.levelIdc)
-    {
-        x265_log(&param, X265_LOG_WARNING, "specified level does not exist\n");
-        return;
-    }
-    if (l.levelIdc != param.levelIdc)
-        x265_log(&param, X265_LOG_WARNING, "Using nearest matching level %s\n", l.name);
-
     bool ok = true;
-
     if (lumaSamples > l.maxLumaSamples)
         ok = false;
     else if (param.sourceWidth > sqrt(l.maxLumaSamples * 8.0f))
@@ -183,16 +184,60 @@
     else if (param.sourceHeight > sqrt(l.maxLumaSamples * 8.0f))
         ok = false;
     if (!ok)
+    {
         x265_log(&param, X265_LOG_WARNING, "picture dimensions are out of range for specified level\n");
+        return false;
+    }
     else if (samplesPerSec > l.maxLumaSamplesPerSecond)
+    {
         x265_log(&param, X265_LOG_WARNING, "frame rate is out of range for specified level\n");
-
-    if (param.rc.bitrate > (int)l.maxBitrateHigh && l.maxBitrateHigh != MAX_UINT)
-    {
-        param.rc.bitrate = l.maxBitrateHigh;
-        x265_log(&param, X265_LOG_INFO, "Lowering target bitrate to High tier limit of %dKbps\n", param.rc.bitrate);
+        return false;
     }
 
+    if ((uint32_t)param.rc.vbvMaxBitrate > (highTier ? l.maxBitrateHigh : l.maxBitrateMain))
+    {
+        param.rc.vbvMaxBitrate = highTier ? l.maxBitrateHigh : l.maxBitrateMain;
+        x265_log(&param, X265_LOG_INFO, "lowering VBV max bitrate to %dKbps\n", param.rc.vbvMaxBitrate);
+    }
+    if ((uint32_t)param.rc.vbvBufferSize > (highTier ? l.maxCpbSizeHigh : l.maxCpbSizeMain))
+    {
+        param.rc.vbvMaxBitrate = highTier ? l.maxCpbSizeHigh : l.maxCpbSizeMain;
+        x265_log(&param, X265_LOG_INFO, "lowering VBV buffer size to %dKb\n", param.rc.vbvBufferSize);
+    }
+
+    switch (param.rc.rateControlMode)
+    {
+    case X265_RC_ABR:
+        if ((uint32_t)param.rc.bitrate > (highTier ? l.maxBitrateHigh : l.maxBitrateMain))
+        {
+            param.rc.bitrate = l.maxBitrateHigh;
+            x265_log(&param, X265_LOG_INFO, "lowering target bitrate to High tier limit of %dKbps\n", param.rc.bitrate);
+        }
+        break;
+
+    case X265_RC_CQP:
+        x265_log(&param, X265_LOG_WARNING, "Constant QP is inconsistent with specifying a decoder level, no bitrate guarantee is possible.\n");
+        return false;
+
+    case X265_RC_CRF:
+        if (!param.rc.vbvBufferSize || !param.rc.vbvMaxBitrate)
+        {
+            if (!param.rc.vbvMaxBitrate)
+                param.rc.vbvMaxBitrate = highTier ? l.maxBitrateHigh : l.maxBitrateMain;
+            if (!param.rc.vbvBufferSize)
+                param.rc.vbvBufferSize = highTier ? l.maxCpbSizeHigh : l.maxCpbSizeMain;
+            x265_log(&param, X265_LOG_WARNING, "Specifying a decoder level with constant rate factor rate-control requires\n");
+            x265_log(&param, X265_LOG_WARNING, "enabling VBV with vbv-bufsize=%dkb vbv-maxrate=%dkbps. VBV outputs are non-deterministic!\n",
+                     param.rc.vbvBufferSize, param.rc.vbvMaxBitrate);
+        }
+        break;
+
+    default:
+        x265_log(&param, X265_LOG_ERROR, "Unknown rate control mode is inconsistent with specifying a decoder level\n");
+        return false;
+    }
+
+    /* The value of sps_max_dec_pic_buffering_minus1[ HighestTid ] + 1 shall be less than or equal to MaxDpbSize */
     const uint32_t MaxDpbPicBuf = 6;
     uint32_t maxDpbSize = MaxDpbPicBuf;
     if (lumaSamples <= (l.maxLumaSamples >> 2))
@@ -208,8 +253,24 @@
         param.maxNumReferences--;
         vps.maxDecPicBuffering = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics + 1, (uint32_t)param.maxNumReferences) + vps.numReorderPics);
     }
-
     if (param.maxNumReferences != savedRefCount)
         x265_log(&param, X265_LOG_INFO, "Lowering max references to %d to meet level requirement\n", param.maxNumReferences);
+
+    /* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
+    if (param.levelIdc >= 50 && param.maxCUSize < 32)
+    {
+        param.maxCUSize = 32;
+        x265_log(&param, X265_LOG_INFO, "Levels 5.0 and above require a maximum CTU size of at least 32, using --ctu 32\n");
+    }
+
+    /* The value of NumPocTotalCurr shall be less than or equal to 8 */
+    int numPocTotalCurr = param.maxNumReferences + !!param.bframes;
+    if (numPocTotalCurr > 8)
+    {
+        param.maxNumReferences = 8 - !!param.bframes;
+        x265_log(&param, X265_LOG_INFO, "Lowering max references to %d to meet numPocTotalCurr requirement\n", param.maxNumReferences);
+    }
+
+    return true;
 }
 }
diff -r 5835fa3a8281 -r 14ecc7996a6c source/encoder/level.h
--- a/source/encoder/level.h	Mon Jul 21 20:03:32 2014 -0500
+++ b/source/encoder/level.h	Mon Jul 21 19:49:19 2014 -0500
@@ -32,7 +32,7 @@
 
 struct VPS;
 void determineLevel(const x265_param &param, VPS& vps);
-void enforceLevel(x265_param& param, VPS& vps);
+bool enforceLevel(x265_param& param, VPS& vps);
 
 }
 
diff -r 5835fa3a8281 -r 14ecc7996a6c source/x265.cpp
--- a/source/x265.cpp	Mon Jul 21 20:03:32 2014 -0500
+++ b/source/x265.cpp	Mon Jul 21 19:49:19 2014 -0500
@@ -70,7 +70,9 @@
     { "frame-threads",  required_argument, NULL, 'F' },
     { "log-level",      required_argument, NULL, 0 },
     { "profile",        required_argument, NULL, 0 },
-    { "level",          required_argument, NULL, 0 },
+    { "level-idc",      required_argument, NULL, 0 },
+    { "high-tier",            no_argument, NULL, 0 },
+    { "no-high-tier",         no_argument, NULL, 0 },
     { "csv",            required_argument, NULL, 0 },
     { "no-cu-stats",          no_argument, NULL, 0 },
     { "cu-stats",             no_argument, NULL, 0 },
@@ -333,7 +335,8 @@
     H0("   --input-res WxH               Source picture size [w x h], auto-detected if Y4M\n");
     H0("   --input-csp <string>          Source color space: i420, i444 or i422, auto-detected if Y4M. Default: i420\n");
     H0("   --profile <string>            Specify the encode profile: main, main10, mainstillpicture\n");
-    H0("   --level <integer|float>       Force a minumum required decoder level (as '5.0' or '50')\n");
+    H0("   --level-idc <integer|float>   Force a minumum required decoder level (as '5.0' or '50')\n");
+    H0("   --high-tier                   If a decoder level is specified, this modifier selects High tier of that level\n");
     H0("   --fps <float|rational>        Source frame rate (float or num/denom), auto-detected if Y4M\n");
     H0("   --[no-]interlace <bff|tff>    Indicate input pictures are interlace fields in temporal order. Default progressive\n");
     H0("   --seek <integer>              First frame to encode\n");
diff -r 5835fa3a8281 -r 14ecc7996a6c source/x265.h
--- a/source/x265.h	Mon Jul 21 20:03:32 2014 -0500
+++ b/source/x265.h	Mon Jul 21 19:49:19 2014 -0500
@@ -390,7 +390,7 @@
      * minimum requirement. All valid HEVC heights are supported */
     int       sourceHeight;
 
-    /* Minimum decoder requirement level. Defaults to -1, which implies auto-
+    /* Minimum decoder requirement level. Defaults to 0, which implies auto-
      * detection by the encoder. If specified, the encoder will attempt to bring
      * the encode specifications within that specified level. If the encoder is
      * unable to reach the level it issues a warning and emits the actual
@@ -400,6 +400,10 @@
      * "5.1" is specified as 51, and level "5.0" is specified as 50. */
     int       levelIdc;
 
+    /* if levelIdc is specified (non-zero) this flag will differentiate between
+     * Main (0) and High (1) tier. Default is Main tier (0) */
+    int       bHighTier;
+
     /* Interlace type of source pictures. 0 - progressive pictures (default).
      * 1 - top field first, 2 - bottom field first. HEVC encodes interlaced
      * content as fields, they must be provided to the encoder in the correct


More information about the x265-devel mailing list