[x265] [PATCH] encoder: Add support for Temporal Layering of the encoded bitstream

aarthi at multicorewareinc.com aarthi at multicorewareinc.com
Mon Feb 2 15:14:12 CET 2015


# HG changeset patch
# User Aarthi Thirumalai
# Date 1422885719 -19800
#      Mon Feb 02 19:31:59 2015 +0530
# Node ID e241b255b992fdf5c83c1744240ff5a6beaa188b
# Parent  1760823cdd46572b5db364cb93495bbff6908e17
encoder: Add support for Temporal Layering of the encoded bitstream.

Use --temporal-layers to enable temporal sub-layers while encoding; this signals the NAL units
of coded slices with their temporalId. Output bitstreams can be extracted either at the base
temporal layer (layer 0), with roughly half the frame rate, or at the higher temporal layer
(layer 1), which decodes all the frames in the sequence.

diff -r 1760823cdd46 -r e241b255b992 doc/reST/cli.rst
--- a/doc/reST/cli.rst	Mon Feb 02 16:27:07 2015 +0530
+++ b/doc/reST/cli.rst	Mon Feb 02 19:31:59 2015 +0530
@@ -1048,15 +1048,6 @@
 	target bitrate in CBR mode. Bitrate adherence is prioritised
 	over quality. Rate tolerance is reduced to 50%. Default disabled.
 	
-	This option is for use-cases which require the final average bitrate 
-	to be within very strict limits of the target - preventing overshoots 
-	completely, and achieve bitrates within 5% of target bitrate, 
-	especially in short segment encodes. Typically, the encoder stays 
-	conservative, waiting until there is enough feedback in terms of 
-	encoded frames to control QP. strict-cbr allows the encoder to be 
-	more aggressive in hitting the target bitrate even for short segment 
-	videos. Experimental.
-	
 .. option:: --cbqpoffs <integer>
 
 	Offset of Cb chroma QP from the luma QP selected by rate control.
@@ -1097,6 +1088,12 @@
 	The maximum single adjustment in QP allowed to rate control. Default
 	4
 
+.. option:: --ratetol <float>
+
+	The degree of rate fluctuation that x265 tolerates. Rate tolerance
+	is used along with overflow (difference between actual and target
+	bitrate), to adjust qp. Default is 1.0
+
 .. option:: --qblur <float>
 
 	Temporally blur quants. Default 0.5
@@ -1374,6 +1371,12 @@
 	1. MD5
 	2. CRC
 	3. Checksum
+	
+.. option:: --temporal-layers,--no-temporal-layers
+
+	Enable temporal sub-layers in the bitstream and signal the temporal layer IDs
+	in the VPS, SPS and coded slice NAL unit headers. Currently, the number of
+	temporal sub-layers (maxTemporalSubLayers) is fixed at 2 when this option is on.
 
 Debugging options
 =================
diff -r 1760823cdd46 -r e241b255b992 source/common/param.cpp
--- a/source/common/param.cpp	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/common/param.cpp	Mon Feb 02 19:31:59 2015 +0530
@@ -181,6 +181,7 @@
     param->bIntraInBFrames = 0;
     param->bLossless = 0;
     param->bCULossless = 0;
+    param->bEnableTemporalSubLayers = 1;
 
     /* Rate control options */
     param->rc.vbvMaxBitrate = 0;
@@ -806,6 +807,10 @@
     OPT("scaling-list") p->scalingLists = strdup(value);
     OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
     OPT("analysis-file") p->analysisFileName = strdup(value);
+    OPT("temporal-layers")
+    {
+        p->bEnableTemporalSubLayers = atobool(value);
+    }
     else
         return X265_PARAM_BAD_NAME;
 #undef OPT
diff -r 1760823cdd46 -r e241b255b992 source/common/slice.h
--- a/source/common/slice.h	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/common/slice.h	Mon Feb 02 19:31:59 2015 +0530
@@ -149,6 +149,7 @@
 
 struct VPS
 {
+    uint32_t         maxTempSubLayers;
     uint32_t         numReorderPics;
     uint32_t         maxDecPicBuffering;
     HRDInfo          hrdParameters;
@@ -228,6 +229,7 @@
     bool     bUseAMP; // use param
     uint32_t maxAMPDepth;
 
+    uint32_t maxTempSubLayers; // max number of Temporal Sub layers
     uint32_t maxDecPicBuffering; // these are dups of VPS values
     int      numReorderPics;
     int      maxLatencyIncrease;
diff -r 1760823cdd46 -r e241b255b992 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/encoder.cpp	Mon Feb 02 19:31:59 2015 +0530
@@ -1331,6 +1331,7 @@
     sps->bUseAMP = m_param->bEnableAMP;
     sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
 
+    sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
     sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
     sps->numReorderPics = m_vps.numReorderPics;
     sps->maxLatencyIncrease = m_param->bframes;
@@ -1540,6 +1541,16 @@
         p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
     }
 
+    if (p->bEnableTemporalSubLayers)
+    {
+        if (p->bFrameAdaptive)
+            x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --b-adapt 0\n");
+        p->bFrameAdaptive = 0;
+        if (p->scenecutThreshold)
+            x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --scenecut 0\n");
+        p->scenecutThreshold = 0;
+    }
+
     m_bframeDelay = p->bframes ? (p->bBPyramid ? 2 : 1) : 0;
 
     p->bFrameBias = X265_MIN(X265_MAX(-90, p->bFrameBias), 100);
diff -r 1760823cdd46 -r e241b255b992 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/entropy.cpp	Mon Feb 02 19:31:59 2015 +0530
@@ -51,17 +51,20 @@
     WRITE_CODE(0,       4, "vps_video_parameter_set_id");
     WRITE_CODE(3,       2, "vps_reserved_three_2bits");
     WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
-    WRITE_CODE(0,       3, "vps_max_sub_layers_minus1");
-    WRITE_FLAG(1,          "vps_temporal_id_nesting_flag");
+    WRITE_CODE(vps.maxTempSubLayers - 1,       3, "vps_max_sub_layers_minus1");
+    WRITE_FLAG(vps.maxTempSubLayers == 1 ? 1 : 0, "vps_temporal_id_nesting_flag");
     WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
 
-    codeProfileTier(vps.ptl);
+    codeProfileTier(vps.ptl ,vps.maxTempSubLayers - 1);
 
     WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
-    WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
-    WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
+    for(uint32_t i = 0; i <= vps.maxTempSubLayers - 1; i++)
+    {
+        WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
+        WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
+        WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
+    }
 
-    WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
     WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
     WRITE_UVLC(0,    "vps_max_op_sets_minus1");
     WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
@@ -71,10 +74,10 @@
 void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
 {
     WRITE_CODE(0, 4, "sps_video_parameter_set_id");
-    WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
-    WRITE_FLAG(1,    "sps_temporal_id_nesting_flag");
+    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
+    WRITE_FLAG(sps.maxTempSubLayers == 1 ? 1 : 0, "sps_temporal_id_nesting_flag");
 
-    codeProfileTier(ptl);
+    codeProfileTier(ptl, sps.maxTempSubLayers - 1);
 
     WRITE_UVLC(0, "sps_seq_parameter_set_id");
     WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
@@ -101,9 +104,12 @@
     WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
     WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
 
-    WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
-    WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
-    WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
+    for(uint32_t i = 0; i <= sps.maxTempSubLayers - 1; i++)
+    {
+        WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
+        WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
+        WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
+    }
 
     WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
     WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
@@ -184,7 +190,7 @@
     WRITE_FLAG(0, "pps_extension_flag");
 }
 
-void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
+void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1)
 {
     WRITE_CODE(0, 2,                "XXX_profile_space[]");
     WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
@@ -222,6 +228,13 @@
     }
 
     WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
+    if (maxTempSubLayerMinus1 > 0)
+    {
+      WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
+      WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
+      for (int i = maxTempSubLayerMinus1; i < 8 ; i++)
+          WRITE_CODE(0, 2, "reserved_zero_2bits");
+    }
 }
 
 void Entropy::codeVUI(const VUI& vui)
@@ -331,24 +344,27 @@
 
 void Entropy::codeHrdParameters(const HRDInfo& hrd)
 {
-    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
-    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
-    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
+    for(int i = 0; i <= 1; i++)
+    {
+        WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
+        WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
+        WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
 
-    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
-    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
+        WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
+        WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
 
-    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
-    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
-    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
+        WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
+        WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
+        WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
 
-    WRITE_FLAG(1, "fixed_pic_rate_general_flag");
-    WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
-    WRITE_UVLC(0, "cpb_cnt_minus1");
+        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
+        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
+        WRITE_UVLC(0, "cpb_cnt_minus1");
 
-    WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
-    WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
-    WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
+        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
+        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
+        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
+    }
 }
 
 void Entropy::codeAUD(const Slice& slice)
diff -r 1760823cdd46 -r e241b255b992 source/encoder/entropy.h
--- a/source/encoder/entropy.h	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/entropy.h	Mon Feb 02 19:31:59 2015 +0530
@@ -230,7 +230,7 @@
     void writeEpExGolomb(uint32_t symbol, uint32_t count);
     void writeCoefRemainExGolomb(uint32_t symbol, const uint32_t absGoRice);
 
-    void codeProfileTier(const ProfileTierLevel& ptl);
+    void codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1);
     void codeScalingList(const ScalingList&);
     void codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId);
 
diff -r 1760823cdd46 -r e241b255b992 source/encoder/level.cpp
--- a/source/encoder/level.cpp	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/level.cpp	Mon Feb 02 19:31:59 2015 +0530
@@ -60,6 +60,7 @@
 /* determine minimum decoder level required to decode the described video */
 void determineLevel(const x265_param &param, VPS& vps)
 {
+    vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
     if (param.bLossless)
         vps.ptl.profileIdc = Profile::NONE;
     else if (param.internalCsp == X265_CSP_I420)
diff -r 1760823cdd46 -r e241b255b992 source/encoder/nal.cpp
--- a/source/encoder/nal.cpp	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/nal.cpp	Mon Feb 02 19:31:59 2015 +0530
@@ -107,6 +107,9 @@
      * nuh_reserved_zero_6bits  6-bits
      * nuh_temporal_id_plus1    3-bits */
     out[bytes++] = (uint8_t)nalUnitType << 1;
+    if (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N)
+        out[bytes++] = 2;
+    else
     out[bytes++] = 1;
 
     /* 7.4.1 ...
diff -r 1760823cdd46 -r e241b255b992 source/x265.h
--- a/source/x265.h	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/x265.h	Mon Feb 02 19:31:59 2015 +0530
@@ -789,6 +789,12 @@
      * CU. */
     int       bCULossless;
 
+    /* Enable Temporal Sub Layers while encoding, signals NAL units of coded slices
+     * with their temporalId. Output bitstreams can be extracted either at the base temporal layer
+     * (layer 0) with roughly half the frame rate or at a higher temporal layer (layer 1)
+     * that decodes all the frames in the sequence. */
+    int       bEnableTemporalSubLayers;
+
     /*== Rate Control ==*/
 
     struct
diff -r 1760823cdd46 -r e241b255b992 source/x265cli.h
--- a/source/x265cli.h	Mon Feb 02 16:27:07 2015 +0530
+++ b/source/x265cli.h	Mon Feb 02 19:31:59 2015 +0530
@@ -193,6 +193,7 @@
     { "analysis-mode",  required_argument, NULL, 0 },
     { "analysis-file",  required_argument, NULL, 0 },
     { "strict-cbr",           no_argument, NULL, 0 },
+    { "temporal-layers",        no_argument, NULL, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },


More information about the x265-devel mailing list