[x265] [PATCH] encoder: Add support for Temporal Layering of the encoded bitstream

Mon Feb 2 19:38:19 CET 2015

On 02/02, aarthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Aarthi Thirumalai
> # Date 1422885719 -19800
> #      Mon Feb 02 19:31:59 2015 +0530
> # Node ID e241b255b992fdf5c83c1744240ff5a6beaa188b
> # Parent  1760823cdd46572b5db364cb93495bbff6908e17
> encoder: Add support for Temporal Layering of the encoded bitstream.
> 
> use --temporal-layers to enable Temporal Sub Layers while encoding, signals NAL units of coded slices
> with their temporalId. Output bitstreams can be extracted either at the base temporal layer
> (layer 0) with roughly half the frame rate or at a higher temporal layer (layer 1)
> that decodes all the frames in the sequence.

implementation looks mostly ok, lots of nits

> 
> diff -r 1760823cdd46 -r e241b255b992 doc/reST/cli.rst
> --- a/doc/reST/cli.rst	Mon Feb 02 16:27:07 2015 +0530
> +++ b/doc/reST/cli.rst	Mon Feb 02 19:31:59 2015 +0530
> @@ -1048,15 +1048,6 @@
>  	target bitrate in CBR mode. Bitrate adherence is prioritised
>  	over quality. Rate tolerance is reduced to 50%. Default disabled.
>  	
> -	This option is for use-cases which require the final average bitrate 
> -	to be within very strict limits of the target - preventing overshoots 
> -	completely, and achieve bitrates within 5% of target bitrate, 
> -	especially in short segment encodes. Typically, the encoder stays 
> -	conservative, waiting until there is enough feedback in terms of 
> -	encoded frames to control QP. strict-cbr allows the encoder to be 
> -	more aggressive in hitting the target bitrate even for short segment 
> -	videos. Experimental.

This doc change and the one below, if deliberate, should be a separate patch

>  .. option:: --cbqpoffs <integer>
>  
>  	Offset of Cb chroma QP from the luma QP selected by rate control.
> @@ -1097,6 +1088,12 @@
>  	The maximum single adjustment in QP allowed to rate control. Default
>  	4
>  
> +.. option:: --ratetol <float>
> +
> +	The degree of rate fluctuation that x265 tolerates. Rate tolerance
> +	is used along with overflow (difference between actual and target
> +	bitrate), to adjust qp. Default is 1.0
> +
>  .. option:: --qblur <float>
>  
>  	Temporally blur quants. Default 0.5
> @@ -1374,6 +1371,12 @@
>  	1. MD5
>  	2. CRC
>  	3. Checksum
> +	
> +.. option:: --temporal-layers,--no-temporal-layers
> +
> +	Enable Temporal Sub Layers in the bitstream and signal the temporal layer ids
> +	in the VPS, SPS and coded slice NAL unit headers. As of now, 
> +	maxTemporalSubLayers that can be enabled = 2 when this option is turned on. 
>  
>  Debugging options
>  =================
> diff -r 1760823cdd46 -r e241b255b992 source/common/param.cpp
> --- a/source/common/param.cpp	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/common/param.cpp	Mon Feb 02 19:31:59 2015 +0530
> @@ -181,6 +181,7 @@
>      param->bIntraInBFrames = 0;
>      param->bLossless = 0;
>      param->bCULossless = 0;
> +    param->bEnableTemporalSubLayers = 1;
>  
>      /* Rate control options */
>      param->rc.vbvMaxBitrate = 0;
> @@ -806,6 +807,10 @@
>      OPT("scaling-list") p->scalingLists = strdup(value);
>      OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
>      OPT("analysis-file") p->analysisFileName = strdup(value);
> +    OPT("temporal-layers")
> +    {
> +        p->bEnableTemporalSubLayers = atobool(value);
> +    }

no need for braces, and this option should probably be near the other
slicetype decision options like open-gop and scenecut

>      else
>          return X265_PARAM_BAD_NAME;
>  #undef OPT
> diff -r 1760823cdd46 -r e241b255b992 source/common/slice.h
> --- a/source/common/slice.h	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/common/slice.h	Mon Feb 02 19:31:59 2015 +0530
> @@ -149,6 +149,7 @@
>  
>  struct VPS
>  {
> +    uint32_t         maxTempSubLayers;
>      uint32_t         numReorderPics;
>      uint32_t         maxDecPicBuffering;
>      HRDInfo          hrdParameters;
> @@ -228,6 +229,7 @@
>      bool     bUseAMP; // use param
>      uint32_t maxAMPDepth;
>  
> +    uint32_t maxTempSubLayers; // max number of Temporal Sub layers
>      uint32_t maxDecPicBuffering; // these are dups of VPS values
>      int      numReorderPics;
>      int      maxLatencyIncrease;
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/encoder.cpp	Mon Feb 02 19:31:59 2015 +0530
> @@ -1331,6 +1331,7 @@
>      sps->bUseAMP = m_param->bEnableAMP;
>      sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
>  
> +    sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
>      sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
>      sps->numReorderPics = m_vps.numReorderPics;
>      sps->maxLatencyIncrease = m_param->bframes;
> @@ -1540,6 +1541,16 @@
>          p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
>      }
>  
> +    if (p->bEnableTemporalSubLayers)
> +    {
> +        if (p->bFrameAdaptive)
> +            x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --b-adapt 0\n");
> +        p->bFrameAdaptive = 0;
> +        if (p->scenecutThreshold)
> +            x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --scenecut 0\n");

nit: we should be recommending --no-scenecut

> +        p->scenecutThreshold = 0;
> +    }
> +
>      m_bframeDelay = p->bframes ? (p->bBPyramid ? 2 : 1) : 0;
>  
>      p->bFrameBias = X265_MIN(X265_MAX(-90, p->bFrameBias), 100);
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/entropy.cpp	Mon Feb 02 19:31:59 2015 +0530
> @@ -51,17 +51,20 @@
>      WRITE_CODE(0,       4, "vps_video_parameter_set_id");
>      WRITE_CODE(3,       2, "vps_reserved_three_2bits");
>      WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
> -    WRITE_CODE(0,       3, "vps_max_sub_layers_minus1");
> -    WRITE_FLAG(1,          "vps_temporal_id_nesting_flag");
> +    WRITE_CODE(vps.maxTempSubLayers - 1,       3, "vps_max_sub_layers_minus1");
> +    WRITE_FLAG(vps.maxTempSubLayers == 1 ? 1 : 0, "vps_temporal_id_nesting_flag");

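The ternary is redundant: (vps.maxTempSubLayers == 1) is already 0 or 1.
Note that !!vps.maxTempSubLayers would not be equivalent, since it is 1 for
any non-zero layer count. We should probably rip off x264's BOOLIFY macro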

>      WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
>  
> -    codeProfileTier(vps.ptl);
> +    codeProfileTier(vps.ptl ,vps.maxTempSubLayers - 1);
>  
>      WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
> -    WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
> -    WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
> +    for(uint32_t i = 0; i <= vps.maxTempSubLayers - 1; i++)

white-space: add a space after the "for" keyword

> +    {
> +        WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
> +        WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
> +        WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
> +    }
>  
> -    WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
>      WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
>      WRITE_UVLC(0,    "vps_max_op_sets_minus1");
>      WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
> @@ -71,10 +74,10 @@
>  void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
>  {
>      WRITE_CODE(0, 4, "sps_video_parameter_set_id");
> -    WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
> -    WRITE_FLAG(1,    "sps_temporal_id_nesting_flag");
> +    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
> +    WRITE_FLAG(sps.maxTempSubLayers == 1 ? 1 : 0, "sps_temporal_id_nesting_flag");

ditto

> -    codeProfileTier(ptl);
> +    codeProfileTier(ptl, sps.maxTempSubLayers - 1);
>  
>      WRITE_UVLC(0, "sps_seq_parameter_set_id");
>      WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
> @@ -101,9 +104,12 @@
>      WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
>      WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
>  
> -    WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
> -    WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
> -    WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
> +    for(uint32_t i = 0; i <= sps.maxTempSubLayers - 1; i++)
> +    {
> +        WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
> +        WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
> +        WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
> +    }

At first glance, it seems like these values would be different per
layer, but I am no expert on the subject.

>      WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
>      WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
> @@ -184,7 +190,7 @@
>      WRITE_FLAG(0, "pps_extension_flag");
>  }
>  
> -void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
> +void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1)
>  {
>      WRITE_CODE(0, 2,                "XXX_profile_space[]");
>      WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
> @@ -222,6 +228,13 @@
>      }
>  
>      WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
> +    if (maxTempSubLayerMinus1 > 0)

if (maxTempSubLayerMinus1)

> +    {
> +      WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
> +      WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
> +      for (int i = maxTempSubLayerMinus1; i < 8 ; i++)
> +          WRITE_CODE(0, 2, "reserved_zero_2bits");

indent: this block uses a 6-space indent; use 4 spaces like the surrounding code

> +    }
>  }
>  
>  void Entropy::codeVUI(const VUI& vui)
> @@ -331,24 +344,27 @@
>  
>  void Entropy::codeHrdParameters(const HRDInfo& hrd)
>  {
> -    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
> -    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
> -    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
> +    for(int i = 0; i <= 1; i++)
> +    {
> +        WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
> +        WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
> +        WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
>  
> -    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
> -    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
> +        WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
> +        WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
>  
> -    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
> -    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
> -    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
> +        WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
> +        WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
> +        WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
>  
> -    WRITE_FLAG(1, "fixed_pic_rate_general_flag");
> -    WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
> -    WRITE_UVLC(0, "cpb_cnt_minus1");
> +        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
> +        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
> +        WRITE_UVLC(0, "cpb_cnt_minus1");
>  
> -    WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
> -    WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
> -    WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
> +        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
> +        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
> +        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
> +    }
>  }
>  
>  void Entropy::codeAUD(const Slice& slice)
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/entropy.h
> --- a/source/encoder/entropy.h	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/entropy.h	Mon Feb 02 19:31:59 2015 +0530
> @@ -230,7 +230,7 @@
>      void writeEpExGolomb(uint32_t symbol, uint32_t count);
>      void writeCoefRemainExGolomb(uint32_t symbol, const uint32_t absGoRice);
>  
> -    void codeProfileTier(const ProfileTierLevel& ptl);
> +    void codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1);
>      void codeScalingList(const ScalingList&);
>      void codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId);
>  
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/level.cpp
> --- a/source/encoder/level.cpp	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/level.cpp	Mon Feb 02 19:31:59 2015 +0530
> @@ -60,6 +60,7 @@
>  /* determine minimum decoder level required to decode the described video */
>  void determineLevel(const x265_param &param, VPS& vps)
>  {
> +    vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
>      if (param.bLossless)
>          vps.ptl.profileIdc = Profile::NONE;
>      else if (param.internalCsp == X265_CSP_I420)
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/nal.cpp
> --- a/source/encoder/nal.cpp	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/nal.cpp	Mon Feb 02 19:31:59 2015 +0530
> @@ -107,6 +107,9 @@
>       * nuh_reserved_zero_6bits  6-bits
>       * nuh_temporal_id_plus1    3-bits */
>      out[bytes++] = (uint8_t)nalUnitType << 1;
> +    if (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N)
> +        out[bytes++] = 2;
> +    else
>      out[bytes++] = 1;

out[bytes++] = 1 + (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N);

>  
>      /* 7.4.1 ...
> diff -r 1760823cdd46 -r e241b255b992 source/x265.h
> --- a/source/x265.h	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/x265.h	Mon Feb 02 19:31:59 2015 +0530
> @@ -789,6 +789,12 @@
>       * CU. */
>      int       bCULossless;
>  
> +    /* Enable Temporal Sub Layers while encoding, signals NAL units of coded slices
> +     * with their temporalId. Output bitstreams can be extracted either at the base temporal layer
> +     * (layer 0) with roughly half the frame rate or at a higher temporal layer (layer 1)
> +     * that decodes all the frames in the sequence. */
> +    int       bEnableTemporalSubLayers;
> +
>      /*== Rate Control ==*/
>  
>      struct
> diff -r 1760823cdd46 -r e241b255b992 source/x265cli.h
> --- a/source/x265cli.h	Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/x265cli.h	Mon Feb 02 19:31:59 2015 +0530
> @@ -193,6 +193,7 @@
>      { "analysis-mode",  required_argument, NULL, 0 },
>      { "analysis-file",  required_argument, NULL, 0 },
>      { "strict-cbr",           no_argument, NULL, 0 },
> +    { "temporal-layers",        no_argument, NULL, 0 },

white-space: align no_argument with the entries above

>      { 0, 0, 0, 0 },
>      { 0, 0, 0, 0 },
>      { 0, 0, 0, 0 },
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho