[x265] [PATCH 1 of 2] encoder: Add support for Temporal Layering of the encoded bitstream

Tue Feb 3 18:07:29 CET 2015

On 02/03, aarthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Aarthi Thirumalai
> # Date 1422960681 -19800
> #      Tue Feb 03 16:21:21 2015 +0530
> # Node ID 830d29c97117c609585b7c18cc75f120a89ce79e
> # Parent  4583eda4cf55e9a7f5c11d1ea660367f3822af53
> encoder: Add support for Temporal Layering of the encoded bitstream.
> 
> Implements Temporal Sub Layers while encoding, signals NAL units of coded slices
> with their temporalId. Output bitstreams can be extracted either at the base temporal layer
> (layer 0) with roughly half the frame rate or at a higher temporal layer (layer 1)
> that decodes all the frames in the sequence.

Queued, with some tweaks (described inline below).

> diff -r 4583eda4cf55 -r 830d29c97117 source/common/param.cpp
> --- a/source/common/param.cpp	Mon Feb 02 17:03:40 2015 +0530
> +++ b/source/common/param.cpp	Tue Feb 03 16:21:21 2015 +0530
> @@ -181,6 +181,7 @@
>      param->bIntraInBFrames = 0;
>      param->bLossless = 0;
>      param->bCULossless = 0;
> +    param->bEnableTemporalSubLayers = 0;
>  
>      /* Rate control options */
>      param->rc.vbvMaxBitrate = 0;
> diff -r 4583eda4cf55 -r 830d29c97117 source/common/slice.h
> --- a/source/common/slice.h	Mon Feb 02 17:03:40 2015 +0530
> +++ b/source/common/slice.h	Tue Feb 03 16:21:21 2015 +0530
> @@ -149,6 +149,7 @@
>  
>  struct VPS
>  {
> +    uint32_t         maxTempSubLayers;
>      uint32_t         numReorderPics;
>      uint32_t         maxDecPicBuffering;
>      HRDInfo          hrdParameters;
> @@ -228,6 +229,7 @@
>      bool     bUseAMP; // use param
>      uint32_t maxAMPDepth;
>  
> +    uint32_t maxTempSubLayers; // max number of Temporal Sub layers
>      uint32_t maxDecPicBuffering; // these are dups of VPS values
>      int      numReorderPics;
>      int      maxLatencyIncrease;
> diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp	Mon Feb 02 17:03:40 2015 +0530
> +++ b/source/encoder/encoder.cpp	Tue Feb 03 16:21:21 2015 +0530
> @@ -669,6 +669,14 @@
>              /* determine references, setup RPS, etc */
>              m_dpb->prepareEncode(frameEnc);
>  
> +            /* If temporal Layers are enabled, set tempLayer and NAL Unit Type.
> +             * For now, there are a max of 2 layers, with the non Referenced B frames
> +             * forming the temporal sublayer and other referenced frames are on the base layer */

this comment is new and stale at the same time :)

> +            if (!frameEnc->m_encData->m_bHasReferences && m_param->bEnableTemporalSubLayers)
> +            {
> +                frameEnc->m_encData->m_slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_TSA_N;
> +            }
> +

I've moved this into prepareEncode(), where NAL types are generally
decided.

>              if (m_param->rc.rateControlMode != X265_RC_CQP)
>                  m_lookahead->getEstimatedPictureCost(frameEnc);
>  
> @@ -1413,6 +1421,7 @@
>      sps->bUseAMP = m_param->bEnableAMP;
>      sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
>  
> +    sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
>      sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
>      sps->numReorderPics = m_vps.numReorderPics;
>      sps->maxLatencyIncrease = m_param->bframes;
> @@ -1622,6 +1631,19 @@
>          p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
>      }
>  
> +    if (p->bEnableTemporalSubLayers)
> +    {
> +        if (p->bFrameAdaptive)
> +            x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --b-adapt 0\n");
> +        p->bFrameAdaptive = 0;
> +        if (p->scenecutThreshold)
> +            x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --no -scenecut\n");
> +        p->scenecutThreshold = 0;
> +        if (p->bframes != 3)
> +            x265_log(p, X265_LOG_WARNING, "Ideally 3 bframes are needed to generate a base temporal layer bitstream with half the fps, requires --bframes 3\n");
> +        p->bframes = 3;

I don't see a reason to force bframes=3 here

> +    }
> +
>      m_bframeDelay = p->bframes ? (p->bBPyramid ? 2 : 1) : 0;
>  
>      p->bFrameBias = X265_MIN(X265_MAX(-90, p->bFrameBias), 100);
> diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp	Mon Feb 02 17:03:40 2015 +0530
> +++ b/source/encoder/entropy.cpp	Tue Feb 03 16:21:21 2015 +0530
> @@ -51,17 +51,22 @@
>      WRITE_CODE(0,       4, "vps_video_parameter_set_id");
>      WRITE_CODE(3,       2, "vps_reserved_three_2bits");
>      WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
> -    WRITE_CODE(0,       3, "vps_max_sub_layers_minus1");
> -    WRITE_FLAG(1,          "vps_temporal_id_nesting_flag");
> +    WRITE_CODE(vps.maxTempSubLayers - 1,       3, "vps_max_sub_layers_minus1");
> +    // Temporal Id Nesting is disabled only when maxTemporalLayers > 1
> +    WRITE_FLAG(!(vps.maxTempSubLayers - 1),       "vps_temporal_id_nesting_flag");
>      WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
>  
> -    codeProfileTier(vps.ptl);
> +    codeProfileTier(vps.ptl ,vps.maxTempSubLayers - 1);
>  
>      WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
> -    WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
> -    WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
>  
> -    WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
> +    for (uint32_t i = 0; i <= vps.maxTempSubLayers - 1; i++)
> +    {
> +        WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
> +        WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
> +        WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
> +    }
> +
>      WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
>      WRITE_UVLC(0,    "vps_max_op_sets_minus1");
>      WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
> @@ -71,10 +76,11 @@
>  void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
>  {
>      WRITE_CODE(0, 4, "sps_video_parameter_set_id");
> -    WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
> -    WRITE_FLAG(1,    "sps_temporal_id_nesting_flag");
> +    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
> +    // Temporal Id Nesting is disabled only when maxTemporalLayers > 1
> +    WRITE_FLAG(!(sps.maxTempSubLayers - 1), "sps_temporal_id_nesting_flag");
>  
> -    codeProfileTier(ptl);
> +    codeProfileTier(ptl, sps.maxTempSubLayers - 1);
>  
>      WRITE_UVLC(0, "sps_seq_parameter_set_id");
>      WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
> @@ -101,9 +107,12 @@
>      WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
>      WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
>  
> -    WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
> -    WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
> -    WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
> +    for (uint32_t i = 0; i <= sps.maxTempSubLayers - 1; i++)
> +    {
> +        WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
> +        WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
> +        WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
> +    }
>  
>      WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
>      WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
> @@ -184,7 +193,7 @@
>      WRITE_FLAG(0, "pps_extension_flag");
>  }
>  
> -void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
> +void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1)
>  {
>      WRITE_CODE(0, 2,                "XXX_profile_space[]");
>      WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
> @@ -222,6 +231,14 @@
>      }
>  
>      WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
> +
> +    if (maxTempSubLayerMinus1)
> +    {
> +         WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
> +         WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
> +         for (int i = maxTempSubLayerMinus1; i < 8 ; i++)
> +             WRITE_CODE(0, 2, "reserved_zero_2bits");
> +    }
>  }
>  
>  void Entropy::codeVUI(const VUI& vui)
> @@ -331,24 +348,27 @@
>  
>  void Entropy::codeHrdParameters(const HRDInfo& hrd)
>  {
> -    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
> -    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
> -    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
> +    for(int i = 0; i <= 1; i++)

Whitespace fixed here (`for(` -> `for (`).

> +    {
> +        WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
> +        WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
> +        WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
>  
> -    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
> -    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
> +        WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
> +        WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
>  
> -    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
> -    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
> -    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
> +        WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
> +        WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
> +        WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
>  
> -    WRITE_FLAG(1, "fixed_pic_rate_general_flag");
> -    WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
> -    WRITE_UVLC(0, "cpb_cnt_minus1");
> +        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
> +        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
> +        WRITE_UVLC(0, "cpb_cnt_minus1");
>  
> -    WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
> -    WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
> -    WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
> +        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
> +        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
> +        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
> +    }
>  }
>  
>  void Entropy::codeAUD(const Slice& slice)
> diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/entropy.h
> --- a/source/encoder/entropy.h	Mon Feb 02 17:03:40 2015 +0530
> +++ b/source/encoder/entropy.h	Tue Feb 03 16:21:21 2015 +0530
> @@ -230,7 +230,7 @@
>      void writeEpExGolomb(uint32_t symbol, uint32_t count);
>      void writeCoefRemainExGolomb(uint32_t symbol, const uint32_t absGoRice);
>  
> -    void codeProfileTier(const ProfileTierLevel& ptl);
> +    void codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1);
>      void codeScalingList(const ScalingList&);
>      void codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId);
>  
> diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/level.cpp
> --- a/source/encoder/level.cpp	Mon Feb 02 17:03:40 2015 +0530
> +++ b/source/encoder/level.cpp	Tue Feb 03 16:21:21 2015 +0530
> @@ -60,6 +60,7 @@
>  /* determine minimum decoder level required to decode the described video */
>  void determineLevel(const x265_param &param, VPS& vps)
>  {
> +    vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
>      if (param.bLossless)
>          vps.ptl.profileIdc = Profile::NONE;
>      else if (param.internalCsp == X265_CSP_I420)
> diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/nal.cpp
> --- a/source/encoder/nal.cpp	Mon Feb 02 17:03:40 2015 +0530
> +++ b/source/encoder/nal.cpp	Tue Feb 03 16:21:21 2015 +0530
> @@ -107,7 +107,7 @@
>       * nuh_reserved_zero_6bits  6-bits
>       * nuh_temporal_id_plus1    3-bits */
>      out[bytes++] = (uint8_t)nalUnitType << 1;
> -    out[bytes++] = 1;
> +    out[bytes++] = 1 + (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N);
>  
>      /* 7.4.1 ...
>       * Within the NAL unit, the following three-byte sequences shall not occur at
> diff -r 4583eda4cf55 -r 830d29c97117 source/x265.h
> --- a/source/x265.h	Mon Feb 02 17:03:40 2015 +0530
> +++ b/source/x265.h	Tue Feb 03 16:21:21 2015 +0530
> @@ -791,6 +791,12 @@
>       * CU. */
>      int       bCULossless;
>  
> +    /* Enable Temporal Sub Layers while encoding, signals NAL units of coded slices
> +     * with their temporalId. Output bitstreams can be extracted either at the base temporal layer
> +     * (layer 0) with roughly half the frame rate or at a higher temporal layer (layer 1)
> +     * that decodes all the frames in the sequence. */
> +    int       bEnableTemporalSubLayers;
> +
>      /*== Rate Control ==*/
>  
>      struct
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho