[x265] [PATCH] encoder: Add support for Temporal Layering of the encoded bitstream
Steve Borho
steve at borho.org
Mon Feb 2 19:38:19 CET 2015
On 02/02, aarthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Aarthi Thirumalai
> # Date 1422885719 -19800
> # Mon Feb 02 19:31:59 2015 +0530
> # Node ID e241b255b992fdf5c83c1744240ff5a6beaa188b
> # Parent 1760823cdd46572b5db364cb93495bbff6908e17
> encoder: Add support for Temporal Layering of the encoded bitstream.
>
> use --temporal-layers to enable Temporal Sub Layers while encoding, signals NAL units of coded slices
> with their temporalId. Output bitstreams can be extracted either at the base temporal layer
> (layer 0) with roughly half the frame rate or at a higher temporal layer (layer 1)
> that decodes all the frames in the sequence.
implementation looks mostly ok, lots of nits
>
> diff -r 1760823cdd46 -r e241b255b992 doc/reST/cli.rst
> --- a/doc/reST/cli.rst Mon Feb 02 16:27:07 2015 +0530
> +++ b/doc/reST/cli.rst Mon Feb 02 19:31:59 2015 +0530
> @@ -1048,15 +1048,6 @@
> target bitrate in CBR mode. Bitrate adherence is prioritised
> over quality. Rate tolerance is reduced to 50%. Default disabled.
>
> - This option is for use-cases which require the final average bitrate
> - to be within very strict limits of the target - preventing overshoots
> - completely, and achieve bitrates within 5% of target bitrate,
> - especially in short segment encodes. Typically, the encoder stays
> - conservative, waiting until there is enough feedback in terms of
> - encoded frames to control QP. strict-cbr allows the encoder to be
> - more aggressive in hitting the target bitrate even for short segment
> - videos. Experimental.
This doc change and the one below, if deliberate, should be a separate patch
> .. option:: --cbqpoffs <integer>
>
> Offset of Cb chroma QP from the luma QP selected by rate control.
> @@ -1097,6 +1088,12 @@
> The maximum single adjustment in QP allowed to rate control. Default
> 4
>
> +.. option:: --ratetol <float>
> +
> + The degree of rate fluctuation that x265 tolerates. Rate tolerance
> + is used along with overflow (difference between actual and target
> + bitrate), to adjust qp. Default is 1.0
> +
> .. option:: --qblur <float>
>
> Temporally blur quants. Default 0.5
> @@ -1374,6 +1371,12 @@
> 1. MD5
> 2. CRC
> 3. Checksum
> +
> +.. option:: --temporal-layers,--no-temporal-layers
> +
> + Enable Temporal Sub Layers in the bitstream and signal the temporal layer ids
> + in the VPS, SPS and coded slice NAL unit headers. As of now,
> + maxTemporalSubLayers that can be enabled = 2 when this option is turned on.
>
> Debugging options
> =================
> diff -r 1760823cdd46 -r e241b255b992 source/common/param.cpp
> --- a/source/common/param.cpp Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/common/param.cpp Mon Feb 02 19:31:59 2015 +0530
> @@ -181,6 +181,7 @@
> param->bIntraInBFrames = 0;
> param->bLossless = 0;
> param->bCULossless = 0;
> + param->bEnableTemporalSubLayers = 1;
>
> /* Rate control options */
> param->rc.vbvMaxBitrate = 0;
> @@ -806,6 +807,10 @@
> OPT("scaling-list") p->scalingLists = strdup(value);
> OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
> OPT("analysis-file") p->analysisFileName = strdup(value);
> + OPT("temporal-layers")
> + {
> + p->bEnableTemporalSubLayers = atobool(value);
> + }
no need for braces, and this option should probably be near the other
slicetype decision options like open-gop and scenecut
> else
> return X265_PARAM_BAD_NAME;
> #undef OPT
> diff -r 1760823cdd46 -r e241b255b992 source/common/slice.h
> --- a/source/common/slice.h Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/common/slice.h Mon Feb 02 19:31:59 2015 +0530
> @@ -149,6 +149,7 @@
>
> struct VPS
> {
> + uint32_t maxTempSubLayers;
> uint32_t numReorderPics;
> uint32_t maxDecPicBuffering;
> HRDInfo hrdParameters;
> @@ -228,6 +229,7 @@
> bool bUseAMP; // use param
> uint32_t maxAMPDepth;
>
> + uint32_t maxTempSubLayers; // max number of Temporal Sub layers
> uint32_t maxDecPicBuffering; // these are dups of VPS values
> int numReorderPics;
> int maxLatencyIncrease;
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/encoder.cpp Mon Feb 02 19:31:59 2015 +0530
> @@ -1331,6 +1331,7 @@
> sps->bUseAMP = m_param->bEnableAMP;
> sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
>
> + sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
> sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
> sps->numReorderPics = m_vps.numReorderPics;
> sps->maxLatencyIncrease = m_param->bframes;
> @@ -1540,6 +1541,16 @@
> p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
> }
>
> + if (p->bEnableTemporalSubLayers)
> + {
> + if (p->bFrameAdaptive)
> + x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --b-adapt 0\n");
> + p->bFrameAdaptive = 0;
> + if (p->scenecutThreshold)
> + x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --scenecut 0\n");
nit: we should be recommending --no-scenecut
> + p->scenecutThreshold = 0;
> + }
> +
> m_bframeDelay = p->bframes ? (p->bBPyramid ? 2 : 1) : 0;
>
> p->bFrameBias = X265_MIN(X265_MAX(-90, p->bFrameBias), 100);
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/entropy.cpp Mon Feb 02 19:31:59 2015 +0530
> @@ -51,17 +51,20 @@
> WRITE_CODE(0, 4, "vps_video_parameter_set_id");
> WRITE_CODE(3, 2, "vps_reserved_three_2bits");
> WRITE_CODE(0, 6, "vps_reserved_zero_6bits");
> - WRITE_CODE(0, 3, "vps_max_sub_layers_minus1");
> - WRITE_FLAG(1, "vps_temporal_id_nesting_flag");
> + WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
> + WRITE_FLAG(vps.maxTempSubLayers == 1 ? 1 : 0, "vps_temporal_id_nesting_flag");
!!vps.maxTempSubLayers. We should probably rip off x264's BOOLIFY macro
> WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
>
> - codeProfileTier(vps.ptl);
> + codeProfileTier(vps.ptl ,vps.maxTempSubLayers - 1);
>
> WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
> - WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
> - WRITE_UVLC(vps.numReorderPics, "vps_num_reorder_pics[i]");
> + for(uint32_t i = 0; i <= vps.maxTempSubLayers - 1; i++)
white-space
> + {
> + WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
> + WRITE_UVLC(vps.numReorderPics, "vps_num_reorder_pics[i]");
> + WRITE_UVLC(0, "vps_max_latency_increase_plus1[i]");
> + }
>
> - WRITE_UVLC(0, "vps_max_latency_increase_plus1[i]");
> WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
> WRITE_UVLC(0, "vps_max_op_sets_minus1");
> WRITE_FLAG(0, "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
> @@ -71,10 +74,10 @@
> void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
> {
> WRITE_CODE(0, 4, "sps_video_parameter_set_id");
> - WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
> - WRITE_FLAG(1, "sps_temporal_id_nesting_flag");
> + WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
> + WRITE_FLAG(sps.maxTempSubLayers == 1 ? 1 : 0, "sps_temporal_id_nesting_flag");
ditto
> - codeProfileTier(ptl);
> + codeProfileTier(ptl, sps.maxTempSubLayers - 1);
>
> WRITE_UVLC(0, "sps_seq_parameter_set_id");
> WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
> @@ -101,9 +104,12 @@
> WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
> WRITE_FLAG(true, "sps_sub_layer_ordering_info_present_flag");
>
> - WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
> - WRITE_UVLC(sps.numReorderPics, "sps_num_reorder_pics[i]");
> - WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
> + for(uint32_t i = 0; i <= sps.maxTempSubLayers - 1; i++)
> + {
> + WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
> + WRITE_UVLC(sps.numReorderPics, "sps_num_reorder_pics[i]");
> + WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
> + }
At first glance, it seems like these values would be different per
layer, but I am no expert on the subject.
> WRITE_UVLC(sps.log2MinCodingBlockSize - 3, "log2_min_coding_block_size_minus3");
> WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
> @@ -184,7 +190,7 @@
> WRITE_FLAG(0, "pps_extension_flag");
> }
>
> -void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
> +void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1)
> {
> WRITE_CODE(0, 2, "XXX_profile_space[]");
> WRITE_FLAG(ptl.tierFlag, "XXX_tier_flag[]");
> @@ -222,6 +228,13 @@
> }
>
> WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
> + if (maxTempSubLayerMinus1 > 0)
if (maxTempSubLayerMinus1)
> + {
> + WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
> + WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
> + for (int i = maxTempSubLayerMinus1; i < 8 ; i++)
> + WRITE_CODE(0, 2, "reserved_zero_2bits");
indent
> + }
> }
>
> void Entropy::codeVUI(const VUI& vui)
> @@ -331,24 +344,27 @@
>
> void Entropy::codeHrdParameters(const HRDInfo& hrd)
> {
> - WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
> - WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
> - WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
> + for(int i = 0; i <= 1; i++)
> + {
> + WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
> + WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
> + WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
>
> - WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
> - WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
> + WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
> + WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
>
> - WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
> - WRITE_CODE(hrd.cpbRemovalDelayLength - 1, 5, "au_cpb_removal_delay_length_minus1");
> - WRITE_CODE(hrd.dpbOutputDelayLength - 1, 5, "dpb_output_delay_length_minus1");
> + WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
> + WRITE_CODE(hrd.cpbRemovalDelayLength - 1, 5, "au_cpb_removal_delay_length_minus1");
> + WRITE_CODE(hrd.dpbOutputDelayLength - 1, 5, "dpb_output_delay_length_minus1");
>
> - WRITE_FLAG(1, "fixed_pic_rate_general_flag");
> - WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
> - WRITE_UVLC(0, "cpb_cnt_minus1");
> + WRITE_FLAG(1, "fixed_pic_rate_general_flag");
> + WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
> + WRITE_UVLC(0, "cpb_cnt_minus1");
>
> - WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
> - WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
> - WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
> + WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
> + WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
> + WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
> + }
> }
>
> void Entropy::codeAUD(const Slice& slice)
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/entropy.h
> --- a/source/encoder/entropy.h Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/entropy.h Mon Feb 02 19:31:59 2015 +0530
> @@ -230,7 +230,7 @@
> void writeEpExGolomb(uint32_t symbol, uint32_t count);
> void writeCoefRemainExGolomb(uint32_t symbol, const uint32_t absGoRice);
>
> - void codeProfileTier(const ProfileTierLevel& ptl);
> + void codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1);
> void codeScalingList(const ScalingList&);
> void codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId);
>
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/level.cpp
> --- a/source/encoder/level.cpp Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/level.cpp Mon Feb 02 19:31:59 2015 +0530
> @@ -60,6 +60,7 @@
> /* determine minimum decoder level required to decode the described video */
> void determineLevel(const x265_param &param, VPS& vps)
> {
> + vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
> if (param.bLossless)
> vps.ptl.profileIdc = Profile::NONE;
> else if (param.internalCsp == X265_CSP_I420)
> diff -r 1760823cdd46 -r e241b255b992 source/encoder/nal.cpp
> --- a/source/encoder/nal.cpp Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/encoder/nal.cpp Mon Feb 02 19:31:59 2015 +0530
> @@ -107,6 +107,9 @@
> * nuh_reserved_zero_6bits 6-bits
> * nuh_temporal_id_plus1 3-bits */
> out[bytes++] = (uint8_t)nalUnitType << 1;
> + if (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N)
> + out[bytes++] = 2;
> + else
> out[bytes++] = 1;
out[bytes++] = 1 + (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N);
>
> /* 7.4.1 ...
> diff -r 1760823cdd46 -r e241b255b992 source/x265.h
> --- a/source/x265.h Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/x265.h Mon Feb 02 19:31:59 2015 +0530
> @@ -789,6 +789,12 @@
> * CU. */
> int bCULossless;
>
> + /* Enable Temporal Sub Layers while encoding, signals NAL units of coded slices
> + * with their temporalId. Output bitstreams can be extracted either at the base temporal layer
> + * (layer 0) with roughly half the frame rate or at a higher temporal layer (layer 1)
> + * that decodes all the frames in the sequence. */
> + int bEnableTemporalSubLayers;
> +
> /*== Rate Control ==*/
>
> struct
> diff -r 1760823cdd46 -r e241b255b992 source/x265cli.h
> --- a/source/x265cli.h Mon Feb 02 16:27:07 2015 +0530
> +++ b/source/x265cli.h Mon Feb 02 19:31:59 2015 +0530
> @@ -193,6 +193,7 @@
> { "analysis-mode", required_argument, NULL, 0 },
> { "analysis-file", required_argument, NULL, 0 },
> { "strict-cbr", no_argument, NULL, 0 },
> + { "temporal-layers", no_argument, NULL, 0 },
white-space
> { 0, 0, 0, 0 },
> { 0, 0, 0, 0 },
> { 0, 0, 0, 0 },
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list