[x265] [PATCH 1 of 2] encoder: Add support for Temporal Layering of the encoded bitstream
aarthi at multicorewareinc.com
aarthi at multicorewareinc.com
Tue Feb 3 12:08:14 CET 2015
# HG changeset patch
# User Aarthi Thirumalai
# Date 1422960681 -19800
# Tue Feb 03 16:21:21 2015 +0530
# Node ID 830d29c97117c609585b7c18cc75f120a89ce79e
# Parent 4583eda4cf55e9a7f5c11d1ea660367f3822af53
encoder: Add support for Temporal Layering of the encoded bitstream.
Implements temporal sub-layers while encoding and signals the NAL units of coded slices
with their temporalId. The output bitstream can be extracted either at the base temporal layer
(layer 0), which has roughly half the frame rate, or at the higher temporal layer (layer 1),
which decodes all the frames in the sequence.
diff -r 4583eda4cf55 -r 830d29c97117 source/common/param.cpp
--- a/source/common/param.cpp Mon Feb 02 17:03:40 2015 +0530
+++ b/source/common/param.cpp Tue Feb 03 16:21:21 2015 +0530
@@ -181,6 +181,7 @@
param->bIntraInBFrames = 0;
param->bLossless = 0;
param->bCULossless = 0;
+ param->bEnableTemporalSubLayers = 0;
/* Rate control options */
param->rc.vbvMaxBitrate = 0;
diff -r 4583eda4cf55 -r 830d29c97117 source/common/slice.h
--- a/source/common/slice.h Mon Feb 02 17:03:40 2015 +0530
+++ b/source/common/slice.h Tue Feb 03 16:21:21 2015 +0530
@@ -149,6 +149,7 @@
struct VPS
{
+ uint32_t maxTempSubLayers;
uint32_t numReorderPics;
uint32_t maxDecPicBuffering;
HRDInfo hrdParameters;
@@ -228,6 +229,7 @@
bool bUseAMP; // use param
uint32_t maxAMPDepth;
+ uint32_t maxTempSubLayers; // max number of Temporal Sub layers
uint32_t maxDecPicBuffering; // these are dups of VPS values
int numReorderPics;
int maxLatencyIncrease;
diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Feb 02 17:03:40 2015 +0530
+++ b/source/encoder/encoder.cpp Tue Feb 03 16:21:21 2015 +0530
@@ -669,6 +669,14 @@
/* determine references, setup RPS, etc */
m_dpb->prepareEncode(frameEnc);
+ /* If temporal layers are enabled, give frames that have no references the TSA_N NAL unit type.
+ * For now there are at most 2 layers: the non-referenced B frames form the
+ * temporal sub-layer, and all other (referenced) frames stay on the base layer */
+ if (!frameEnc->m_encData->m_bHasReferences && m_param->bEnableTemporalSubLayers)
+ {
+ frameEnc->m_encData->m_slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_TSA_N;
+ }
+
if (m_param->rc.rateControlMode != X265_RC_CQP)
m_lookahead->getEstimatedPictureCost(frameEnc);
@@ -1413,6 +1421,7 @@
sps->bUseAMP = m_param->bEnableAMP;
sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
+ sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
sps->numReorderPics = m_vps.numReorderPics;
sps->maxLatencyIncrease = m_param->bframes;
@@ -1622,6 +1631,19 @@
p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
}
+ if (p->bEnableTemporalSubLayers)
+ {
+ if (p->bFrameAdaptive)
+ x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --b-adapt 0\n");
+ p->bFrameAdaptive = 0;
+ if (p->scenecutThreshold)
+ x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed GOP structure, requires --no -scenecut\n");
+ p->scenecutThreshold = 0;
+ if (p->bframes != 3)
+ x265_log(p, X265_LOG_WARNING, "Ideally 3 bframes are needed to generate a base temporal layer bitstream with half the fps, requires --bframes 3\n");
+ p->bframes = 3;
+ }
+
m_bframeDelay = p->bframes ? (p->bBPyramid ? 2 : 1) : 0;
p->bFrameBias = X265_MIN(X265_MAX(-90, p->bFrameBias), 100);
diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Mon Feb 02 17:03:40 2015 +0530
+++ b/source/encoder/entropy.cpp Tue Feb 03 16:21:21 2015 +0530
@@ -51,17 +51,22 @@
WRITE_CODE(0, 4, "vps_video_parameter_set_id");
WRITE_CODE(3, 2, "vps_reserved_three_2bits");
WRITE_CODE(0, 6, "vps_reserved_zero_6bits");
- WRITE_CODE(0, 3, "vps_max_sub_layers_minus1");
- WRITE_FLAG(1, "vps_temporal_id_nesting_flag");
+ WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
+ // Temporal Id Nesting is disabled only when vps.maxTempSubLayers > 1
+ WRITE_FLAG(!(vps.maxTempSubLayers - 1), "vps_temporal_id_nesting_flag");
WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
- codeProfileTier(vps.ptl);
+ codeProfileTier(vps.ptl ,vps.maxTempSubLayers - 1);
WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
- WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
- WRITE_UVLC(vps.numReorderPics, "vps_num_reorder_pics[i]");
- WRITE_UVLC(0, "vps_max_latency_increase_plus1[i]");
+ for (uint32_t i = 0; i <= vps.maxTempSubLayers - 1; i++)
+ {
+ WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
+ WRITE_UVLC(vps.numReorderPics, "vps_num_reorder_pics[i]");
+ WRITE_UVLC(0, "vps_max_latency_increase_plus1[i]");
+ }
+
WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
WRITE_UVLC(0, "vps_max_op_sets_minus1");
WRITE_FLAG(0, "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
@@ -71,10 +76,11 @@
void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
{
WRITE_CODE(0, 4, "sps_video_parameter_set_id");
- WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
- WRITE_FLAG(1, "sps_temporal_id_nesting_flag");
+ WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
+ // Temporal Id Nesting is disabled only when sps.maxTempSubLayers > 1
+ WRITE_FLAG(!(sps.maxTempSubLayers - 1), "sps_temporal_id_nesting_flag");
- codeProfileTier(ptl);
+ codeProfileTier(ptl, sps.maxTempSubLayers - 1);
WRITE_UVLC(0, "sps_seq_parameter_set_id");
WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
@@ -101,9 +107,12 @@
WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
WRITE_FLAG(true, "sps_sub_layer_ordering_info_present_flag");
- WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
- WRITE_UVLC(sps.numReorderPics, "sps_num_reorder_pics[i]");
- WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
+ for (uint32_t i = 0; i <= sps.maxTempSubLayers - 1; i++)
+ {
+ WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
+ WRITE_UVLC(sps.numReorderPics, "sps_num_reorder_pics[i]");
+ WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
+ }
WRITE_UVLC(sps.log2MinCodingBlockSize - 3, "log2_min_coding_block_size_minus3");
WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
@@ -184,7 +193,7 @@
WRITE_FLAG(0, "pps_extension_flag");
}
-void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
+void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1)
{
WRITE_CODE(0, 2, "XXX_profile_space[]");
WRITE_FLAG(ptl.tierFlag, "XXX_tier_flag[]");
@@ -222,6 +231,14 @@
}
WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
+
+ if (maxTempSubLayerMinus1)
+ {
+ WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
+ WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
+ for (int i = maxTempSubLayerMinus1; i < 8 ; i++)
+ WRITE_CODE(0, 2, "reserved_zero_2bits");
+ }
}
void Entropy::codeVUI(const VUI& vui)
@@ -331,24 +348,27 @@
void Entropy::codeHrdParameters(const HRDInfo& hrd)
{
- WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
- WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
- WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
+ for(int i = 0; i <= 1; i++)
+ {
+ WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
+ WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
+ WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
- WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
- WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
+ WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
+ WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
- WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
- WRITE_CODE(hrd.cpbRemovalDelayLength - 1, 5, "au_cpb_removal_delay_length_minus1");
- WRITE_CODE(hrd.dpbOutputDelayLength - 1, 5, "dpb_output_delay_length_minus1");
+ WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
+ WRITE_CODE(hrd.cpbRemovalDelayLength - 1, 5, "au_cpb_removal_delay_length_minus1");
+ WRITE_CODE(hrd.dpbOutputDelayLength - 1, 5, "dpb_output_delay_length_minus1");
- WRITE_FLAG(1, "fixed_pic_rate_general_flag");
- WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
- WRITE_UVLC(0, "cpb_cnt_minus1");
+ WRITE_FLAG(1, "fixed_pic_rate_general_flag");
+ WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
+ WRITE_UVLC(0, "cpb_cnt_minus1");
- WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
- WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
- WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
+ WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
+ WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
+ WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
+ }
}
void Entropy::codeAUD(const Slice& slice)
diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/entropy.h
--- a/source/encoder/entropy.h Mon Feb 02 17:03:40 2015 +0530
+++ b/source/encoder/entropy.h Tue Feb 03 16:21:21 2015 +0530
@@ -230,7 +230,7 @@
void writeEpExGolomb(uint32_t symbol, uint32_t count);
void writeCoefRemainExGolomb(uint32_t symbol, const uint32_t absGoRice);
- void codeProfileTier(const ProfileTierLevel& ptl);
+ void codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayerMinus1);
void codeScalingList(const ScalingList&);
void codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId);
diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/level.cpp
--- a/source/encoder/level.cpp Mon Feb 02 17:03:40 2015 +0530
+++ b/source/encoder/level.cpp Tue Feb 03 16:21:21 2015 +0530
@@ -60,6 +60,7 @@
/* determine minimum decoder level required to decode the described video */
void determineLevel(const x265_param ¶m, VPS& vps)
{
+ vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
if (param.bLossless)
vps.ptl.profileIdc = Profile::NONE;
else if (param.internalCsp == X265_CSP_I420)
diff -r 4583eda4cf55 -r 830d29c97117 source/encoder/nal.cpp
--- a/source/encoder/nal.cpp Mon Feb 02 17:03:40 2015 +0530
+++ b/source/encoder/nal.cpp Tue Feb 03 16:21:21 2015 +0530
@@ -107,7 +107,7 @@
* nuh_reserved_zero_6bits 6-bits
* nuh_temporal_id_plus1 3-bits */
out[bytes++] = (uint8_t)nalUnitType << 1;
- out[bytes++] = 1;
+ out[bytes++] = 1 + (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N);
/* 7.4.1 ...
* Within the NAL unit, the following three-byte sequences shall not occur at
diff -r 4583eda4cf55 -r 830d29c97117 source/x265.h
--- a/source/x265.h Mon Feb 02 17:03:40 2015 +0530
+++ b/source/x265.h Tue Feb 03 16:21:21 2015 +0530
@@ -791,6 +791,12 @@
* CU. */
int bCULossless;
+ /* Enable temporal sub-layers while encoding; signals the NAL units of coded slices
+ * with their temporalId. The output bitstream can be extracted either at the base temporal layer
+ * (layer 0), which has roughly half the frame rate, or at the higher temporal layer (layer 1),
+ * which decodes all the frames in the sequence. */
+ int bEnableTemporalSubLayers;
+
/*== Rate Control ==*/
struct
More information about the x265-devel
mailing list