[x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
Gopu Govindaswamy
gopu at multicorewareinc.com
Mon Nov 18 12:21:42 CET 2013
On Mon, Nov 18, 2013 at 4:47 PM, Deepthi Nandakumar
<deepthi at multicorewareinc.com> wrote:
>
>
>
> On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy
> <gopu at multicorewareinc.com> wrote:
>>
>> # HG changeset patch
>> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
>> # Date 1384769433 -19800
>> # Node ID 1e22b93638072ed805478d7af17f90e285fb4969
>> # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
>> b-pyramid implementation: Allow the use of B-frames as references for non
>> B and B frames
>>
>> Enabling b-pyramid reduces the bitrate noticeably with little change in
>> encoding speed; the PSNR increases slightly on some clips and decreases
>> slightly on others.
>>
>> Test results for reference when enable and disable the b-pyramid:
>> cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
>> Enable B-reference : --b-pyramid=1
>> Disable B-reference : --b-pyramid=0
>>
>> Results:
>> Enable / Disable
>>
>> clip - FourPeople_1280x720_60.yuv
>> Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
>> Bitrates - 516.30 kb/s / 544.68 kb/s
>> PSNR - 39.725 / 39.701
>>
>> clip - BasketballDrive_1920x1080_50.y4m
>> Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
>> Bitrates - 4166.92 kb/s / 4370.43 kb/s
>> PSNR - 37.261 / 37.268
>>
>> clip - Johnny_1280x720_60.y4m
>> Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
>> Bitrates - 304.29 kb/s / 328.84 kb/s
>> PSNR - 40.605 / 40.551
>>
>> Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
>> Bitrates - 3496.84 kb/s / 3683.93 kb/s
>> PSNR - 35.645 / 35.660
>>
>> diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
>> --- a/source/common/common.cpp Mon Nov 18 11:32:06 2013 +0530
>> +++ b/source/common/common.cpp Mon Nov 18 15:40:33 2013 +0530
>> @@ -54,6 +54,7 @@
>>
>> static int parseCspName(const char *arg, int& error);
>> static int parseName(const char *arg, const char * const * names, int&
>> error);
>> +static int parse_enum(const char *, const char * const * names, int
>> *dst);
>>
>> using namespace x265;
>>
>> @@ -165,6 +166,7 @@
>> param->bframes = 3;
>> param->lookaheadDepth = 40;
>> param->bFrameAdaptive = X265_B_ADAPT_FAST;
>> + param->bpyramid = 0;
>> param->scenecutThreshold = 40; /* Magic number pulled in from x264*/
>>
>> /* Intra Coding Tools */
>> @@ -532,7 +534,7 @@
>> }
>>
>> CHECK(param->bEnableWavefront < 0, "WaveFrontSynchro cannot be
>> negative");
>> -
>> + CHECK(param->bpyramid >= 2, "b-pyramid is 0 or 1");
>> return check_failed;
>> }
>>
>> @@ -620,6 +622,7 @@
>> x265_log(param, X265_LOG_INFO, "RDpenalty :
>> %d\n", param->rdPenalty);
>> }
>> x265_log(param, X265_LOG_INFO, "Lookahead / bframes / badapt : %d /
>> %d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
>> + x265_log(param, X265_LOG_INFO, "b-pyramid / weightp / ref : %d /
>> %d / %d\n", param->bpyramid, param->bEnableWeightedPred,
>> param->maxNumReferences);
>> x265_log(param, X265_LOG_INFO, "tools: ");
>> #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, "%s ", STR)
>> TOOLOPT(param->bEnableRectInter, "rect");
>> @@ -628,7 +631,6 @@
>> TOOLOPT(param->bEnableConstrainedIntra, "cip");
>> TOOLOPT(param->bEnableEarlySkip, "esd");
>> fprintf(stderr, "rd=%d ", param->rdLevel);
>> - fprintf(stderr, "ref=%d ", param->maxNumReferences);
>>
>> TOOLOPT(param->bEnableLoopFilter, "lft");
>> if (param->bEnableSAO)
>> @@ -650,7 +652,6 @@
>> else
>> fprintf(stderr, "tskip ");
>> }
>> - TOOLOPT(param->bEnableWeightedPred, "weightp");
>> TOOLOPT(param->bEnableWeightedBiPred, "weightbp");
>> TOOLOPT(param->rc.aqMode, "aq");
>> fprintf(stderr, "\n");
>> @@ -747,6 +748,15 @@
>> }
>> OPT("input-csp") p->sourceCsp = ::parseCspName(value, berror);
>> OPT("me") p->searchMethod = ::parseName(value,
>> x265_motion_est_names, berror);
>> + OPT("b-pyramid")
>> + {
>> + berror |= parse_enum(value, x265_b_pyramid_names, &p->bpyramid);
>> + if (berror)
>> + {
>> + berror = 0;
>> + p->bpyramid = atoi(value);
>> + }
>> + }
>
>
> It is not clear why parse_enum is required here. For now, this is a boolean
> flag which can be assigned directly to the param structure.
Because parse_enum lets the option accept either a name or a number:
--b-pyramid=none or --b-pyramid=0, and --b-pyramid=normal or --b-pyramid=1.
>
>
>>
>> else
>> return X265_PARAM_BAD_NAME;
>> #undef OPT
>> @@ -802,6 +812,7 @@
>> BOOL(p->bEnableSAO, "sao");
>> s += sprintf(s, " sao-lcu-bounds=%d", p->saoLcuBoundary);
>> s += sprintf(s, " sao-lcu-opt=%d", p->saoLcuBasedOptimization);
>> + s += sprintf(s, " b-pyramid=%d", p->bpyramid);
>> #undef BOOL
>>
>> return buf;
>> @@ -843,3 +854,13 @@
>> error = 1;
>> return a;
>> }
>> +static int parse_enum(const char *arg, const char * const * names, int
>> *dst)
>> +{
>> + for (int i = 0; names[i]; i++)
>> + if (!strcmp(arg, names[i]))
>> + {
>> + *dst = i;
>> + return 0;
>> + }
>> + return -1;
>> +}
>> diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h
>> --- a/source/common/common.h Mon Nov 18 11:32:06 2013 +0530
>> +++ b/source/common/common.h Mon Nov 18 15:40:33 2013 +0530
>> @@ -107,6 +107,7 @@
>> #define X265_LOG2(x) log2(x)
>> #endif
>>
>> +static const char * const x265_b_pyramid_names[] = {"none", "normal", 0};
>> /* defined in common.cpp */
>> int64_t x265_mdate(void);
>> void x265_log(x265_param *param, int level, const char *fmt, ...);
>> diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/dpb.cpp
>> --- a/source/encoder/dpb.cpp Mon Nov 18 11:32:06 2013 +0530
>> +++ b/source/encoder/dpb.cpp Mon Nov 18 15:40:33 2013 +0530
>> @@ -78,7 +78,17 @@
>> m_lastIDR = pocCurr;
>> }
>> slice->setLastIDR(m_lastIDR);
>> - slice->setReferenced(slice->getSliceType() != B_SLICE);
>> +
>> + if (slice->getSliceType() != B_SLICE)
>> + slice->setReferenced(true);
>> + else
>> + {
>> + if (pic->m_lowres.sliceType == X265_TYPE_BREF)
>> + slice->setReferenced(true);
>> + else
>> + slice->setReferenced(false);
>> + }
>> +
>> slice->setTemporalLayerNonReferenceFlag(!slice->isReferenced());
>> // Set the nal unit type
>> slice->setNalUnitType(getNalUnitType(pocCurr, m_lastIDR, pic));
>> diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/encoder.cpp
>> --- a/source/encoder/encoder.cpp Mon Nov 18 11:32:06 2013 +0530
>> +++ b/source/encoder/encoder.cpp Mon Nov 18 15:40:33 2013 +0530
>> @@ -1223,7 +1223,13 @@
>> vps.setMaxLayers(1);
>> for (int i = 0; i < MAX_TLAYER; i++)
>> {
>> - m_numReorderPics[i] = 1;
>> + /* Increase the DPB size if enabled the bpyramid the b-ref always
>> should take Lo and L1 as a non B frames
>> + the dpb size is always 3 when enabled the b-pyramid */
>> + if (_param->bpyramid && _param->bframes > 1)
>> + m_numReorderPics[i] = 3;
>> + else
>> + m_numReorderPics[i] = 1;
>> +
>> m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF,
>> X265_MAX(m_numReorderPics[i] + 1, _param->maxNumReferences) + 1);
>> vps.setNumReorderPics(m_numReorderPics[i], i);
>> vps.setMaxDecPicBuffering(m_maxDecPicBuffering[i], i);
>> diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/slicetype.cpp
>> --- a/source/encoder/slicetype.cpp Mon Nov 18 11:32:06 2013 +0530
>> +++ b/source/encoder/slicetype.cpp Mon Nov 18 15:40:33 2013 +0530
>> @@ -643,21 +643,22 @@
>> Lowres& frm = list[bframes]->m_lowres;
>>
>> if (frm.sliceType == X265_TYPE_BREF
>> - /* && h->param.i_bframe_pyramid < X264_B_PYRAMID_NORMAL
>> && brefs == h->param.i_bframe_pyramid*/)
>> + && cfg->param.bpyramid < X265_B_PYRAMID_NORMAL && brefs
>> == cfg->param.bpyramid)
>> {
>> frm.sliceType = X265_TYPE_B;
>> - x265_log(&cfg->param, X265_LOG_WARNING, "B-ref is not yet
>> supported\n");
>> + x265_log(&cfg->param, X265_LOG_WARNING, "B-ref at frame
>> %d incompatible with B-pyramid %s \n",
>> + frm.frameNum,
>> x265_b_pyramid_names[cfg->param.bpyramid] );
>> }
>>
>> /* pyramid with multiple B-refs needs a big enough dpb that
>> the preceding P-frame stays available.
>> - smaller dpb could be supported by smart enough use of
>> mmco, but it's easier just to forbid it.
>> - else if (frm.sliceType == X265_TYPE_BREF &&
>> cfg->param.i_bframe_pyramid == X265_B_PYRAMID_NORMAL &&
>> - brefs && cfg->param.i_frame_reference <= (brefs+3))
>> + smaller dpb could be supported by smart enough use of
>> mmco, but it's easier just to forbid it.*/
>> + else if (frm.sliceType == X265_TYPE_BREF &&
>> cfg->param.bpyramid == X265_B_PYRAMID_NORMAL &&
>> + brefs && cfg->param.maxNumReferences <= (brefs+3))
>> {
>> frm.sliceType = X265_TYPE_B;
>> x265_log(&cfg->param, X265_LOG_WARNING, "B-ref at frame
>> %d incompatible with B-pyramid %s and %d reference frames\n",
>> - frm.sliceType,
>> x264_b_pyramid_names[h->param.i_bframe_pyramid],
>> h->param.i_frame_reference);
>> - } */
>> + frm.sliceType,
>> x265_b_pyramid_names[cfg->param.bpyramid], cfg->param.maxNumReferences);
>> + }
>>
>> if (frm.sliceType == X265_TYPE_KEYFRAME)
>> frm.sliceType = cfg->param.bOpenGOP ? X265_TYPE_I :
>> X265_TYPE_IDR;
>> @@ -716,12 +717,12 @@
>> list[bframes]->m_lowres.leadingBframes = bframes;
>> lastNonB = &list[bframes]->m_lowres;
>>
>> - /* insert a bref into the sequence
>> - if (h->param.i_bframe_pyramid && bframes > 1 && !brefs)
>> + /* insert a bref into the sequence */
>> + if (cfg->param.bpyramid && bframes > 1 && !brefs)
>> {
>> - h->lookahead->next.list[bframes/2]->i_type = X264_TYPE_BREF;
>> + list[bframes/2]->m_lowres.sliceType = X265_TYPE_BREF;
>> brefs++;
>> - } */
>> + }
>>
>> /* calculate the frame costs ahead of time for
>> x264_rc_analyse_slice while we still have lowres */
>> if (cfg->param.rc.rateControlMode != X265_RC_CQP)
>> @@ -742,8 +743,7 @@
>>
>> estimateFrameCost(p0, p1, b, 0);
>>
>> - /*
>> - if ((p0 != p1 || bframes) && cfg->param.rc.i_vbv_buffer_size)
>> + if ((p0 != p1 || bframes) /*&&
>> cfg->param.rc.i_vbv_buffer_size*/ )
>> {
>> // We need the intra costs for row SATDs
>> estimateFrameCost(b, b, b, 0);
>> @@ -752,7 +752,7 @@
>> p0 = 0;
>> for (b = 1; b <= bframes; b++)
>> {
>> - if (frames[b]->i_type == X265_TYPE_B)
>> + if (frames[b]->sliceType == X265_TYPE_B)
>> for (p1 = b; frames[p1]->sliceType ==
>> X265_TYPE_B;)
>> p1++;
>> else
>> @@ -761,7 +761,7 @@
>> if (frames[b]->sliceType == X265_TYPE_BREF)
>> p0 = b;
>> }
>> - } */
>> + }
>> }
>>
>> /* dequeue all frames from inputQueue that are about to be
>> enqueued
>> @@ -774,10 +774,23 @@
>>
>> /* add non-B to output queue */
>> outputQueue.pushBack(*list[bframes]);
>> +
>> + /* Add B-ref frame next to P frame in output queue, the B-ref
>> encode before non B-ref frame */
>> + if (bframes > 1 && cfg->param.bpyramid)
>> + {
>> + for (int i = 0; i < bframes; i++)
>> + {
>> + if(list[i]->m_lowres.sliceType == X265_TYPE_BREF)
>> + outputQueue.pushBack(*list[i]);
>> + }
>> + }
>> +
>> /* add B frames to output queue */
>> for (int i = 0; i < bframes; i++)
>> {
>> - outputQueue.pushBack(*list[i]);
>> + /* push all the B frames into output queue except B-ref,
>> which already pushed into output queue*/
>> + if (list[i]->m_lowres.sliceType != X265_TYPE_BREF)
>> + outputQueue.pushBack(*list[i]);
>> }
>>
>> return;
>> @@ -1155,11 +1168,7 @@
>> if (cost > threshold)
>> break;
>>
>> - /* Keep some B-frames as references: 0=off, 1=strict
>> hierarchical, 2=normal */
>> - //TODO Add this into param
>> - int bframe_pyramid = 0;
>> -
>> - if (bframe_pyramid && next_p - cur_p > 2)
>> + if (cfg->param.bpyramid && next_p - cur_p > 2)
>> {
>> int middle = cur_p + (next_p - cur_p) / 2;
>> cost += estimateFrameCost(cur_p, next_p, middle, 0);
>> diff -r 2321ebe0bf64 -r 1e22b9363807 source/x265.cpp
>> --- a/source/x265.cpp Mon Nov 18 11:32:06 2013 +0530
>> +++ b/source/x265.cpp Mon Nov 18 15:40:33 2013 +0530
>> @@ -116,6 +116,7 @@
>> { "bframes", required_argument, NULL, 'b' },
>> { "bframe-bias", required_argument, NULL, 0 },
>> { "b-adapt", required_argument, NULL, 0 },
>> + { "b-pyramid", required_argument, NULL, 0 },
>> { "ref", required_argument, NULL, 0 },
>> { "no-weightp", no_argument, NULL, 0 },
>> { "weightp", no_argument, NULL, 'w' },
>> @@ -303,6 +304,7 @@
>> H0(" --bframes Maximum number of consecutive
>> b-frames (now it only enables B GOP structure) Default %d\n",
>> param->bframes);
>> H0(" --bframe-bias Bias towards B frame decisions.
>> Default %d\n", param->bFrameBias);
>> H0(" --b-adapt 0 - none, 1 - fast, 2 - full
>> (trellis) adaptive B frame scheduling. Default %d\n",
>> param->bFrameAdaptive);
>> + H0("...--b-pyramid...................Use B-frame reference 0:
>> Disabled, 1: Enabled Default\n", param->bpyramid);
>> H0(" --ref max number of L0 references to
>> be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
>> H0("-w/--[no-]weightp Enable weighted prediction in P
>> slices. Default %s\n", OPT(param->bEnableWeightedPred));
>> H0("\nQP, rate control and rate distortion options:\n");
>> diff -r 2321ebe0bf64 -r 1e22b9363807 source/x265.h
>> --- a/source/x265.h Mon Nov 18 11:32:06 2013 +0530
>> +++ b/source/x265.h Mon Nov 18 15:40:33 2013 +0530
>> @@ -204,6 +204,7 @@
>> #define X265_TYPE_I 0x0002
>> #define X265_TYPE_P 0x0003
>> #define X265_TYPE_BREF 0x0004 /* Non-disposable B-frame */
>> +#define X265_B_PYRAMID_NORMAL 0x0001
>> #define X265_TYPE_B 0x0005
>> #define X265_TYPE_KEYFRAME 0x0006 /* IDR or I depending on
>> b_open_gop option */
>> #define X265_AQ_NONE 0
>> @@ -315,6 +316,7 @@
>> int bframes; ///< Max number of
>> consecutive B-frames
>> int lookaheadDepth; ///< Number of frames to
>> use for lookahead, determines encoder latency
>> int bFrameAdaptive; ///< 0 - none, 1 - fast, 2
>> - full (trellis) adaptive B frame scheduling
>> + int bpyramid; ///< 0 - none, 1 - normal
>> use B-frame reference
>> int bFrameBias;
>> int scenecutThreshold; ///< how aggressively to
>> insert extra I frames
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Thanks & Regards
Gopu G
Multicoreware Inc
More information about the x265-devel
mailing list