[x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
Steve Borho
steve at borho.org
Tue Nov 19 09:07:06 CET 2013
On Nov 19, 2013, at 12:32 AM, Gopu Govindaswamy <gopu at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1384842731 -19800
> # Node ID c386acea7ba1ca48f32060f265586618ee744a9f
> # Parent 2f5f538d2cbca3b46e8d27d860e9787cc19f406f
> b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
>
> When b-pyramid is enabled the bitrate is reduced and there is not much difference in performance;
> the PSNR increased slightly for some of the clips and decreased slightly for others
>
> Test results for reference when enable and disable the b-pyramid:
> cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --b-adapt=2
> Enable B-reference : --b-pyramid=1
> Disable B-reference : --b-pyramid=0
>
> Results:
> Enable / Disable
>
> Clips - Johnny_1280x720_60.y4m
> Total time taken - 11.19s (8.94 fps) / 13.44s (7.44 fps)
> Bitrates - 303.52 kb/s / 326.79 kb/s
> PSNR - 40.679 / 40.612
>
> Clips - Cactus_1920x1080_50.y4m
> Total Time taken - 44.61s (2.24 fps) / 48.23s (2.07 fps)
> Bitrates - 3420.80 kb/s / 3575.20 kb/s
> PSNR - 35.709 / 35.726
>
> Clips - BasketballDrive_1920x1080_50.y4m
> Total time taken - 54.15s (1.85 fps) / 53.72s (1.86 fps)
> Bitrates - 4114.07 kb/s / 4310.45 kb/s
> PSNR - 37.283 / 37.290
>
> Clips - FourPeople_1280x720_60
> Total time taken - 11.79s (8.48 fps) / 12.16s (8.23 fps)
> Bitrates - 514.90 kb/s / 539.08 kb/s
> PSNR - 39.782 / 39.757
>
> diff -r 2f5f538d2cbc -r c386acea7ba1 source/common/common.cpp
> --- a/source/common/common.cpp Mon Nov 18 16:44:31 2013 -0600
> +++ b/source/common/common.cpp Tue Nov 19 12:02:11 2013 +0530
> @@ -165,6 +165,7 @@
> param->bframes = 3;
> param->lookaheadDepth = 40;
> param->bFrameAdaptive = X265_B_ADAPT_FAST;
> + param->bpyramid = 0;
> param->scenecutThreshold = 40; /* Magic number pulled in from x264*/
>
> /* Intra Coding Tools */
> @@ -634,6 +635,7 @@
> x265_log(param, X265_LOG_INFO, "RDpenalty : %d\n", param->rdPenalty);
> }
> x265_log(param, X265_LOG_INFO, "Lookahead / bframes / badapt : %d / %d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
> + x265_log(param, X265_LOG_INFO, "b-pyramid / weightp / ref : %d / %d / %d\n", param->bpyramid, param->bEnableWeightedPred, param->maxNumReferences);
> x265_log(param, X265_LOG_INFO, "tools: ");
> #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, "%s ", STR)
> TOOLOPT(param->bEnableRectInter, "rect");
> @@ -642,7 +644,6 @@
> TOOLOPT(param->bEnableConstrainedIntra, "cip");
> TOOLOPT(param->bEnableEarlySkip, "esd");
> fprintf(stderr, "rd=%d ", param->rdLevel);
> - fprintf(stderr, "ref=%d ", param->maxNumReferences);
>
> TOOLOPT(param->bEnableLoopFilter, "lft");
> if (param->bEnableSAO)
> @@ -664,7 +665,6 @@
> else
> fprintf(stderr, "tskip ");
> }
> - TOOLOPT(param->bEnableWeightedPred, "weightp");
> TOOLOPT(param->bEnableWeightedBiPred, "weightbp");
> TOOLOPT(param->rc.aqMode, "aq-mode");
> if (param->rc.aqMode)
> @@ -764,6 +764,7 @@
> }
> OPT("input-csp") p->sourceCsp = ::parseCspName(value, berror);
> OPT("me") p->searchMethod = ::parseName(value, x265_motion_est_names, berror);
> + OPT("b-pyramid") p->bpyramid = ::parseName(value, x265_b_pyramid_names, berror);
> else
> return X265_PARAM_BAD_NAME;
> #undef OPT
> @@ -821,6 +822,7 @@
> BOOL(p->bEnableSAO, "sao");
> s += sprintf(s, " sao-lcu-bounds=%d", p->saoLcuBoundary);
> s += sprintf(s, " sao-lcu-opt=%d", p->saoLcuBasedOptimization);
> + s += sprintf(s, " b-pyramid=%d", p->bpyramid);
> #undef BOOL
>
> return buf;
> diff -r 2f5f538d2cbc -r c386acea7ba1 source/common/common.h
> --- a/source/common/common.h Mon Nov 18 16:44:31 2013 -0600
> +++ b/source/common/common.h Tue Nov 19 12:02:11 2013 +0530
> @@ -107,6 +107,7 @@
> #define X265_LOG2(x) log2(x)
> #endif
>
> +static const char * const x265_b_pyramid_names[] = {"none", "normal", 0};
String arrays like this need to go in x265.h now, so that API users can pass the strings to our param parser.
> /* defined in common.cpp */
> int64_t x265_mdate(void);
> void x265_log(x265_param *param, int level, const char *fmt, ...);
> diff -r 2f5f538d2cbc -r c386acea7ba1 source/encoder/dpb.cpp
> --- a/source/encoder/dpb.cpp Mon Nov 18 16:44:31 2013 -0600
> +++ b/source/encoder/dpb.cpp Tue Nov 19 12:02:11 2013 +0530
> @@ -78,7 +78,17 @@
> m_lastIDR = pocCurr;
> }
> slice->setLastIDR(m_lastIDR);
> - slice->setReferenced(slice->getSliceType() != B_SLICE);
> +
> + if (slice->getSliceType() != B_SLICE)
> + slice->setReferenced(true);
> + else
> + {
> + if (pic->m_lowres.sliceType == X265_TYPE_BREF)
> + slice->setReferenced(true);
> + else
> + slice->setReferenced(false);
> + }
> +
> slice->setTemporalLayerNonReferenceFlag(!slice->isReferenced());
> // Set the nal unit type
> slice->setNalUnitType(getNalUnitType(pocCurr, m_lastIDR, pic));
> diff -r 2f5f538d2cbc -r c386acea7ba1 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Mon Nov 18 16:44:31 2013 -0600
> +++ b/source/encoder/encoder.cpp Tue Nov 19 12:02:11 2013 +0530
> @@ -1180,6 +1180,11 @@
> {
> _param->bEnableRDOQTS = 0;
> }
> + if (_param->bpyramid && !_param->bframes)
> + {
> + x265_log(_param, X265_LOG_INFO, "Warning: b-pyramid enabled if bframes > 1 \n");
There is an extra space before the line feed. Remove the "Warning:" prefix and make this use X265_LOG_WARNING instead of X265_LOG_INFO.
> + _param->bpyramid = 0;
> + }
>
> /* Set flags according to RDLevel specified - check_params has verified that RDLevel is within range */
> switch (_param->rdLevel)
> @@ -1223,8 +1228,18 @@
> vps.setMaxLayers(1);
> for (int i = 0; i < MAX_TLAYER; i++)
> {
> - m_numReorderPics[i] = 1;
> - m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF, X265_MAX(m_numReorderPics[i] + 1, _param->maxNumReferences) + 1);
> + /* Increase the DPB size and reorderpicture if enabled the bpyramid */
> + if (_param->bpyramid && _param->bframes > 1)
> + {
> + m_numReorderPics[i] = 2;
> + m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF, X265_MAX(m_numReorderPics[i] + 1, _param->maxNumReferences) + 2);
> + }
> + else
> + {
> + m_numReorderPics[i] = 1;
> + m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF, X265_MAX(m_numReorderPics[i] + 1, _param->maxNumReferences) + 1);
> + }
This seems overcomplicated. How about:
m_numReorderPics[i] = (_param->bpyramid && _param->bframes > 1) ? 2 : 1;
m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF, X265_MAX(m_numReorderPics[i] + 1, _param->maxNumReferences) + m_numReorderPics[i]);
> +
> vps.setNumReorderPics(m_numReorderPics[i], i);
> vps.setMaxDecPicBuffering(m_maxDecPicBuffering[i], i);
> }
> diff -r 2f5f538d2cbc -r c386acea7ba1 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Mon Nov 18 16:44:31 2013 -0600
> +++ b/source/encoder/slicetype.cpp Tue Nov 19 12:02:11 2013 +0530
> @@ -643,21 +643,22 @@
> Lowres& frm = list[bframes]->m_lowres;
>
> if (frm.sliceType == X265_TYPE_BREF
> - /* && h->param.i_bframe_pyramid < X264_B_PYRAMID_NORMAL && brefs == h->param.i_bframe_pyramid*/)
> + && cfg->param.bpyramid < X265_B_PYRAMID_NORMAL && brefs == cfg->param.bpyramid)
> {
> frm.sliceType = X265_TYPE_B;
> - x265_log(&cfg->param, X265_LOG_WARNING, "B-ref is not yet supported\n");
> + x265_log(&cfg->param, X265_LOG_WARNING, "B-ref at frame %d incompatible with B-pyramid %s \n",
> + frm.frameNum, x265_b_pyramid_names[cfg->param.bpyramid] );
> }
>
> /* pyramid with multiple B-refs needs a big enough dpb that the preceding P-frame stays available.
> - smaller dpb could be supported by smart enough use of mmco, but it's easier just to forbid it.
> - else if (frm.sliceType == X265_TYPE_BREF && cfg->param.i_bframe_pyramid == X265_B_PYRAMID_NORMAL &&
> - brefs && cfg->param.i_frame_reference <= (brefs+3))
> + smaller dpb could be supported by smart enough use of mmco, but it's easier just to forbid it.*/
> + else if (frm.sliceType == X265_TYPE_BREF && cfg->param.bpyramid == X265_B_PYRAMID_NORMAL &&
> + brefs && cfg->param.maxNumReferences <= (brefs+3))
> {
> frm.sliceType = X265_TYPE_B;
> x265_log(&cfg->param, X265_LOG_WARNING, "B-ref at frame %d incompatible with B-pyramid %s and %d reference frames\n",
> - frm.sliceType, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference);
> - } */
> + frm.sliceType, x265_b_pyramid_names[cfg->param.bpyramid], cfg->param.maxNumReferences);
> + }
>
> if (frm.sliceType == X265_TYPE_KEYFRAME)
> frm.sliceType = cfg->param.bOpenGOP ? X265_TYPE_I : X265_TYPE_IDR;
> @@ -716,12 +717,12 @@
> list[bframes]->m_lowres.leadingBframes = bframes;
> lastNonB = &list[bframes]->m_lowres;
>
> - /* insert a bref into the sequence
> - if (h->param.i_bframe_pyramid && bframes > 1 && !brefs)
> + /* insert a bref into the sequence */
> + if (cfg->param.bpyramid && bframes > 1 && !brefs)
> {
> - h->lookahead->next.list[bframes/2]->i_type = X264_TYPE_BREF;
> + list[bframes/2]->m_lowres.sliceType = X265_TYPE_BREF;
> brefs++;
> - } */
> + }
>
> /* calculate the frame costs ahead of time for x264_rc_analyse_slice while we still have lowres */
> if (cfg->param.rc.rateControlMode != X265_RC_CQP)
> @@ -742,8 +743,7 @@
>
> estimateFrameCost(p0, p1, b, 0);
>
> - /*
> - if ((p0 != p1 || bframes) && cfg->param.rc.i_vbv_buffer_size)
> + if ((p0 != p1 || bframes) /*&& cfg->param.rc.i_vbv_buffer_size*/ )
> {
> // We need the intra costs for row SATDs
> estimateFrameCost(b, b, b, 0);
> @@ -752,7 +752,7 @@
> p0 = 0;
> for (b = 1; b <= bframes; b++)
> {
> - if (frames[b]->i_type == X265_TYPE_B)
> + if (frames[b]->sliceType == X265_TYPE_B)
> for (p1 = b; frames[p1]->sliceType == X265_TYPE_B;)
> p1++;
> else
> @@ -761,7 +761,7 @@
> if (frames[b]->sliceType == X265_TYPE_BREF)
> p0 = b;
> }
> - } */
> + }
> }
>
> /* dequeue all frames from inputQueue that are about to be enqueued
> @@ -774,10 +774,23 @@
>
> /* add non-B to output queue */
> outputQueue.pushBack(*list[bframes]);
> +
> + /* Add B-ref frame next to P frame in output queue, the B-ref encode before non B-ref frame */
> + if (bframes > 1 && cfg->param.bpyramid)
> + {
> + for (int i = 0; i < bframes; i++)
> + {
> + if(list[i]->m_lowres.sliceType == X265_TYPE_BREF)
> + outputQueue.pushBack(*list[i]);
> + }
> + }
> +
> /* add B frames to output queue */
> for (int i = 0; i < bframes; i++)
> {
> - outputQueue.pushBack(*list[i]);
> + /* push all the B frames into output queue except B-ref, which already pushed into output queue*/
> + if (list[i]->m_lowres.sliceType != X265_TYPE_BREF)
> + outputQueue.pushBack(*list[i]);
> }
>
> return;
> @@ -1155,11 +1168,7 @@
> if (cost > threshold)
> break;
>
> - /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
> - //TODO Add this into param
> - int bframe_pyramid = 0;
> -
> - if (bframe_pyramid && next_p - cur_p > 2)
> + if (cfg->param.bpyramid && next_p - cur_p > 2)
> {
> int middle = cur_p + (next_p - cur_p) / 2;
> cost += estimateFrameCost(cur_p, next_p, middle, 0);
> diff -r 2f5f538d2cbc -r c386acea7ba1 source/x265.cpp
> --- a/source/x265.cpp Mon Nov 18 16:44:31 2013 -0600
> +++ b/source/x265.cpp Tue Nov 19 12:02:11 2013 +0530
> @@ -116,6 +116,7 @@
> { "bframes", required_argument, NULL, 'b' },
> { "bframe-bias", required_argument, NULL, 0 },
> { "b-adapt", required_argument, NULL, 0 },
> + { "b-pyramid", required_argument, NULL, 0 },
> { "ref", required_argument, NULL, 0 },
> { "no-weightp", no_argument, NULL, 0 },
> { "weightp", no_argument, NULL, 'w' },
> @@ -304,6 +305,7 @@
> H0(" --bframes Maximum number of consecutive b-frames (now it only enables B GOP structure) Default %d\n", param->bframes);
> H0(" --bframe-bias Bias towards B frame decisions. Default %d\n", param->bFrameBias);
> H0(" --b-adapt 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive);
> + H0("...--b-pyramid...................Use B-frame reference 0: Disabled, 1: Enabled Default\n", param->bpyramid);
> H0(" --ref max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
> H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred));
> H0("\nQP, rate control and rate distortion options:\n");
> diff -r 2f5f538d2cbc -r c386acea7ba1 source/x265.h
> --- a/source/x265.h Mon Nov 18 16:44:31 2013 -0600
> +++ b/source/x265.h Tue Nov 19 12:02:11 2013 +0530
> @@ -204,6 +204,7 @@
> #define X265_TYPE_I 0x0002
> #define X265_TYPE_P 0x0003
> #define X265_TYPE_BREF 0x0004 /* Non-disposable B-frame */
> +#define X265_B_PYRAMID_NORMAL 0x0001
> #define X265_TYPE_B 0x0005
> #define X265_TYPE_KEYFRAME 0x0006 /* IDR or I depending on b_open_gop option */
> #define X265_AQ_NONE 0
> @@ -315,6 +316,7 @@
> int bframes; ///< Max number of consecutive B-frames
> int lookaheadDepth; ///< Number of frames to use for lookahead, determines encoder latency
> int bFrameAdaptive; ///< 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling
> + int bpyramid; ///< 0 - none, 1 - normal use B-frame reference
> int bFrameBias;
> int scenecutThreshold; ///< how aggressively to insert extra I frames
The rest looks fine.
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 842 bytes
Desc: Message signed with OpenPGP using GPGMail
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131119/e1d2b6a0/attachment.sig>
More information about the x265-devel
mailing list