[x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

Deepthi Nandakumar deepthi at multicorewareinc.com
Mon Nov 18 12:17:47 CET 2013


On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy <
gopu at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Gopu Govindaswamy <gopu at multicorewareinc.com>
> # Date 1384769433 -19800
> # Node ID 1e22b93638072ed805478d7af17f90e285fb4969
> # Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
> b-pyramid implementation: Allow the use of B-frames as references for non
> B and B frames
>
> When b-pyramid is enabled, the bitrate is reduced noticeably and there
> is not much difference in encoding speed. PSNR changes only by a few
> hundredths: it increases for some clips and decreases for others.
>
> Test results for reference, with b-pyramid enabled and disabled:
> cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
> Enable B-reference  : --b-pyramid=1
> Disable B-reference : --b-pyramid=0
>
> Results:
> Enable / Disable
>
> clip - FourPeople_1280x720_60.yuv
> Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
> Bitrates - 516.30 kb/s / 544.68 kb/s
> PSNR     - 39.725 / 39.701
>
> clip - BasketballDrive_1920x1080_50.y4m
> Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
> Bitrates -  4166.92 kb/s / 4370.43 kb/s
> PSNR     -  37.261 / 37.268
>
> clip - Johnny_1280x720_60.y4m
> Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
> Bitrates - 304.29 kb/s / 328.84 kb/s
> PSNR     - 40.605 / 40.551
>
> Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
> Bitrates - 3496.84 kb/s / 3683.93 kb/s
> PSNR     - 35.645 / 35.660
>
> diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
> --- a/source/common/common.cpp  Mon Nov 18 11:32:06 2013 +0530
> +++ b/source/common/common.cpp  Mon Nov 18 15:40:33 2013 +0530
> @@ -54,6 +54,7 @@
>
>  static int parseCspName(const char *arg, int& error);
>  static int parseName(const char *arg, const char * const * names, int&
> error);
> +static int parse_enum(const char *, const char * const * names, int *dst);
>
>  using namespace x265;
>
> @@ -165,6 +166,7 @@
>      param->bframes = 3;
>      param->lookaheadDepth = 40;
>      param->bFrameAdaptive = X265_B_ADAPT_FAST;
> +    param->bpyramid = 0;
>      param->scenecutThreshold = 40; /* Magic number pulled in from x264*/
>
>      /* Intra Coding Tools */
> @@ -532,7 +534,7 @@
>      }
>
>      CHECK(param->bEnableWavefront < 0, "WaveFrontSynchro cannot be
> negative");
> -
> +    CHECK(param->bpyramid >= 2, "b-pyramid is 0 or 1");
>      return check_failed;
>  }
>
> @@ -620,6 +622,7 @@
>          x265_log(param, X265_LOG_INFO, "RDpenalty                    :
> %d\n", param->rdPenalty);
>      }
>      x265_log(param, X265_LOG_INFO, "Lookahead / bframes / badapt : %d /
> %d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
> +    x265_log(param, X265_LOG_INFO, "b-pyramid / weightp / ref    : %d /
> %d / %d\n", param->bpyramid, param->bEnableWeightedPred,
> param->maxNumReferences);
>      x265_log(param, X265_LOG_INFO, "tools: ");
>  #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, "%s ", STR)
>      TOOLOPT(param->bEnableRectInter, "rect");
> @@ -628,7 +631,6 @@
>      TOOLOPT(param->bEnableConstrainedIntra, "cip");
>      TOOLOPT(param->bEnableEarlySkip, "esd");
>      fprintf(stderr, "rd=%d ", param->rdLevel);
> -    fprintf(stderr, "ref=%d ", param->maxNumReferences);
>
>      TOOLOPT(param->bEnableLoopFilter, "lft");
>      if (param->bEnableSAO)
> @@ -650,7 +652,6 @@
>          else
>              fprintf(stderr, "tskip ");
>      }
> -    TOOLOPT(param->bEnableWeightedPred, "weightp");
>      TOOLOPT(param->bEnableWeightedBiPred, "weightbp");
>      TOOLOPT(param->rc.aqMode, "aq");
>      fprintf(stderr, "\n");
> @@ -747,6 +748,15 @@
>      }
>      OPT("input-csp") p->sourceCsp = ::parseCspName(value, berror);
>      OPT("me")        p->searchMethod = ::parseName(value,
> x265_motion_est_names, berror);
> +    OPT("b-pyramid")
> +    {
> +        berror |= parse_enum(value, x265_b_pyramid_names, &p->bpyramid);
> +        if (berror)
> +        {
> +            berror = 0;
> +            p->bpyramid = atoi(value);
> +        }
> +    }
>

Why is parse_enum required here? For now, b-pyramid is a boolean flag
(0 or 1), so the value can be assigned directly to the param structure.



>      else
>          return X265_PARAM_BAD_NAME;
>  #undef OPT
> @@ -802,6 +812,7 @@
>      BOOL(p->bEnableSAO, "sao");
>      s += sprintf(s, " sao-lcu-bounds=%d", p->saoLcuBoundary);
>      s += sprintf(s, " sao-lcu-opt=%d", p->saoLcuBasedOptimization);
> +    s += sprintf(s, " b-pyramid=%d", p->bpyramid);
>  #undef BOOL
>
>      return buf;
> @@ -843,3 +854,13 @@
>          error = 1;
>      return a;
>  }
> +static int parse_enum(const char *arg, const char * const * names, int
> *dst)
> +{
> +    for (int i = 0; names[i]; i++)
> +        if (!strcmp(arg, names[i]))
> +        {
> +            *dst = i;
> +            return 0;
> +        }
> +    return -1;
> +}
> diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h
> --- a/source/common/common.h    Mon Nov 18 11:32:06 2013 +0530
> +++ b/source/common/common.h    Mon Nov 18 15:40:33 2013 +0530
> @@ -107,6 +107,7 @@
>  #define X265_LOG2(x)  log2(x)
>  #endif
>
> +static const char * const x265_b_pyramid_names[] = {"none", "normal", 0};
>  /* defined in common.cpp */
>  int64_t x265_mdate(void);
>  void x265_log(x265_param *param, int level, const char *fmt, ...);
> diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/dpb.cpp
> --- a/source/encoder/dpb.cpp    Mon Nov 18 11:32:06 2013 +0530
> +++ b/source/encoder/dpb.cpp    Mon Nov 18 15:40:33 2013 +0530
> @@ -78,7 +78,17 @@
>          m_lastIDR = pocCurr;
>      }
>      slice->setLastIDR(m_lastIDR);
> -    slice->setReferenced(slice->getSliceType() != B_SLICE);
> +
> +    if (slice->getSliceType() != B_SLICE)
> +        slice->setReferenced(true);
> +    else
> +    {
> +        if (pic->m_lowres.sliceType == X265_TYPE_BREF)
> +            slice->setReferenced(true);
> +        else
> +            slice->setReferenced(false);
> +    }
> +
>      slice->setTemporalLayerNonReferenceFlag(!slice->isReferenced());
>      // Set the nal unit type
>      slice->setNalUnitType(getNalUnitType(pocCurr, m_lastIDR, pic));
> diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Mon Nov 18 11:32:06 2013 +0530
> +++ b/source/encoder/encoder.cpp        Mon Nov 18 15:40:33 2013 +0530
> @@ -1223,7 +1223,13 @@
>      vps.setMaxLayers(1);
>      for (int i = 0; i < MAX_TLAYER; i++)
>      {
> -        m_numReorderPics[i] = 1;
> +        /* Increase the DPB size if enabled the bpyramid the b-ref always
> should take Lo and L1 as a non B frames
> +        the dpb size is always 3 when enabled the b-pyramid */
> +        if (_param->bpyramid && _param->bframes > 1)
> +            m_numReorderPics[i] = 3;
> +        else
> +            m_numReorderPics[i] = 1;
> +
>          m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF,
> X265_MAX(m_numReorderPics[i] + 1, _param->maxNumReferences) + 1);
>          vps.setNumReorderPics(m_numReorderPics[i], i);
>          vps.setMaxDecPicBuffering(m_maxDecPicBuffering[i], i);
> diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp      Mon Nov 18 11:32:06 2013 +0530
> +++ b/source/encoder/slicetype.cpp      Mon Nov 18 15:40:33 2013 +0530
> @@ -643,21 +643,22 @@
>              Lowres& frm = list[bframes]->m_lowres;
>
>              if (frm.sliceType == X265_TYPE_BREF
> -                /* && h->param.i_bframe_pyramid < X264_B_PYRAMID_NORMAL
> && brefs == h->param.i_bframe_pyramid*/)
> +                && cfg->param.bpyramid < X265_B_PYRAMID_NORMAL && brefs
> == cfg->param.bpyramid)
>              {
>                  frm.sliceType = X265_TYPE_B;
> -                x265_log(&cfg->param, X265_LOG_WARNING, "B-ref is not yet
> supported\n");
> +                x265_log(&cfg->param, X265_LOG_WARNING, "B-ref at frame
> %d incompatible with B-pyramid %s \n",
> +                        frm.frameNum,
> x265_b_pyramid_names[cfg->param.bpyramid] );
>              }
>
>              /* pyramid with multiple B-refs needs a big enough dpb that
> the preceding P-frame stays available.
> -               smaller dpb could be supported by smart enough use of
> mmco, but it's easier just to forbid it.
> -            else if (frm.sliceType == X265_TYPE_BREF &&
> cfg->param.i_bframe_pyramid == X265_B_PYRAMID_NORMAL &&
> -                     brefs && cfg->param.i_frame_reference <= (brefs+3))
> +               smaller dpb could be supported by smart enough use of
> mmco, but it's easier just to forbid it.*/
> +            else if (frm.sliceType == X265_TYPE_BREF &&
> cfg->param.bpyramid == X265_B_PYRAMID_NORMAL &&
> +                     brefs && cfg->param.maxNumReferences <= (brefs+3))
>              {
>                  frm.sliceType = X265_TYPE_B;
>                  x265_log(&cfg->param, X265_LOG_WARNING, "B-ref at frame
> %d incompatible with B-pyramid %s and %d reference frames\n",
> -                          frm.sliceType,
> x264_b_pyramid_names[h->param.i_bframe_pyramid],
> h->param.i_frame_reference);
> -            } */
> +                        frm.sliceType,
> x265_b_pyramid_names[cfg->param.bpyramid], cfg->param.maxNumReferences);
> +            }
>
>              if (frm.sliceType == X265_TYPE_KEYFRAME)
>                  frm.sliceType = cfg->param.bOpenGOP ? X265_TYPE_I :
> X265_TYPE_IDR;
> @@ -716,12 +717,12 @@
>          list[bframes]->m_lowres.leadingBframes = bframes;
>          lastNonB = &list[bframes]->m_lowres;
>
> -        /* insert a bref into the sequence
> -        if (h->param.i_bframe_pyramid && bframes > 1 && !brefs)
> +        /* insert a bref into the sequence */
> +        if (cfg->param.bpyramid && bframes > 1 && !brefs)
>          {
> -            h->lookahead->next.list[bframes/2]->i_type = X264_TYPE_BREF;
> +            list[bframes/2]->m_lowres.sliceType = X265_TYPE_BREF;
>              brefs++;
> -        } */
> +        }
>
>          /* calculate the frame costs ahead of time for
> x264_rc_analyse_slice while we still have lowres */
>          if (cfg->param.rc.rateControlMode != X265_RC_CQP)
> @@ -742,8 +743,7 @@
>
>              estimateFrameCost(p0, p1, b, 0);
>
> -            /*
> -            if ((p0 != p1 || bframes) && cfg->param.rc.i_vbv_buffer_size)
> +            if ((p0 != p1 || bframes) /*&&
> cfg->param.rc.i_vbv_buffer_size*/ )
>              {
>                  // We need the intra costs for row SATDs
>                  estimateFrameCost(b, b, b, 0);
> @@ -752,7 +752,7 @@
>                  p0 = 0;
>                  for (b = 1; b <= bframes; b++)
>                  {
> -                    if (frames[b]->i_type == X265_TYPE_B)
> +                    if (frames[b]->sliceType == X265_TYPE_B)
>                          for (p1 = b; frames[p1]->sliceType ==
> X265_TYPE_B;)
>                              p1++;
>                      else
> @@ -761,7 +761,7 @@
>                      if (frames[b]->sliceType == X265_TYPE_BREF)
>                          p0 = b;
>                  }
> -            } */
> +            }
>          }
>
>          /* dequeue all frames from inputQueue that are about to be
> enqueued
> @@ -774,10 +774,23 @@
>
>          /* add non-B to output queue */
>          outputQueue.pushBack(*list[bframes]);
> +
> +        /* Add B-ref frame next to P frame in output queue, the B-ref
> encode before non B-ref frame */
> +        if (bframes > 1 && cfg->param.bpyramid)
> +        {
> +            for (int i = 0; i < bframes; i++)
> +            {
> +                if(list[i]->m_lowres.sliceType == X265_TYPE_BREF)
> +                    outputQueue.pushBack(*list[i]);
> +            }
> +        }
> +
>          /* add B frames to output queue */
>          for (int i = 0; i < bframes; i++)
>          {
> -            outputQueue.pushBack(*list[i]);
> +            /* push all the B frames into output queue except B-ref,
> which already pushed into output queue*/
> +            if (list[i]->m_lowres.sliceType != X265_TYPE_BREF)
> +                outputQueue.pushBack(*list[i]);
>          }
>
>          return;
> @@ -1155,11 +1168,7 @@
>          if (cost > threshold)
>              break;
>
> -        /* Keep some B-frames as references: 0=off, 1=strict
> hierarchical, 2=normal */
> -        //TODO Add this into param
> -        int bframe_pyramid = 0;
> -
> -        if (bframe_pyramid && next_p - cur_p > 2)
> +        if (cfg->param.bpyramid && next_p - cur_p > 2)
>          {
>              int middle = cur_p + (next_p - cur_p) / 2;
>              cost += estimateFrameCost(cur_p, next_p, middle, 0);
> diff -r 2321ebe0bf64 -r 1e22b9363807 source/x265.cpp
> --- a/source/x265.cpp   Mon Nov 18 11:32:06 2013 +0530
> +++ b/source/x265.cpp   Mon Nov 18 15:40:33 2013 +0530
> @@ -116,6 +116,7 @@
>      { "bframes",        required_argument, NULL, 'b' },
>      { "bframe-bias",    required_argument, NULL, 0 },
>      { "b-adapt",        required_argument, NULL, 0 },
> +    { "b-pyramid",      required_argument, NULL, 0 },
>      { "ref",            required_argument, NULL, 0 },
>      { "no-weightp",           no_argument, NULL, 0 },
>      { "weightp",              no_argument, NULL, 'w' },
> @@ -303,6 +304,7 @@
>      H0("   --bframes                     Maximum number of consecutive
> b-frames (now it only enables B GOP structure) Default %d\n",
> param->bframes);
>      H0("   --bframe-bias                 Bias towards B frame decisions.
> Default %d\n", param->bFrameBias);
>      H0("   --b-adapt                     0 - none, 1 - fast, 2 - full
> (trellis) adaptive B frame scheduling. Default %d\n",
> param->bFrameAdaptive);
> +    H0("...--b-pyramid...................Use B-frame reference 0:
> Disabled, 1: Enabled Default\n", param->bpyramid);
>      H0("   --ref                         max number of L0 references to
> be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
>      H0("-w/--[no-]weightp                Enable weighted prediction in P
> slices. Default %s\n", OPT(param->bEnableWeightedPred));
>      H0("\nQP, rate control and rate distortion options:\n");
> diff -r 2321ebe0bf64 -r 1e22b9363807 source/x265.h
> --- a/source/x265.h     Mon Nov 18 11:32:06 2013 +0530
> +++ b/source/x265.h     Mon Nov 18 15:40:33 2013 +0530
> @@ -204,6 +204,7 @@
>  #define X265_TYPE_I             0x0002
>  #define X265_TYPE_P             0x0003
>  #define X265_TYPE_BREF          0x0004  /* Non-disposable B-frame */
> +#define X265_B_PYRAMID_NORMAL   0x0001
>  #define X265_TYPE_B             0x0005
>  #define X265_TYPE_KEYFRAME      0x0006  /* IDR or I depending on
> b_open_gop option */
>  #define X265_AQ_NONE                 0
> @@ -315,6 +316,7 @@
>      int       bframes;                         ///< Max number of
> consecutive B-frames
>      int       lookaheadDepth;                  ///< Number of frames to
> use for lookahead, determines encoder latency
>      int       bFrameAdaptive;                  ///< 0 - none, 1 - fast, 2
> - full (trellis) adaptive B frame scheduling
> +    int       bpyramid;                        ///< 0 - none, 1 - normal
> use B-frame reference
>      int       bFrameBias;
>      int       scenecutThreshold;               ///< how aggressively to
> insert extra I frames
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131118/daf4be4a/attachment-0001.html>


More information about the x265-devel mailing list