[x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

Gopu Govindaswamy gopu at multicorewareinc.com
Mon Nov 18 11:10:47 CET 2013


# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1384769433 -19800
# Node ID 1e22b93638072ed805478d7af17f90e285fb4969
# Parent  2321ebe0bf64e5f3c0034076c7edb3ecbcd48039
b-pyramid implementation: Allow the use of B-frames as references for non B and B frames

When b-pyramid is enabled the bitrate is reduced noticeably and there is not much difference in encoding speed.
The PSNR changes only in the second decimal place: it increased for some of the clips and decreased for others.

Test results for reference, with b-pyramid enabled and disabled:
cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2
Enable B-reference  : --b-pyramid=1
Disable B-reference : --b-pyramid=0

Results:
Enable / Disable

clip - FourPeople_1280x720_60.yuv
Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps)
Bitrates - 516.30 kb/s / 544.68 kb/s
PSNR     - 39.725 / 39.701

clip - BasketballDrive_1920x1080_50.y4m
Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps)
Bitrates -  4166.92 kb/s / 4370.43 kb/s
PSNR     -  37.261 / 37.268

clip - Johnny_1280x720_60.y4m
Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps)
Bitrates - 304.29 kb/s / 328.84 kb/s
PSNR     - 40.605 / 40.551

clip - (clip name missing in the original message)
Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps)
Bitrates - 3496.84 kb/s / 3683.93 kb/s
PSNR     - 35.645 / 35.660

diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp
--- a/source/common/common.cpp	Mon Nov 18 11:32:06 2013 +0530
+++ b/source/common/common.cpp	Mon Nov 18 15:40:33 2013 +0530
@@ -54,6 +54,7 @@
 
 static int parseCspName(const char *arg, int& error);
 static int parseName(const char *arg, const char * const * names, int& error);
+static int parse_enum(const char *, const char * const * names, int *dst);
 
 using namespace x265;
 
@@ -165,6 +166,7 @@
     param->bframes = 3;
     param->lookaheadDepth = 40;
     param->bFrameAdaptive = X265_B_ADAPT_FAST;
+    param->bpyramid = 0;
     param->scenecutThreshold = 40; /* Magic number pulled in from x264*/
 
     /* Intra Coding Tools */
@@ -532,7 +534,7 @@
     }
 
     CHECK(param->bEnableWavefront < 0, "WaveFrontSynchro cannot be negative");
-
+    CHECK(param->bpyramid >= 2, "b-pyramid is 0 or 1");
     return check_failed;
 }
 
@@ -620,6 +622,7 @@
         x265_log(param, X265_LOG_INFO, "RDpenalty                    : %d\n", param->rdPenalty);
     }
     x265_log(param, X265_LOG_INFO, "Lookahead / bframes / badapt : %d / %d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
+    x265_log(param, X265_LOG_INFO, "b-pyramid / weightp / ref    : %d / %d / %d\n", param->bpyramid, param->bEnableWeightedPred, param->maxNumReferences);
     x265_log(param, X265_LOG_INFO, "tools: ");
 #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, "%s ", STR)
     TOOLOPT(param->bEnableRectInter, "rect");
@@ -628,7 +631,6 @@
     TOOLOPT(param->bEnableConstrainedIntra, "cip");
     TOOLOPT(param->bEnableEarlySkip, "esd");
     fprintf(stderr, "rd=%d ", param->rdLevel);
-    fprintf(stderr, "ref=%d ", param->maxNumReferences);
 
     TOOLOPT(param->bEnableLoopFilter, "lft");
     if (param->bEnableSAO)
@@ -650,7 +652,6 @@
         else
             fprintf(stderr, "tskip ");
     }
-    TOOLOPT(param->bEnableWeightedPred, "weightp");
     TOOLOPT(param->bEnableWeightedBiPred, "weightbp");
     TOOLOPT(param->rc.aqMode, "aq");
     fprintf(stderr, "\n");
@@ -747,6 +748,15 @@
     }
     OPT("input-csp") p->sourceCsp = ::parseCspName(value, berror);
     OPT("me")        p->searchMethod = ::parseName(value, x265_motion_est_names, berror);
+    OPT("b-pyramid")
+    {
+        berror |= parse_enum(value, x265_b_pyramid_names, &p->bpyramid);
+        if (berror)
+        {
+            berror = 0;
+            p->bpyramid = atoi(value);
+        }
+    }
     else
         return X265_PARAM_BAD_NAME;
 #undef OPT
@@ -802,6 +812,7 @@
     BOOL(p->bEnableSAO, "sao");
     s += sprintf(s, " sao-lcu-bounds=%d", p->saoLcuBoundary);
     s += sprintf(s, " sao-lcu-opt=%d", p->saoLcuBasedOptimization);
+    s += sprintf(s, " b-pyramid=%d", p->bpyramid);
 #undef BOOL
 
     return buf;
@@ -843,3 +854,13 @@
         error = 1;
     return a;
 }
+static int parse_enum(const char *arg, const char * const * names, int *dst)
+{
+    for (int i = 0; names[i]; i++)
+        if (!strcmp(arg, names[i]))
+        {
+            *dst = i;
+            return 0;
+        }
+    return -1;
+}
diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h
--- a/source/common/common.h	Mon Nov 18 11:32:06 2013 +0530
+++ b/source/common/common.h	Mon Nov 18 15:40:33 2013 +0530
@@ -107,6 +107,7 @@
 #define X265_LOG2(x)  log2(x)
 #endif
 
+static const char * const x265_b_pyramid_names[] = {"none", "normal", 0};
 /* defined in common.cpp */
 int64_t x265_mdate(void);
 void x265_log(x265_param *param, int level, const char *fmt, ...);
diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cpp	Mon Nov 18 11:32:06 2013 +0530
+++ b/source/encoder/dpb.cpp	Mon Nov 18 15:40:33 2013 +0530
@@ -78,7 +78,17 @@
         m_lastIDR = pocCurr;
     }
     slice->setLastIDR(m_lastIDR);
-    slice->setReferenced(slice->getSliceType() != B_SLICE);
+
+    if (slice->getSliceType() != B_SLICE)
+        slice->setReferenced(true);
+    else
+    {
+        if (pic->m_lowres.sliceType == X265_TYPE_BREF)
+            slice->setReferenced(true);
+        else
+            slice->setReferenced(false);
+    }
+
     slice->setTemporalLayerNonReferenceFlag(!slice->isReferenced());
     // Set the nal unit type
     slice->setNalUnitType(getNalUnitType(pocCurr, m_lastIDR, pic));
diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Nov 18 11:32:06 2013 +0530
+++ b/source/encoder/encoder.cpp	Mon Nov 18 15:40:33 2013 +0530
@@ -1223,7 +1223,13 @@
     vps.setMaxLayers(1);
     for (int i = 0; i < MAX_TLAYER; i++)
     {
-        m_numReorderPics[i] = 1;
+        /* Increase the DPB size if enabled the bpyramid the b-ref always should take Lo and L1 as a non B frames 
+        the dpb size is always 3 when enabled the b-pyramid */
+        if (_param->bpyramid && _param->bframes > 1)
+            m_numReorderPics[i] = 3;
+        else
+            m_numReorderPics[i] = 1;
+
         m_maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF, X265_MAX(m_numReorderPics[i] + 1, _param->maxNumReferences) + 1);
         vps.setNumReorderPics(m_numReorderPics[i], i);
         vps.setMaxDecPicBuffering(m_maxDecPicBuffering[i], i);
diff -r 2321ebe0bf64 -r 1e22b9363807 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Mon Nov 18 11:32:06 2013 +0530
+++ b/source/encoder/slicetype.cpp	Mon Nov 18 15:40:33 2013 +0530
@@ -643,21 +643,22 @@
             Lowres& frm = list[bframes]->m_lowres;
 
             if (frm.sliceType == X265_TYPE_BREF
-                /* && h->param.i_bframe_pyramid < X264_B_PYRAMID_NORMAL && brefs == h->param.i_bframe_pyramid*/)
+                && cfg->param.bpyramid < X265_B_PYRAMID_NORMAL && brefs == cfg->param.bpyramid)
             {
                 frm.sliceType = X265_TYPE_B;
-                x265_log(&cfg->param, X265_LOG_WARNING, "B-ref is not yet supported\n");
+                x265_log(&cfg->param, X265_LOG_WARNING, "B-ref at frame %d incompatible with B-pyramid %s \n",
+                        frm.frameNum, x265_b_pyramid_names[cfg->param.bpyramid] );
             }
 
             /* pyramid with multiple B-refs needs a big enough dpb that the preceding P-frame stays available.
-               smaller dpb could be supported by smart enough use of mmco, but it's easier just to forbid it.
-            else if (frm.sliceType == X265_TYPE_BREF && cfg->param.i_bframe_pyramid == X265_B_PYRAMID_NORMAL &&
-                     brefs && cfg->param.i_frame_reference <= (brefs+3))
+               smaller dpb could be supported by smart enough use of mmco, but it's easier just to forbid it.*/
+            else if (frm.sliceType == X265_TYPE_BREF && cfg->param.bpyramid == X265_B_PYRAMID_NORMAL &&
+                     brefs && cfg->param.maxNumReferences <= (brefs+3))
             {
                 frm.sliceType = X265_TYPE_B;
                 x265_log(&cfg->param, X265_LOG_WARNING, "B-ref at frame %d incompatible with B-pyramid %s and %d reference frames\n",
-                          frm.sliceType, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference);
-            } */
+                        frm.sliceType, x265_b_pyramid_names[cfg->param.bpyramid], cfg->param.maxNumReferences);
+            }
 
             if (frm.sliceType == X265_TYPE_KEYFRAME)
                 frm.sliceType = cfg->param.bOpenGOP ? X265_TYPE_I : X265_TYPE_IDR;
@@ -716,12 +717,12 @@
         list[bframes]->m_lowres.leadingBframes = bframes;
         lastNonB = &list[bframes]->m_lowres;
 
-        /* insert a bref into the sequence
-        if (h->param.i_bframe_pyramid && bframes > 1 && !brefs)
+        /* insert a bref into the sequence */
+        if (cfg->param.bpyramid && bframes > 1 && !brefs)
         {
-            h->lookahead->next.list[bframes/2]->i_type = X264_TYPE_BREF;
+            list[bframes/2]->m_lowres.sliceType = X265_TYPE_BREF;
             brefs++;
-        } */
+        }
 
         /* calculate the frame costs ahead of time for x264_rc_analyse_slice while we still have lowres */
         if (cfg->param.rc.rateControlMode != X265_RC_CQP)
@@ -742,8 +743,7 @@
 
             estimateFrameCost(p0, p1, b, 0);
 
-            /*
-            if ((p0 != p1 || bframes) && cfg->param.rc.i_vbv_buffer_size)
+            if ((p0 != p1 || bframes) /*&& cfg->param.rc.i_vbv_buffer_size*/ )
             {
                 // We need the intra costs for row SATDs
                 estimateFrameCost(b, b, b, 0);
@@ -752,7 +752,7 @@
                 p0 = 0;
                 for (b = 1; b <= bframes; b++)
                 {
-                    if (frames[b]->i_type == X265_TYPE_B)
+                    if (frames[b]->sliceType == X265_TYPE_B)
                         for (p1 = b; frames[p1]->sliceType == X265_TYPE_B;)
                             p1++;
                     else
@@ -761,7 +761,7 @@
                     if (frames[b]->sliceType == X265_TYPE_BREF)
                         p0 = b;
                 }
-            } */
+            }
         }
 
         /* dequeue all frames from inputQueue that are about to be enqueued
@@ -774,10 +774,23 @@
 
         /* add non-B to output queue */
         outputQueue.pushBack(*list[bframes]);
+
+        /* Add B-ref frame next to P frame in output queue, the B-ref encode before non B-ref frame */
+        if (bframes > 1 && cfg->param.bpyramid)
+        {
+            for (int i = 0; i < bframes; i++)
+            {
+                if(list[i]->m_lowres.sliceType == X265_TYPE_BREF)
+                    outputQueue.pushBack(*list[i]);
+            }
+        }
+
         /* add B frames to output queue */
         for (int i = 0; i < bframes; i++)
         {
-            outputQueue.pushBack(*list[i]);
+            /* push all the B frames into output queue except B-ref, which already pushed into output queue*/
+            if (list[i]->m_lowres.sliceType != X265_TYPE_BREF)
+                outputQueue.pushBack(*list[i]);
         }
 
         return;
@@ -1155,11 +1168,7 @@
         if (cost > threshold)
             break;
 
-        /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
-        //TODO Add this into param
-        int bframe_pyramid = 0;
-
-        if (bframe_pyramid && next_p - cur_p > 2)
+        if (cfg->param.bpyramid && next_p - cur_p > 2)
         {
             int middle = cur_p + (next_p - cur_p) / 2;
             cost += estimateFrameCost(cur_p, next_p, middle, 0);
diff -r 2321ebe0bf64 -r 1e22b9363807 source/x265.cpp
--- a/source/x265.cpp	Mon Nov 18 11:32:06 2013 +0530
+++ b/source/x265.cpp	Mon Nov 18 15:40:33 2013 +0530
@@ -116,6 +116,7 @@
     { "bframes",        required_argument, NULL, 'b' },
     { "bframe-bias",    required_argument, NULL, 0 },
     { "b-adapt",        required_argument, NULL, 0 },
+    { "b-pyramid",      required_argument, NULL, 0 },
     { "ref",            required_argument, NULL, 0 },
     { "no-weightp",           no_argument, NULL, 0 },
     { "weightp",              no_argument, NULL, 'w' },
@@ -303,6 +304,7 @@
     H0("   --bframes                     Maximum number of consecutive b-frames (now it only enables B GOP structure) Default %d\n", param->bframes);
     H0("   --bframe-bias                 Bias towards B frame decisions. Default %d\n", param->bFrameBias);
     H0("   --b-adapt                     0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive);
+    H0("...--b-pyramid...................Use B-frame reference 0: Disabled, 1: Enabled Default\n", param->bpyramid);
     H0("   --ref                         max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
     H0("-w/--[no-]weightp                Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred));
     H0("\nQP, rate control and rate distortion options:\n");
diff -r 2321ebe0bf64 -r 1e22b9363807 source/x265.h
--- a/source/x265.h	Mon Nov 18 11:32:06 2013 +0530
+++ b/source/x265.h	Mon Nov 18 15:40:33 2013 +0530
@@ -204,6 +204,7 @@
 #define X265_TYPE_I             0x0002
 #define X265_TYPE_P             0x0003
 #define X265_TYPE_BREF          0x0004  /* Non-disposable B-frame */
+#define X265_B_PYRAMID_NORMAL   0x0001
 #define X265_TYPE_B             0x0005
 #define X265_TYPE_KEYFRAME      0x0006  /* IDR or I depending on b_open_gop option */
 #define X265_AQ_NONE                 0
@@ -315,6 +316,7 @@
     int       bframes;                         ///< Max number of consecutive B-frames
     int       lookaheadDepth;                  ///< Number of frames to use for lookahead, determines encoder latency
     int       bFrameAdaptive;                  ///< 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling
+    int       bpyramid;                        ///< 0 - none, 1 - normal use B-frame reference
     int       bFrameBias;
     int       scenecutThreshold;               ///< how aggressively to insert extra I frames
 


More information about the x265-devel mailing list