[x264-devel] [PATCH] Optionally increment IDR_PIC_ID for IDR frames from a value specified in CLI

Chao Chen chaoc at netflix.com
Tue Jun 2 01:07:19 CEST 2020


The current x264 encoder flips the IDR_PIC_ID of IDR frames between 0 and 1 in the H.264 streams it generates.
In this way, the encoded bitstream complies with the H.264 specification, which requires consecutive
IDR frames to have different IDR_PIC_IDs.

However, in cloud transcoding and fragmented video streaming applications, different fragments of a video
are encoded independently and then assembled at the client (e.g. a DASH client). In this case, if we encode with
the current x264 implementation, the stream assembled at the client may contain consecutive IDR frames with the
same IDR_PIC_ID, which violates the specification and could break the decoding process at the client.

Specifically, suppose that x264 encodes two video fragments of 5 frames each. Both fragments are encoded
as I(0)-P-I(1)-P-I(0), where the IDR_PIC_ID is given by the number in parentheses. If a client assembles the
two fragments together, the final stream is I(0)-P-I(1)-P-I(0)-I(0)-P-I(1)-P-I(0). Here, the 3rd and 4th IDR
frames (the 5th and 6th frames of the stream) are adjacent and have the same IDR_PIC_ID, which violates the
specification. We have seen this issue break the H.264 decoder of the Edge browser.

To address this limitation, this commit provides a different way to control IDR_PIC_ID in encoded streams. Note
that IDR_PIC_ID is a 16-bit value ranging from 0 to 65535. With this commit, users can specify an initial IDR_PIC_ID
for the first IDR frame, and the encoder will increment the value for every subsequent encoded IDR frame (wrapping
around to 0 after 65535). This gives us the flexibility to avoid IDR_PIC_ID collisions in videos assembled at
clients. In the above example, we can encode the two fragments as I(1)-P-I(2)-P-I(3) and I(4)-P-I(5)-P-I(6) so
that the concatenated stream complies with the specification.

We have tested the resulting streams on thousands of devices, including iOS devices, smart TVs, Android devices
and browsers. None of them ran into decoding errors.

Example usage:
./x264 --output testout.264 --init-idr-id 123 --keyint 10 --frames 100 testsrc.y4m

If `--init-idr-id` is not specified, x264 falls back to the default behavior, i.e., it flips IDR_PIC_ID between 0 and 1.
---
 common/base.c     | 7 +++++++
 encoder/encoder.c | 4 ++++
 x264.c            | 4 ++++
 x264.h            | 1 +
 4 files changed, 16 insertions(+)

diff --git a/common/base.c b/common/base.c
index abca9144..243ac936 100644
--- a/common/base.c
+++ b/common/base.c
@@ -322,6 +322,7 @@ REALIGN_STACK void x264_param_default( x264_param_t *param )
     param->i_frame_reference = 3;
     param->i_keyint_max = 250;
     param->i_keyint_min = X264_KEYINT_MIN_AUTO;
+    param->i_init_idr_id = -1;
     param->i_bframe = 3;
     param->i_scenecut_threshold = 40;
     param->i_bframe_adaptive = X264_B_ADAPT_FAST;
@@ -972,6 +973,12 @@ REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const cha
         else
             p->i_keyint_max = atoi(value);
     }
+    OPT("init-idr-id")
+    {
+        p->i_init_idr_id = atoi(value);
+        if(p->i_init_idr_id > 65535 || p->i_init_idr_id < 0)
+            b_error = 1;
+    }
     OPT2("min-keyint", "keyint-min")
     {
         p->i_keyint_min = atoi(value);
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 968e2735..b885f947 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1495,6 +1495,8 @@ x264_t *x264_encoder_open( x264_param_t *param )
 
     if( h->param.i_avcintra_class )
         h->i_idr_pic_id = 5;
+    else if (h->param.i_init_idr_id >=0)
+        h->i_idr_pic_id = h->param.i_init_idr_id;
     else
         h->i_idr_pic_id = 0;
 
@@ -2554,6 +2556,8 @@ static inline void slice_init( x264_t *h, int i_nal_type, int i_global_qp )
                     break;
             }
         }
+        else if( h->param.i_init_idr_id >= 0 )
+            h->i_idr_pic_id = (h->i_idr_pic_id + 1) % 65536;
         else
             h->i_idr_pic_id ^= 1;
     }
diff --git a/x264.c b/x264.c
index 1d6334ee..c182248d 100644
--- a/x264.c
+++ b/x264.c
@@ -693,6 +693,9 @@ static void help( x264_param_t *defaults, int longhelp )
     H0( "\n" );
     H0( "  -I, --keyint <integer or \"infinite\"> Maximum GOP size [%d]\n", defaults->i_keyint_max );
     H2( "  -i, --min-keyint <integer>  Minimum GOP size [auto]\n" );
+    H2( "      --init-idr-id <integer> If specified, increment the IDR_PIC_ID for each IDR frame\n"
+        "                              from the specified value.\n"
+        "                              If not specified, flip the IDR_PIC_ID between 0 and 1.\n");
     H2( "      --no-scenecut           Disable adaptive I-frame decision\n" );
     H2( "      --scenecut <integer>    How aggressively to insert extra I-frames [%d]\n", defaults->i_scenecut_threshold );
     H2( "      --intra-refresh         Use Periodic Intra Refresh instead of IDR frames\n" );
@@ -1043,6 +1046,7 @@ static struct option long_options[] =
     { "min-keyint",  required_argument, NULL, 'i' },
     { "keyint",      required_argument, NULL, 'I' },
     { "intra-refresh",     no_argument, NULL, 0 },
+    { "init-idr-id", required_argument, NULL, 0 },
     { "scenecut",    required_argument, NULL, 0 },
     { "no-scenecut",       no_argument, NULL, 0 },
     { "nf",                no_argument, NULL, 0 },
diff --git a/x264.h b/x264.h
index 3a50f1c8..6aba2ca8 100644
--- a/x264.h
+++ b/x264.h
@@ -351,6 +351,7 @@ typedef struct x264_param_t
                                      * Useful in combination with interactive error resilience. */
     int         i_keyint_max;       /* Force an IDR keyframe at this interval */
     int         i_keyint_min;       /* Scenecuts closer together than this are coded as I, not IDR. */
+    int         i_init_idr_id;      /* The initial idr_pic_id in the encoded stream */
     int         i_scenecut_threshold; /* how aggressively to insert extra I frames */
     int         b_intra_refresh;    /* Whether or not to use periodic intra refresh instead of IDR frames. */
 
-- 
2.25.0



More information about the x264-devel mailing list