[vlc-commits] demux: mp4: constify ES setup

Francois Cartegnie git at videolan.org
Thu Apr 16 15:50:51 CEST 2020


vlc | branch: master | Francois Cartegnie <fcvlcdev at free.fr> | Thu Apr  9 14:51:29 2020 +0200| [e91cb1b9fd0ccd96f99a92b0f52ecdda0cf9c47f] | committer: Francois Cartegnie

demux: mp4: constify ES setup

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=e91cb1b9fd0ccd96f99a92b0f52ecdda0cf9c47f
---

 modules/demux/mp4/essetup.c | 555 +++++++++++++++++++++++---------------------
 modules/demux/mp4/mp4.c     | 169 ++++++++------
 modules/demux/mp4/mp4.h     |  31 ++-
 3 files changed, 413 insertions(+), 342 deletions(-)

diff --git a/modules/demux/mp4/essetup.c b/modules/demux/mp4/essetup.c
index c6cc34871e..ed3f448b20 100644
--- a/modules/demux/mp4/essetup.c
+++ b/modules/demux/mp4/essetup.c
@@ -49,14 +49,26 @@ static void CopyExtradata( const uint8_t *p_extra, size_t i_extra,
     }
 }
 
-static void SetupGlobalExtensions( mp4_track_t *p_track, MP4_Box_t *p_sample )
+static uint32_t GetSampleType( demux_t *p_demux, const MP4_Box_t *p_sample )
 {
-    if( !p_track->fmt.i_bitrate )
+    const MP4_Box_t *p_frma;
+    if( ( p_frma = MP4_BoxGet( p_sample, "sinf/frma" ) ) && BOXDATA(p_frma) )
+    {
+        msg_Warn( p_demux, "Original Format Box: %4.4s", (char *)&BOXDATA(p_frma)->i_type );
+        return BOXDATA(p_frma)->i_type;
+    }
+    return p_sample->i_type;
+}
+
+static void SetupGlobalExtensions( const MP4_Box_t *p_sample,
+                                   es_format_t *p_fmt )
+{
+    if( !p_fmt->i_bitrate )
     {
         const MP4_Box_t *p_btrt = MP4_BoxGet( p_sample, "btrt" );
         if( p_btrt && BOXDATA(p_btrt) )
         {
-            p_track->fmt.i_bitrate = BOXDATA(p_btrt)->i_avg_bitrate;
+            p_fmt->i_bitrate = BOXDATA(p_btrt)->i_avg_bitrate;
         }
     }
 
@@ -64,30 +76,32 @@ static void SetupGlobalExtensions( mp4_track_t *p_track, MP4_Box_t *p_sample )
     if( p_glbl && p_glbl->data.p_binary && p_glbl->data.p_binary->p_blob )
     {
         CopyExtradata( p_glbl->data.p_binary->p_blob,
-                       p_glbl->data.p_binary->i_blob, &p_track->fmt );
+                       p_glbl->data.p_binary->i_blob, p_fmt );
     }
 }
 
-static void SetupESDS( demux_t *p_demux, mp4_track_t *p_track, const MP4_descriptor_decoder_config_t *p_decconfig )
+static void SetupESDS( demux_t *p_demux, const mp4_track_t *p_track,
+                       const MP4_descriptor_decoder_config_t *p_decconfig,
+                       es_format_t *p_fmt )
 {
     /* First update information based on i_objectTypeIndication */
     switch( p_decconfig->i_objectProfileIndication )
     {
         /* Private ID */
     case( 0xe0 ): /* NeroDigital: dvd subs */
-        if( p_track->fmt.i_cat == SPU_ES )
+        if( p_fmt->i_cat == SPU_ES )
         {
-            p_track->fmt.i_codec = VLC_CODEC_SPU;
+            p_fmt->i_codec = VLC_CODEC_SPU;
             if( p_track->i_width > 0 )
-                p_track->fmt.subs.spu.i_original_frame_width = p_track->i_width;
+                p_fmt->subs.spu.i_original_frame_width = p_track->i_width;
             if( p_track->i_height > 0 )
-                p_track->fmt.subs.spu.i_original_frame_height = p_track->i_height;
+                p_fmt->subs.spu.i_original_frame_height = p_track->i_height;
         }
         break;
     case( 0xe1 ): /* QCelp for 3gp */
-        if( p_track->fmt.i_cat == AUDIO_ES )
+        if( p_fmt->i_cat == AUDIO_ES )
         {
-            p_track->fmt.i_codec = VLC_CODEC_QCELP;
+            p_fmt->i_codec = VLC_CODEC_QCELP;
         }
         break;
 
@@ -96,8 +110,8 @@ static void SetupESDS( demux_t *p_demux, mp4_track_t *p_track, const MP4_descrip
         if( MPEG4_Codec_By_ObjectType( p_decconfig->i_objectProfileIndication,
                                        p_decconfig->p_decoder_specific_info,
                                        p_decconfig->i_decoder_specific_info_len,
-                                       &p_track->fmt.i_codec,
-                                       &p_track->fmt.i_profile ) )
+                                       &p_fmt->i_codec,
+                                       &p_fmt->i_profile ) )
             break;
         /* Unknown entry, but don't touch i_fourcc */
         msg_Warn( p_demux,
@@ -107,35 +121,38 @@ static void SetupESDS( demux_t *p_demux, mp4_track_t *p_track, const MP4_descrip
         return;
     }
 
-    p_track->fmt.i_original_fourcc = 0; /* so we don't have MP4A as original fourcc */
-    p_track->fmt.i_bitrate = p_decconfig->i_avg_bitrate;
+    p_fmt->i_original_fourcc = 0; /* so we don't have MP4A as original fourcc */
+    p_fmt->i_bitrate = p_decconfig->i_avg_bitrate;
 
     CopyExtradata( p_decconfig->p_decoder_specific_info,
                    p_decconfig->i_decoder_specific_info_len,
-                   &p_track->fmt );
+                   p_fmt );
 
-    if( p_track->fmt.i_codec == VLC_CODEC_SPU &&
-            p_track->fmt.i_extra >= 16 * 4 )
+    if( p_fmt->i_codec == VLC_CODEC_SPU &&
+            p_fmt->i_extra >= 16 * 4 )
     {
         for( int i = 0; i < 16; i++ )
         {
-            p_track->fmt.subs.spu.palette[1 + i] =
-                    GetDWBE((char*)p_track->fmt.p_extra + i * 4);
+            p_fmt->subs.spu.palette[1 + i] =
+                    GetDWBE((char*)p_fmt->p_extra + i * 4);
         }
-        p_track->fmt.subs.spu.palette[0] = SPU_PALETTE_DEFINED;
+        p_fmt->subs.spu.palette[0] = SPU_PALETTE_DEFINED;
     }
 }
 
-static int SetupRTPReceptionHintTrack( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
+static int SetupRTPReceptionHintTrack( demux_t *p_demux, const mp4_track_t *p_track,
+                                       const MP4_Box_t *p_sample, es_format_t *p_fmt,
+                                       track_config_t *params )
 {
-    p_track->fmt.i_original_fourcc = p_sample->i_type;
+    const uint32_t i_sample_type = GetSampleType( p_demux, p_sample );
+    p_fmt->i_original_fourcc = i_sample_type;
 
-    if( !p_track->p_sdp )
+    const MP4_Box_t *p_sdp = MP4_BoxGet( p_track->p_track, "udta/hnti/sdp " );
+    if( !p_sdp )
     {
         msg_Err(p_demux, "Required 'sdp '-box not found");
         return 0;
     }
-    MP4_Box_t *p_sdp = p_track->p_sdp;
     char *strtok_state;
     char * pch = strtok_r(BOXDATA(p_sdp)->psz_text, " =\n", &strtok_state); /* media entry */
     if( pch && pch[0] != 'm' )
@@ -171,7 +188,7 @@ static int SetupRTPReceptionHintTrack( demux_t *p_demux, mp4_track_t *p_track, M
             switch( rtp_payload )
             {
              case 3:
-                p_track->fmt.i_codec = VLC_CODEC_GSM;
+                p_fmt->i_codec = VLC_CODEC_GSM;
                 codec_set = true;
                 break;
              default:
@@ -202,17 +219,17 @@ static int SetupRTPReceptionHintTrack( demux_t *p_demux, mp4_track_t *p_track, M
             /* Codecs using slices need their picture constructed from sample */
             if( !strcmp(pch, "H264") )
             {
-                p_track->fmt.i_codec = VLC_CODEC_H264;
+                p_fmt->i_codec = VLC_CODEC_H264;
                 /* ******* sending AnnexB ! */
-                p_track->fmt.b_packetized = false;
+                p_fmt->b_packetized = false;
             }
             else if( !strcmp(pch, "GSM") )
             {
-                p_track->fmt.i_codec = VLC_CODEC_GSM;
+                p_fmt->i_codec = VLC_CODEC_GSM;
             }
             else if( !strcmp(pch, "Speex") )
             {
-                p_track->fmt.i_codec = VLC_CODEC_SPEEX;
+                p_fmt->i_codec = VLC_CODEC_SPEEX;
             }
             else if( !codec_set )
             {
@@ -225,8 +242,8 @@ static int SetupRTPReceptionHintTrack( demux_t *p_demux, mp4_track_t *p_track, M
                 return 0;
             int clock_rate = atoi(pch);
             msg_Dbg(p_demux, "sdp clock rate:%d", clock_rate);
-            if( p_track->fmt.i_cat == AUDIO_ES )
-                p_track->fmt.audio.i_rate = clock_rate;
+            if( p_fmt->i_cat == AUDIO_ES )
+                p_fmt->audio.i_rate = clock_rate;
         }
         pch = strtok_r(NULL, " =\n", &strtok_state); /* next attribute */
     }
@@ -234,89 +251,94 @@ static int SetupRTPReceptionHintTrack( demux_t *p_demux, mp4_track_t *p_track, M
     const MP4_Box_t *p_tims = MP4_BoxGet(p_sample, "tims");
     if( p_tims && BOXDATA(p_tims) && BOXDATA(p_tims)->i_timescale )
     {
-        p_track->i_timescale = BOXDATA(p_tims)->i_timescale;
+        params->i_timescale_override = BOXDATA(p_tims)->i_timescale;
     }
     else
     {
         msg_Warn(p_demux, "Missing mandatory box tims");
         return 0;
     }
-
+#if 0
     const MP4_Box_t *p_tssy = MP4_BoxGet(p_sample, "tssy");
     if( p_tssy && BOXDATA(p_tssy) )
     {
         /* take the 2 last bits which indicate the synchronization mode */
-        p_track->sync_mode = (RTP_timstamp_synchronization_t)
-                             BOXDATA(p_tssy)->i_reserved_timestamp_sync & 0x03;
+        params->sync_mode = (RTP_timstamp_synchronization_t)
+                            BOXDATA(p_tssy)->i_reserved_timestamp_sync & 0x03;
     }
 
     const MP4_Box_t *p_tsro = MP4_BoxGet(p_sample, "tsro");
     if( p_tsro && BOXDATA(p_tsro) )
-        p_track->i_tsro_offset = BOXDATA(p_tsro)->i_offset;
+        params->i_tsro_offset = BOXDATA(p_tsro)->i_offset;
     else
         msg_Dbg(p_demux, "No tsro box present");
     msg_Dbg(p_demux, "setting tsro: %" PRId32, p_track->i_tsro_offset);
-
+#endif
     return 1;
 }
 
 
-int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
+int SetupVideoES( demux_t *p_demux, const mp4_track_t *p_track, const MP4_Box_t *p_sample,
+                  es_format_t *p_fmt, track_config_t *p_cfg )
 {
-    MP4_Box_data_sample_vide_t *p_vide = p_sample->data.p_sample_vide;
+    track_config_t trackparams, *params = &trackparams;
+
+    const MP4_Box_data_sample_vide_t *p_vide = p_sample->data.p_sample_vide;
     if(!p_vide)
         return 0;
 
-    p_track->fmt.video.i_width = p_vide->i_width;
-    p_track->fmt.video.i_height = p_vide->i_height;
-    p_track->fmt.video.i_bits_per_pixel = p_vide->i_depth;
+    const uint32_t i_sample_type = GetSampleType( p_demux, p_sample );
+
+    p_fmt->video.i_width = p_vide->i_width;
+    p_fmt->video.i_height = p_vide->i_height;
+    p_fmt->video.i_bits_per_pixel = p_vide->i_depth;
 
     /* fall on display size */
-    if( p_track->fmt.video.i_width <= 0 )
-        p_track->fmt.video.i_width = p_track->i_width;
-    if( p_track->fmt.video.i_height <= 0 )
-        p_track->fmt.video.i_height = p_track->i_height;
+    if( p_fmt->video.i_width <= 0 )
+        p_fmt->video.i_width = p_track->i_width;
+    if( p_fmt->video.i_height <= 0 )
+        p_fmt->video.i_height = p_track->i_height;
 
     /* Find out apect ratio from display size */
     if( p_track->i_width > 0 && p_track->i_height > 0 &&
         /* Work-around buggy muxed files */
         p_vide->i_width != p_track->i_width )
     {
-        p_track->fmt.video.i_sar_num = p_track->i_width  * p_track->fmt.video.i_height;
-        p_track->fmt.video.i_sar_den = p_track->i_height * p_track->fmt.video.i_width;
+        p_fmt->video.i_sar_num = p_track->i_width  * p_fmt->video.i_height;
+        p_fmt->video.i_sar_den = p_track->i_height * p_fmt->video.i_width;
     }
 
     /* Support for cropping (eg. in H263 files) */
-    p_track->fmt.video.i_visible_width = p_track->fmt.video.i_width;
-    p_track->fmt.video.i_visible_height = p_track->fmt.video.i_height;
+    p_fmt->video.i_visible_width = p_fmt->video.i_width;
+    p_fmt->video.i_visible_height = p_fmt->video.i_height;
 
     /* Rotation */
     switch( (int)p_track->f_rotation ) {
         case 90:
-            p_track->fmt.video.orientation = ORIENT_ROTATED_90;
+            p_fmt->video.orientation = ORIENT_ROTATED_90;
             break;
         case 180:
             if (p_track->i_flip == 1) {
-                p_track->fmt.video.orientation = ORIENT_VFLIPPED;
+                p_fmt->video.orientation = ORIENT_VFLIPPED;
             } else {
-                p_track->fmt.video.orientation = ORIENT_ROTATED_180;
+                p_fmt->video.orientation = ORIENT_ROTATED_180;
             }
             break;
         case 270:
-            p_track->fmt.video.orientation = ORIENT_ROTATED_270;
+            p_fmt->video.orientation = ORIENT_ROTATED_270;
             break;
     }
 
     /* Flip, unless already flipped */
     if (p_track->i_flip == 1 && (int)p_track->f_rotation != 180) {
-        video_transform_t transform = (video_transform_t)p_track->fmt.video.orientation;
+        video_transform_t transform = (video_transform_t)p_fmt->video.orientation;
         /* Flip first then rotate */
-        p_track->fmt.video.orientation = ORIENT_HFLIPPED;
-        video_format_TransformBy(&p_track->fmt.video, transform);
+        p_fmt->video.orientation = ORIENT_HFLIPPED;
+        video_format_TransformBy(&p_fmt->video, transform);
     }
 
     /* Set 360 video mode */
-    p_track->fmt.video.projection_mode = PROJECTION_MODE_RECTANGULAR;
+    p_fmt->video.projection_mode = PROJECTION_MODE_RECTANGULAR;
     const MP4_Box_t *p_uuid = MP4_BoxGet( p_track->p_track, "uuid" );
     for( ; p_uuid; p_uuid = p_uuid->p_next)
     {
@@ -324,17 +346,17 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             && !CmpUUID( &p_uuid->i_uuid, &XML360BoxUUID )
             && p_uuid->data.p_360 )
         {
-            p_track->fmt.video.projection_mode = p_uuid->data.p_360->i_projection_mode;
+            p_fmt->video.projection_mode = p_uuid->data.p_360->i_projection_mode;
             switch (p_uuid->data.p_360->e_stereo_mode)
             {
             case XML360_STEREOSCOPIC_TOP_BOTTOM:
-                p_track->fmt.video.multiview_mode = MULTIVIEW_STEREO_TB;
+                p_fmt->video.multiview_mode = MULTIVIEW_STEREO_TB;
                 break;
             case XML360_STEREOSCOPIC_LEFT_RIGHT:
-                p_track->fmt.video.multiview_mode = MULTIVIEW_STEREO_SBS;
+                p_fmt->video.multiview_mode = MULTIVIEW_STEREO_SBS;
                 break;
             default:
-                p_track->fmt.video.multiview_mode = MULTIVIEW_2D;
+                p_fmt->video.multiview_mode = MULTIVIEW_2D;
                 break;
             }
         }
@@ -346,13 +368,13 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         switch( BOXDATA(p_st3d)->i_stereo_mode )
         {
         case ST3D_MONOSCOPIC:
-            p_track->fmt.video.multiview_mode = MULTIVIEW_2D;
+            p_fmt->video.multiview_mode = MULTIVIEW_2D;
             break;
         case ST3D_STEREOSCOPIC_TOP_BOTTOM:
-            p_track->fmt.video.multiview_mode = MULTIVIEW_STEREO_TB;
+            p_fmt->video.multiview_mode = MULTIVIEW_STEREO_TB;
             break;
         case ST3D_STEREOSCOPIC_LEFT_RIGHT:
-            p_track->fmt.video.multiview_mode = MULTIVIEW_STEREO_SBS;
+            p_fmt->video.multiview_mode = MULTIVIEW_STEREO_SBS;
             break;
         default:
             msg_Warn( p_demux, "Unknown stereo mode %d", BOXDATA(p_st3d)->i_stereo_mode );
@@ -370,7 +392,7 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                 p_uuid->data.p_binary->i_blob == 4 &&
                 !memcmp( p_uuid->data.p_binary->p_blob, "\x82\x81\x10\x02", 4 ) )
             {
-                p_track->fmt.video.multiview_mode = MULTIVIEW_STEREO_FRAME;
+                p_fmt->video.multiview_mode = MULTIVIEW_STEREO_FRAME;
                 break;
             }
         }
@@ -379,71 +401,71 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
     const MP4_Box_t *p_prhd = MP4_BoxGet( p_sample, "sv3d/proj/prhd" );
     if (p_prhd && BOXDATA(p_prhd))
     {
-        p_track->fmt.video.pose.yaw = BOXDATA(p_prhd)->f_pose_yaw_degrees;
-        p_track->fmt.video.pose.pitch = BOXDATA(p_prhd)->f_pose_pitch_degrees;
-        p_track->fmt.video.pose.roll = BOXDATA(p_prhd)->f_pose_roll_degrees;
+        p_fmt->video.pose.yaw = BOXDATA(p_prhd)->f_pose_yaw_degrees;
+        p_fmt->video.pose.pitch = BOXDATA(p_prhd)->f_pose_pitch_degrees;
+        p_fmt->video.pose.roll = BOXDATA(p_prhd)->f_pose_roll_degrees;
     }
 
     const MP4_Box_t *p_equi = MP4_BoxGet( p_sample, "sv3d/proj/equi" );
     const MP4_Box_t *p_cbmp = MP4_BoxGet( p_sample, "sv3d/proj/cbmp" );
     if (p_equi && BOXDATA(p_equi))
-        p_track->fmt.video.projection_mode = PROJECTION_MODE_EQUIRECTANGULAR;
+        p_fmt->video.projection_mode = PROJECTION_MODE_EQUIRECTANGULAR;
     else if (p_cbmp && BOXDATA(p_cbmp))
-        p_track->fmt.video.projection_mode = PROJECTION_MODE_CUBEMAP_LAYOUT_STANDARD;
+        p_fmt->video.projection_mode = PROJECTION_MODE_CUBEMAP_LAYOUT_STANDARD;
 
     /* It's a little ugly but .. there are special cases */
-    switch( p_sample->i_type )
+    switch( i_sample_type )
     {
         case( VLC_FOURCC( 's', '2', '6', '3' ) ):
-            p_track->fmt.i_codec = VLC_CODEC_H263;
+            p_fmt->i_codec = VLC_CODEC_H263;
             break;
         case VLC_FOURCC('y','v','1','2'):
-            p_track->fmt.i_codec = VLC_CODEC_YV12;
+            p_fmt->i_codec = VLC_CODEC_YV12;
             break;
         case VLC_FOURCC('y','u','v','2'):
-            p_track->fmt.i_codec = VLC_CODEC_YUV2;
+            p_fmt->i_codec = VLC_CODEC_YUV2;
             break;
         case VLC_FOURCC('A','B','G','R'):
-            p_track->fmt.i_codec = VLC_CODEC_ARGB;
-            p_track->fmt.video.i_rmask = 0x0000FF;
-            p_track->fmt.video.i_gmask = 0x00FF00;
-            p_track->fmt.video.i_bmask = 0xFF0000;
+            p_fmt->i_codec = VLC_CODEC_ARGB;
+            p_fmt->video.i_rmask = 0x0000FF;
+            p_fmt->video.i_gmask = 0x00FF00;
+            p_fmt->video.i_bmask = 0xFF0000;
             break;
         case VLC_FOURCC('2','4','B','G'):
-            p_track->fmt.i_codec = VLC_CODEC_RGB24;
-            p_track->fmt.video.i_rmask = 0x0000FF;
-            p_track->fmt.video.i_gmask = 0x00FF00;
-            p_track->fmt.video.i_bmask = 0xFF0000;
+            p_fmt->i_codec = VLC_CODEC_RGB24;
+            p_fmt->video.i_rmask = 0x0000FF;
+            p_fmt->video.i_gmask = 0x00FF00;
+            p_fmt->video.i_bmask = 0xFF0000;
             break;
         case VLC_FOURCC('r','a','w',' '):
             switch( p_vide->i_depth ) {
                 case 16:
-                    p_track->fmt.i_codec = VLC_CODEC_RGB15;
+                    p_fmt->i_codec = VLC_CODEC_RGB15;
                     break;
                 case 24:
-                    p_track->fmt.i_codec = VLC_CODEC_RGB24;
+                    p_fmt->i_codec = VLC_CODEC_RGB24;
                     break;
                 case 32:
-                    p_track->fmt.i_codec = VLC_CODEC_ARGB;
+                    p_fmt->i_codec = VLC_CODEC_ARGB;
                     break;
                 case 32 + 8:
-                    p_track->fmt.i_codec = VLC_CODEC_GREY;
+                    p_fmt->i_codec = VLC_CODEC_GREY;
                     break;
                 default:
                     msg_Dbg( p_demux, "Unrecognized raw video format (depth = %d)",
                              p_vide->i_depth );
-                    p_track->fmt.i_codec = p_sample->i_type;
+                    p_fmt->i_codec = i_sample_type;
                     break;
             }
             break;
         case( VLC_FOURCC( 'r', 'r', 't', 'p' ) ): /* RTP Reception Hint Track */
         {
-            if( !SetupRTPReceptionHintTrack( p_demux, p_track, p_sample ) )
-                p_track->fmt.i_codec = p_sample->i_type;
+            if( !SetupRTPReceptionHintTrack( p_demux, p_track, p_sample, p_fmt, params ) )
+                p_fmt->i_codec = i_sample_type;
             break;
         }
         default:
-            p_track->fmt.i_codec = p_sample->i_type;
+            p_fmt->i_codec = i_sample_type;
             break;
     }
 
@@ -455,14 +477,14 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
     if( p_pasp && BOXDATA(p_pasp) && BOXDATA(p_pasp)->i_horizontal_spacing > 0 &&
                   BOXDATA(p_pasp)->i_vertical_spacing > 0 )
     {
-        p_track->fmt.video.i_sar_num = BOXDATA(p_pasp)->i_horizontal_spacing;
-        p_track->fmt.video.i_sar_den = BOXDATA(p_pasp)->i_vertical_spacing;
+        p_fmt->video.i_sar_num = BOXDATA(p_pasp)->i_horizontal_spacing;
+        p_fmt->video.i_sar_den = BOXDATA(p_pasp)->i_vertical_spacing;
     }
 
     const MP4_Box_t *p_fiel = MP4_BoxGet( p_sample, "fiel" );
     if( p_fiel && BOXDATA(p_fiel) )
     {
-        p_track->i_block_flags = BOXDATA(p_fiel)->i_flags;
+        p_cfg->i_block_flags = BOXDATA(p_fiel)->i_flags;
     }
 
     const MP4_Box_t *p_colr = MP4_BoxGet( p_sample, "colr" );
@@ -471,31 +493,31 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         if ( BOXDATA(p_colr)->i_type == VLC_FOURCC( 'n', 'c', 'l', 'c' ) ||
              BOXDATA(p_colr)->i_type == VLC_FOURCC( 'n', 'c', 'l', 'x' ) )
         {
-            p_track->fmt.video.primaries =
+            p_fmt->video.primaries =
                     iso_23001_8_cp_to_vlc_primaries( BOXDATA( p_colr )->nclc.i_primary_idx );
-            p_track->fmt.video.transfer =
+            p_fmt->video.transfer =
                     iso_23001_8_tc_to_vlc_xfer( BOXDATA( p_colr )->nclc.i_transfer_function_idx );
-            p_track->fmt.video.space =
+            p_fmt->video.space =
                     iso_23001_8_mc_to_vlc_coeffs( BOXDATA( p_colr )->nclc.i_matrix_idx );
             if ( BOXDATA(p_colr)->i_type == VLC_FOURCC( 'n', 'c', 'l', 'x' ) &&
                     (BOXDATA(p_colr)->nclc.i_full_range >> 7) != 0 )
-                p_track->fmt.video.color_range = COLOR_RANGE_FULL;
+                p_fmt->video.color_range = COLOR_RANGE_FULL;
             else
-                p_track->fmt.video.color_range = COLOR_RANGE_LIMITED;
+                p_fmt->video.color_range = COLOR_RANGE_LIMITED;
         }
     }
 
-    SetupGlobalExtensions( p_track, p_sample );
+    SetupGlobalExtensions( p_sample, p_fmt );
 
     /* now see if esds is present and if so create a data packet
         with decoder_specific_info  */
     MP4_Box_t *p_esds = MP4_BoxGet( p_sample, "esds" );
     if ( p_esds && BOXDATA(p_esds) && BOXDATA(p_esds)->es_descriptor.p_decConfigDescr )
     {
-        assert(p_sample->i_type == ATOM_mp4v);
-        SetupESDS( p_demux, p_track, BOXDATA(p_esds)->es_descriptor.p_decConfigDescr );
+        assert(i_sample_type == ATOM_mp4v);
+        SetupESDS( p_demux, p_track, BOXDATA(p_esds)->es_descriptor.p_decConfigDescr, p_fmt );
     }
-    else switch( p_sample->i_type )
+    else switch( i_sample_type )
     {
         /* qt decoder, send the complete chunk */
         case VLC_FOURCC ('h', 'd', 'v', '1'): // HDV 720p30
@@ -510,7 +532,7 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         case VLC_FOURCC ('m', 'x', '3', 'p'): // MPEG2 IMX PAL 625/50 30mb/s produced by FCP
         case VLC_FOURCC ('x', 'd', 'v', '2'): // XDCAM HD 1080i60
         case VLC_FOURCC ('A', 'V', 'm', 'p'): // AVID IMX PAL
-            p_track->fmt.i_codec = VLC_CODEC_MPGV;
+            p_fmt->i_codec = VLC_CODEC_MPGV;
             break;
         /* qt decoder, send the complete chunk */
         case VLC_CODEC_SVQ1:
@@ -521,18 +543,18 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         {
             CopyExtradata( p_sample->data.p_sample_vide->p_qt_image_description,
                            p_sample->data.p_sample_vide->i_qt_image_description,
-                           &p_track->fmt );
+                           p_fmt );
             break;
         }
 
         case VLC_FOURCC( 'A', 'V', 'j', '2' ):
-            p_track->fmt.i_codec = VLC_CODEC_JPEG2000;
+            p_fmt->i_codec = VLC_CODEC_JPEG2000;
             /* final decoded resolution stored in ARES w, h, nbfields to group
              * but since avcodec can't tell... */
             break;
 
         case VLC_FOURCC('j', 'p', 'e', 'g'):
-            p_track->fmt.i_codec = VLC_CODEC_MJPG;
+            p_fmt->i_codec = VLC_CODEC_MJPG;
            break;
 
         case VLC_CODEC_FFV1:
@@ -542,7 +564,7 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             {
                 CopyExtradata( BOXDATA(p_binary)->p_blob,
                                BOXDATA(p_binary)->i_blob,
-                               &p_track->fmt );
+                               p_fmt );
             }
             break;
         }
@@ -554,7 +576,7 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             {
                 CopyExtradata( BOXDATA(p_dvc1)->p_vc1,
                                BOXDATA(p_dvc1)->i_vc1,
-                               &p_track->fmt );
+                               p_fmt );
             }
             else
             {
@@ -569,11 +591,11 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             MP4_Box_t *p_av1C = MP4_BoxGet( p_sample, "av1C" );
             if( p_av1C && BOXDATA(p_av1C) )
             {
-                p_track->fmt.i_profile = BOXDATA(p_av1C)->i_profile;
-                p_track->fmt.i_level = BOXDATA(p_av1C)->i_level;
+                p_fmt->i_profile = BOXDATA(p_av1C)->i_profile;
+                p_fmt->i_level = BOXDATA(p_av1C)->i_level;
                 CopyExtradata( BOXDATA(p_av1C)->p_av1C,
                                BOXDATA(p_av1C)->i_av1C,
-                               &p_track->fmt );
+                               p_fmt );
             }
             break;
         }
@@ -588,11 +610,11 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
 
             if( p_avcC && BOXDATA(p_avcC) )
             {
-                p_track->fmt.i_profile = BOXDATA(p_avcC)->i_profile;
-                p_track->fmt.i_level = BOXDATA(p_avcC)->i_level;
+                p_fmt->i_profile = BOXDATA(p_avcC)->i_profile;
+                p_fmt->i_level = BOXDATA(p_avcC)->i_level;
                 CopyExtradata( BOXDATA(p_avcC)->p_avcC,
                                BOXDATA(p_avcC)->i_avcC,
-                               &p_track->fmt );
+                               p_fmt );
             }
             else
             {
@@ -608,14 +630,14 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             MP4_Box_t *p_hvcC = MP4_BoxGet( p_sample, "hvcC" );
 
             /* Handle DV fourcc collision at demux level */
-            if( p_sample->i_type == VLC_FOURCC( 'd', 'v', 'h', '1' ) )
-                p_track->fmt.i_codec = VLC_FOURCC( 'd', 'v', 'h', 'e' );
+            if( i_sample_type == VLC_FOURCC( 'd', 'v', 'h', '1' ) )
+                p_fmt->i_codec = VLC_FOURCC( 'd', 'v', 'h', 'e' );
 
             if( p_hvcC && p_hvcC->data.p_binary )
             {
                 CopyExtradata( p_hvcC->data.p_binary->p_blob,
                                p_hvcC->data.p_binary->i_blob,
-                               &p_track->fmt );
+                               p_fmt );
             }
             else
             {
@@ -632,14 +654,14 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             if( p_vpcC && BOXDATA(p_vpcC) )
             {
                 const MP4_Box_data_vpcC_t *p_data = BOXDATA(p_vpcC);
-                if( p_sample->i_type == ATOM_vp10 )
-                    p_track->fmt.i_codec = VLC_CODEC_VP10;
-                else if( p_sample->i_type == ATOM_vp09 )
-                    p_track->fmt.i_codec = VLC_CODEC_VP9;
+                if( i_sample_type == ATOM_vp10 )
+                    p_fmt->i_codec = VLC_CODEC_VP10;
+                else if( i_sample_type == ATOM_vp09 )
+                    p_fmt->i_codec = VLC_CODEC_VP9;
                 else
-                    p_track->fmt.i_codec = VLC_CODEC_VP8;
-                p_track->fmt.i_profile = p_data->i_profile;
-                p_track->fmt.i_level = p_data->i_level;
+                    p_fmt->i_codec = VLC_CODEC_VP8;
+                p_fmt->i_profile = p_data->i_profile;
+                p_fmt->i_level = p_data->i_level;
 
                 if( p_data->i_version == 0 ) /* old deprecated */
                 {
@@ -655,41 +677,41 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                         COLOR_SPACE_SRGB,
                     };
                     if( p_data->i_color_primaries < ARRAY_SIZE(colorspacesmapping) )
-                        p_track->fmt.video.space = colorspacesmapping[p_data->i_color_primaries];
+                        p_fmt->video.space = colorspacesmapping[p_data->i_color_primaries];
 
                     if( p_data->i_xfer_function == 0 )
-                        p_track->fmt.video.transfer = TRANSFER_FUNC_BT709;
+                        p_fmt->video.transfer = TRANSFER_FUNC_BT709;
                     else if ( p_data->i_xfer_function == 1 )
-                        p_track->fmt.video.transfer = TRANSFER_FUNC_SMPTE_ST2084;
+                        p_fmt->video.transfer = TRANSFER_FUNC_SMPTE_ST2084;
                 }
                 else
                 {
-                    p_track->fmt.video.primaries =
+                    p_fmt->video.primaries =
                             iso_23001_8_cp_to_vlc_primaries( p_data->i_color_primaries );
-                    p_track->fmt.video.transfer =
+                    p_fmt->video.transfer =
                             iso_23001_8_tc_to_vlc_xfer( p_data->i_xfer_function );
-                    p_track->fmt.video.space =
+                    p_fmt->video.space =
                             iso_23001_8_mc_to_vlc_coeffs( p_data->i_matrix_coeffs );
                 }
 
-                p_track->fmt.video.color_range = p_data->i_fullrange ? COLOR_RANGE_FULL : COLOR_RANGE_LIMITED;
-                p_track->fmt.video.i_bits_per_pixel = p_data->i_bit_depth;
+                p_fmt->video.color_range = p_data->i_fullrange ? COLOR_RANGE_FULL : COLOR_RANGE_LIMITED;
+                p_fmt->video.i_bits_per_pixel = p_data->i_bit_depth;
 
                 CopyExtradata( p_data->p_codec_init_data,
                                p_data->i_codec_init_datasize,
-                               &p_track->fmt );
+                               p_fmt );
 
                 const MP4_Box_t *p_SmDm = MP4_BoxGet( p_sample, "SmDm" );
                 if( !p_SmDm )
                     p_SmDm = MP4_BoxGet( p_sample, "mdcv" );
                 if( p_SmDm && BOXDATA(p_SmDm) )
                 {
-                    memcpy( p_track->fmt.video.mastering.primaries,
+                    memcpy( p_fmt->video.mastering.primaries,
                             BOXDATA(p_SmDm)->primaries, sizeof(uint16_t) * 6 );
-                    memcpy( p_track->fmt.video.mastering.white_point,
+                    memcpy( p_fmt->video.mastering.white_point,
                             BOXDATA(p_SmDm)->white_point, sizeof(uint16_t) * 2 );
-                    p_track->fmt.video.mastering.max_luminance = BOXDATA(p_SmDm)->i_luminanceMax;
-                    p_track->fmt.video.mastering.min_luminance = BOXDATA(p_SmDm)->i_luminanceMin;
+                    p_fmt->video.mastering.max_luminance = BOXDATA(p_SmDm)->i_luminanceMax;
+                    p_fmt->video.mastering.min_luminance = BOXDATA(p_SmDm)->i_luminanceMin;
                 }
 
                 const MP4_Box_t *p_CoLL = MP4_BoxGet( p_sample, "CoLL" );
@@ -697,15 +719,15 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                     p_CoLL = MP4_BoxGet( p_sample, "clli" );
                 if( p_CoLL && BOXDATA(p_CoLL) )
                 {
-                    p_track->fmt.video.lighting.MaxCLL = BOXDATA(p_CoLL)->i_maxCLL;
-                    p_track->fmt.video.lighting.MaxFALL = BOXDATA(p_CoLL)->i_maxFALL;
+                    p_fmt->video.lighting.MaxCLL = BOXDATA(p_CoLL)->i_maxCLL;
+                    p_fmt->video.lighting.MaxFALL = BOXDATA(p_CoLL)->i_maxFALL;
                 }
             }
         }
         break;
 
         case ATOM_WMV3:
-            p_track->p_asf = MP4_BoxGet( p_sample, "ASF " );
+            p_cfg->p_asf = MP4_BoxGet( p_sample, "ASF " );
             /* fallthrough */
         case ATOM_H264:
         case VLC_FOURCC('W','V','C','1'):
@@ -713,14 +735,14 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             MP4_Box_t *p_strf = MP4_BoxGet(  p_sample, "strf", 0 );
             if ( p_strf && BOXDATA(p_strf) )
             {
-                p_track->fmt.video.i_width = BOXDATA(p_strf)->bmiHeader.biWidth;
-                p_track->fmt.video.i_visible_width = p_track->fmt.video.i_width;
-                p_track->fmt.video.i_height = BOXDATA(p_strf)->bmiHeader.biHeight;
-                p_track->fmt.video.i_visible_height =p_track->fmt.video.i_height;
-                p_track->fmt.video.i_bits_per_pixel = BOXDATA(p_strf)->bmiHeader.biBitCount;
+                p_fmt->video.i_width = BOXDATA(p_strf)->bmiHeader.biWidth;
+                p_fmt->video.i_visible_width = p_fmt->video.i_width;
+                p_fmt->video.i_height = BOXDATA(p_strf)->bmiHeader.biHeight;
+                p_fmt->video.i_visible_height =p_fmt->video.i_height;
+                p_fmt->video.i_bits_per_pixel = BOXDATA(p_strf)->bmiHeader.biBitCount;
                 CopyExtradata( BOXDATA(p_strf)->p_extra,
                                BOXDATA(p_strf)->i_extra,
-                               &p_track->fmt );
+                               p_fmt );
             }
             break;
         }
@@ -738,25 +760,25 @@ int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         case VLC_FOURCC( 'a', 'i', '1', '5' ):
         case VLC_FOURCC( 'a', 'i', '1', '6' ):
         {
-            if( !p_track->fmt.i_extra && p_track->fmt.video.i_width < UINT16_MAX &&
+            if( !p_fmt->i_extra && p_fmt->video.i_width < UINT16_MAX &&
                 p_fiel && BOXDATA(p_fiel) )
             {
-                p_track->fmt.p_extra =
-                        AVCi_create_AnnexB( p_track->fmt.video.i_width,
-                                            !!BOXDATA(p_fiel)->i_flags, &p_track->fmt.i_extra );
+                p_fmt->p_extra =
+                        AVCi_create_AnnexB( p_fmt->video.i_width,
+                                            !!BOXDATA(p_fiel)->i_flags, &p_fmt->i_extra );
             }
             break;
         }
 
         default:
-            msg_Dbg( p_demux, "Unrecognized FourCC %4.4s", (char *)&p_sample->i_type );
+            msg_Dbg( p_demux, "Unrecognized FourCC %4.4s", (char *)&i_sample_type );
             break;
     }
 
     return 1;
 }
 
-static bool SetupAudioFromWaveFormatEx( es_format_t *p_fmt, const MP4_Box_t *p_WMA2 )
+static bool SetupAudioFromWaveFormatEx( const MP4_Box_t *p_WMA2, es_format_t *p_fmt )
 {
     if( p_WMA2 && BOXDATA(p_WMA2) )
     {
@@ -774,46 +796,48 @@ static bool SetupAudioFromWaveFormatEx( es_format_t *p_fmt, const MP4_Box_t *p_W
     return false;
 }
 
-int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
+int SetupAudioES( demux_t *p_demux, const mp4_track_t *p_track,
+                  const MP4_Box_t *p_sample, es_format_t *p_fmt,
+                  track_config_t *p_cfg )
 {
-    MP4_Box_data_sample_soun_t *p_soun = p_sample->data.p_sample_soun;
+    const MP4_Box_data_sample_soun_t *p_soun = p_sample->data.p_sample_soun;
     if(!p_soun)
         return 0;
 
-    p_track->fmt.audio.i_channels = p_soun->i_channelcount;
-    p_track->fmt.audio.i_rate = p_soun->i_sampleratehi;
+    const uint32_t i_sample_type = GetSampleType( p_demux, p_sample );
+    p_fmt->i_original_fourcc = i_sample_type;
+
+    p_fmt->audio.i_channels = p_soun->i_channelcount;
+    p_fmt->audio.i_rate = p_soun->i_sampleratehi;
     if( p_soun->i_qt_version == 0 ) /* otherwise defaults to meaningless 16 */
     {
-        p_track->fmt.audio.i_bitspersample = p_soun->i_samplesize;
-        p_track->fmt.i_bitrate = p_soun->i_channelcount * p_soun->i_sampleratehi *
-                                 p_soun->i_samplesize;
+        p_fmt->audio.i_bitspersample = p_soun->i_samplesize;
+        p_fmt->i_bitrate = p_soun->i_channelcount * p_soun->i_sampleratehi *
+                           p_soun->i_samplesize;
     }
 
-    p_track->fmt.i_original_fourcc = p_sample->i_type;
-
-
     /* Endianness atom */
     const MP4_Box_t *p_enda = MP4_BoxGet( p_sample, "wave/enda" );
     if( !p_enda )
         p_enda = MP4_BoxGet( p_sample, "enda" );
 
     /* It's a little ugly but .. there are special cases */
-    switch( p_sample->i_type )
+    switch( i_sample_type )
     {
         case( VLC_FOURCC( 'r', 'r', 't', 'p' ) ): /* RTP Reception Hint Track */
         {
-            if( !SetupRTPReceptionHintTrack( p_demux, p_track, p_sample ) )
+            if( !SetupRTPReceptionHintTrack( p_demux, p_track, p_sample, p_fmt, p_cfg ) )
                 return 0;
             break;
         }
         case ATOM_agsm: /* Apple gsm 33 bytes != MS GSM (agsm fourcc, 65 bytes) */
-            p_track->fmt.i_codec = VLC_CODEC_GSM;
+            p_fmt->i_codec = VLC_CODEC_GSM;
             break;
         case( VLC_FOURCC( '.', 'm', 'p', '3' ) ):
         case( VLC_FOURCC( 'm', 's', 0x00, 0x55 ) ):
         {
-            p_track->fmt.i_codec = VLC_CODEC_MP3;
-            p_track->fmt.b_packetized = false;
+            p_fmt->i_codec = VLC_CODEC_MP3;
+            p_fmt->b_packetized = false;
             break;
         }
         case ATOM_XiVs:
@@ -837,11 +861,11 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                     p_vCtC->data.p_binary->p_blob
                 };
 
-                if( xiph_PackHeaders( &p_track->fmt.i_extra, &p_track->fmt.p_extra,
+                if( xiph_PackHeaders( &p_fmt->i_extra, &p_fmt->p_extra,
                                       headers_sizes, headers, 3 ) == VLC_SUCCESS )
                 {
-                    p_track->fmt.i_codec = VLC_CODEC_VORBIS;
-                    p_track->fmt.b_packetized = false;
+                    p_fmt->i_codec = VLC_CODEC_VORBIS;
+                    p_fmt->b_packetized = false;
                 }
             }
             break;
@@ -855,15 +879,15 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                 uint8_t *p_extra = malloc(i_extra);
                 if( p_extra )
                 {
-                    p_track->fmt.i_extra = i_extra;
-                    p_track->fmt.p_extra = p_extra;
+                    p_fmt->i_extra = i_extra;
+                    p_fmt->p_extra = p_extra;
                     memcpy( p_extra, "fLaC", 4 );
                     SetDWBE( &p_extra[4], p_fCtS->data.p_binary->i_blob ); /* want the lowest 24bits */
                     p_extra[4] = 0x80; /* 0x80 Last metablock | 0x00 StreamInfo */
                     memcpy( &p_extra[8], p_fCtS->data.p_binary->p_blob, p_fCtS->data.p_binary->i_blob );
 
-                    p_track->fmt.i_codec = VLC_CODEC_FLAC;
-                    p_track->fmt.b_packetized = false;
+                    p_fmt->i_codec = VLC_CODEC_FLAC;
+                    p_fmt->b_packetized = false;
                 }
             }
             break;
@@ -878,38 +902,38 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                 uint8_t *p_extra = malloc(i_extra);
                 if( likely( p_extra ) )
                 {
-                    p_track->fmt.i_extra = i_extra;
-                    p_track->fmt.p_extra = p_extra;
+                    p_fmt->i_extra = i_extra;
+                    p_fmt->p_extra = p_extra;
                     memcpy( p_extra, p_dfLa->data.p_binary->p_blob, p_dfLa->data.p_binary->i_blob);
                     memcpy( p_extra, "fLaC", 4 );
-                    p_track->fmt.i_codec = VLC_CODEC_FLAC;
+                    p_fmt->i_codec = VLC_CODEC_FLAC;
                 }
             }
             break;
         }
         case( ATOM_eac3 ):
         {
-            p_track->fmt.i_codec = VLC_CODEC_EAC3;
+            p_fmt->i_codec = VLC_CODEC_EAC3;
             /* TS 102.366. F6 The values of the ChannelCount and SampleSize fields
              *             within the EC3SampleEntry Box shall be ignored. */
-            p_track->fmt.audio.i_channels = 0;
-            p_track->fmt.audio.i_bitspersample = 0;
+            p_fmt->audio.i_channels = 0;
+            p_fmt->audio.i_bitspersample = 0;
 
             const MP4_Box_t *p_dec3 = MP4_BoxGet(  p_sample, "dec3", 0 );
             if( p_dec3 && BOXDATA(p_dec3) )
             {
-                p_track->fmt.i_bitrate = BOXDATA(p_dec3)->i_data_rate * 1000;
+                p_fmt->i_bitrate = BOXDATA(p_dec3)->i_data_rate * 1000;
             }
             break;
         }
         case( ATOM_AC3 ):
         case( ATOM_ac3 ):
         {
-            p_track->fmt.i_codec = VLC_CODEC_A52;
+            p_fmt->i_codec = VLC_CODEC_A52;
             /* TS 102.366. F3 The values of the ChannelCount and SampleSize fields
              *             within the AC3SampleEntry Box shall be ignored */
-            p_track->fmt.audio.i_channels = 0;
-            p_track->fmt.audio.i_bitspersample = 0;
+            p_fmt->audio.i_channels = 0;
+            p_fmt->audio.i_bitspersample = 0;
 
             MP4_Box_t *p_dac3 = MP4_BoxGet(  p_sample, "dac3", 0 );
             if( p_dac3 && BOXDATA(p_dac3) )
@@ -921,10 +945,10 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                     256, 320, 384, 448,
                     512, 576, 640,
                 };
-                p_track->fmt.i_bitrate = 0;
+                p_fmt->i_bitrate = 0;
                 if( BOXDATA(p_dac3)->i_bitrate_code < sizeof(pi_bitrate)/sizeof(*pi_bitrate) )
                 {
-                    p_track->fmt.i_bitrate = pi_bitrate[BOXDATA(p_dac3)->i_bitrate_code] * 1000;
+                    p_fmt->i_bitrate = pi_bitrate[BOXDATA(p_dac3)->i_bitrate_code] * 1000;
 
                     if (pi_bitrate[BOXDATA(p_dac3)->i_bitrate_code] == 640
                      && BOXDATA(p_dac3)->i_acmod == 7
@@ -937,7 +961,7 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                          * detect it (this is needed for aout passhthrough
                          * configuration). */
 
-                        p_track->fmt.b_packetized = false;
+                        p_fmt->b_packetized = false;
                     }
                 }
             }
@@ -946,34 +970,34 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
 
         case ATOM_dtsc: /* DTS */
         {
-            p_track->fmt.i_codec = VLC_CODEC_DTS;
-            p_track->fmt.i_profile = PROFILE_DTS;
+            p_fmt->i_codec = VLC_CODEC_DTS;
+            p_fmt->i_profile = PROFILE_DTS;
             break;
         }
         case ATOM_dtse: /* DTS LBR */
         {
-            p_track->fmt.i_codec = VLC_CODEC_DTS;
-            p_track->fmt.i_profile = PROFILE_DTS_EXPRESS;
+            p_fmt->i_codec = VLC_CODEC_DTS;
+            p_fmt->i_profile = PROFILE_DTS_EXPRESS;
             break;
         }
         case ATOM_dtsh: /* DTS‐HD audio formats */
         case ATOM_dtsl: /* DTS‐HD Lossless formats */
         {
-            p_track->fmt.i_codec = VLC_CODEC_DTS;
-            p_track->fmt.i_profile = PROFILE_DTS_HD;
+            p_fmt->i_codec = VLC_CODEC_DTS;
+            p_fmt->i_profile = PROFILE_DTS_HD;
             break;
         }
 
         case VLC_FOURCC( 't', 'w', 'o', 's' ):
-            p_track->fmt.i_codec = VLC_CODEC_S16B;
-            p_track->fmt.i_original_fourcc = p_sample->i_type;
-            p_track->fmt.audio.i_bitspersample = 16;
+            p_fmt->i_codec = VLC_CODEC_S16B;
+            p_fmt->i_original_fourcc = i_sample_type;
+            p_fmt->audio.i_bitspersample = 16;
             break;
 
         case VLC_FOURCC( 's', 'o', 'w', 't' ):
-            p_track->fmt.i_codec = VLC_CODEC_S16L;
-            p_track->fmt.i_original_fourcc = p_sample->i_type;
-            p_track->fmt.audio.i_bitspersample = 16;
+            p_fmt->i_codec = VLC_CODEC_S16L;
+            p_fmt->i_original_fourcc = i_sample_type;
+            p_fmt->audio.i_bitspersample = 16;
             break;
 
         case 0x0000000:
@@ -982,15 +1006,15 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         {
             if( (p_soun->i_samplesize+7)/8 == 1 )
             {
-                p_track->fmt.i_codec = VLC_CODEC_U8;
-                p_track->fmt.audio.i_bitspersample = 8;
+                p_fmt->i_codec = VLC_CODEC_U8;
+                p_fmt->audio.i_bitspersample = 8;
             }
             else
             {
-                p_track->fmt.i_codec = VLC_CODEC_S16B;
-                p_track->fmt.audio.i_bitspersample = 16;
+                p_fmt->i_codec = VLC_CODEC_S16B;
+                p_fmt->audio.i_bitspersample = 16;
             }
-            p_track->fmt.i_original_fourcc = p_track->fmt.i_codec;
+            p_fmt->i_original_fourcc = p_fmt->i_codec;
 
             /* Buggy files workaround */
             if( (p_track->i_timescale != p_soun->i_sampleratehi) )
@@ -1000,31 +1024,31 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                           p_track->i_timescale, p_soun->i_sampleratehi );
 
                 if( p_soun->i_sampleratehi != 0 )
-                    p_track->i_timescale = p_soun->i_sampleratehi;
+                    p_cfg->i_timescale_override = p_soun->i_sampleratehi;
                 else
-                    p_soun->i_sampleratehi = p_track->i_timescale;
+                    p_fmt->audio.i_rate = p_track->i_timescale;
             }
             break;
         }
 
         case ATOM_in24:
-            p_track->fmt.i_original_fourcc =
-            p_track->fmt.i_codec = p_enda && BOXDATA(p_enda)->i_little_endian == 1 ?
+            p_fmt->i_original_fourcc =
+            p_fmt->i_codec = p_enda && BOXDATA(p_enda)->i_little_endian == 1 ?
                                     VLC_CODEC_S24L : VLC_CODEC_S24B;
             break;
         case ATOM_in32:
-            p_track->fmt.i_original_fourcc =
-            p_track->fmt.i_codec = p_enda && BOXDATA(p_enda)->i_little_endian == 1 ?
+            p_fmt->i_original_fourcc =
+            p_fmt->i_codec = p_enda && BOXDATA(p_enda)->i_little_endian == 1 ?
                                     VLC_CODEC_S32L : VLC_CODEC_S32B;
             break;
         case ATOM_fl32:
-            p_track->fmt.i_original_fourcc =
-            p_track->fmt.i_codec = p_enda && BOXDATA(p_enda)->i_little_endian == 1 ?
+            p_fmt->i_original_fourcc =
+            p_fmt->i_codec = p_enda && BOXDATA(p_enda)->i_little_endian == 1 ?
                                     VLC_CODEC_F32L : VLC_CODEC_F32B;
             break;
         case ATOM_fl64:
-            p_track->fmt.i_original_fourcc =
-            p_track->fmt.i_codec = p_enda && BOXDATA(p_enda)->i_little_endian == 1 ?
+            p_fmt->i_original_fourcc =
+            p_fmt->i_codec = p_enda && BOXDATA(p_enda)->i_little_endian == 1 ?
                                     VLC_CODEC_F64L : VLC_CODEC_F64B;
             break;
 
@@ -1074,11 +1098,11 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
                     if( p_formats[i].i_bits == p_soun->i_constbitsperchannel &&
                         (p_soun->i_formatflags & p_formats[i].i_mask) == p_formats[i].i_flags )
                     {
-                        p_track->fmt.i_codec = p_formats[i].i_codec;
-                        p_track->fmt.audio.i_bitspersample = p_soun->i_constbitsperchannel;
-                        p_track->fmt.audio.i_blockalign =
+                        p_fmt->i_codec = p_formats[i].i_codec;
+                        p_fmt->audio.i_bitspersample = p_soun->i_constbitsperchannel;
+                        p_fmt->audio.i_blockalign =
                                 p_soun->i_channelcount * p_soun->i_constbitsperchannel / 8;
-                        p_track->i_sample_size = p_track->fmt.audio.i_blockalign;
+                        p_cfg->i_sample_size_override = p_fmt->audio.i_blockalign;
                         break;
                     }
                 }
@@ -1086,7 +1110,7 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             break;
         }
         default:
-            p_track->fmt.i_codec = p_sample->i_type;
+            p_fmt->i_codec = i_sample_type;
             break;
     }
 
@@ -1109,24 +1133,24 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         }
         else if( i_vlc_mapping )
         {
-            const unsigned i_bps = aout_BitsPerSample( p_track->fmt.i_codec );
+            const unsigned i_bps = aout_BitsPerSample( p_fmt->i_codec );
             /* Uncompressed audio */
             if( i_bps && aout_CheckChannelReorder( p_rg_chans_order, NULL,
                                                    i_vlc_mapping,
-                                                   p_track->rgi_chans_reordering ) )
-                p_track->b_chans_reorder = true;
+                                                   p_cfg->rgi_chans_reordering ) )
+                p_cfg->b_chans_reorder = true;
 
             /* we can only set bitmap for VLC mapping or [re]mapped pcm audio
              * as vlc can't enumerate channels for compressed content */
             if( i_bps )
             {
-                p_track->fmt.audio.i_channels = vlc_popcount(i_vlc_mapping);
-                p_track->fmt.audio.i_physical_channels = i_vlc_mapping;
+                p_fmt->audio.i_channels = vlc_popcount(i_vlc_mapping);
+                p_fmt->audio.i_physical_channels = i_vlc_mapping;
             }
         }
     }
 
-    SetupGlobalExtensions( p_track, p_sample );
+    SetupGlobalExtensions( p_sample, p_fmt );
 
     /* now see if esds is present and if so create a data packet
         with decoder_specific_info  */
@@ -1134,16 +1158,16 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
     if ( !p_esds ) p_esds = MP4_BoxGet( p_sample, "wave/esds" );
     if ( p_esds && BOXDATA(p_esds) && BOXDATA(p_esds)->es_descriptor.p_decConfigDescr )
     {
-        assert(p_sample->i_type == ATOM_mp4a);
-        SetupESDS( p_demux, p_track, BOXDATA(p_esds)->es_descriptor.p_decConfigDescr );
+        assert(i_sample_type == ATOM_mp4a);
+        SetupESDS( p_demux, p_track, BOXDATA(p_esds)->es_descriptor.p_decConfigDescr, p_fmt );
     }
-    else switch( p_sample->i_type )
+    else switch( i_sample_type )
     {
         case VLC_CODEC_AMR_NB:
-            p_track->fmt.audio.i_rate = 8000;
+            p_fmt->audio.i_rate = 8000;
             break;
         case VLC_CODEC_AMR_WB:
-            p_track->fmt.audio.i_rate = 16000;
+            p_fmt->audio.i_rate = 16000;
             break;
         case VLC_CODEC_QDMC:
         case VLC_CODEC_QDM2:
@@ -1151,11 +1175,11 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         {
             CopyExtradata( p_sample->data.p_sample_soun->p_qt_description,
                            p_sample->data.p_sample_soun->i_qt_description,
-                           &p_track->fmt );
-            if( p_track->fmt.i_extra == 56 && p_sample->i_type == VLC_CODEC_ALAC )
+                           p_fmt );
+            if( p_fmt->i_extra == 56 && i_sample_type == VLC_CODEC_ALAC )
             {
-                p_track->fmt.audio.i_channels = *((uint8_t*)p_track->fmt.p_extra + 41);
-                p_track->fmt.audio.i_rate = GetDWBE((uint8_t*)p_track->fmt.p_extra + 52);
+                p_fmt->audio.i_channels = *((uint8_t*)p_fmt->p_extra + 41);
+                p_fmt->audio.i_rate = GetDWBE((uint8_t*)p_fmt->p_extra + 52);
             }
             break;
         }
@@ -1163,15 +1187,14 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         case VLC_CODEC_ADPCM_IMA_WAV:
         case VLC_CODEC_QCELP:
         {
-            p_track->fmt.audio.i_blockalign = p_sample->data.p_sample_soun->i_bytes_per_frame;
+            p_fmt->audio.i_blockalign = p_sample->data.p_sample_soun->i_bytes_per_frame;
             break;
         }
         case ATOM_WMA2:
         {
-            if( SetupAudioFromWaveFormatEx( &p_track->fmt,
-                                            MP4_BoxGet( p_sample, "wave/WMA2" ) ) )
+            if( SetupAudioFromWaveFormatEx( MP4_BoxGet( p_sample, "wave/WMA2" ), p_fmt ) )
             {
-                p_track->p_asf = MP4_BoxGet( p_sample, "wave/ASF " );
+                p_cfg->p_asf = MP4_BoxGet( p_sample, "wave/ASF " );
             }
             else
             {
@@ -1181,43 +1204,47 @@ int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
         }
         case ATOM_wma: /* isml wmapro */
         {
-            if( !SetupAudioFromWaveFormatEx( &p_track->fmt, MP4_BoxGet( p_sample, "wfex" ) ) )
+            if( !SetupAudioFromWaveFormatEx( MP4_BoxGet( p_sample, "wfex" ), p_fmt ) )
                 msg_Err( p_demux, "missing wfex for wma" );
             break;
         }
 
         default:
-            if(p_track->fmt.i_codec == 0)
-                msg_Dbg( p_demux, "Unrecognized FourCC %4.4s", (char *)&p_sample->i_type );
+            if(p_fmt->i_codec == 0)
+                msg_Dbg( p_demux, "Unrecognized FourCC %4.4s", (char *)&i_sample_type );
             break;
     }
 
     /* Ambisonics */
     const MP4_Box_t *p_SA3D = MP4_BoxGet(p_sample, "SA3D");
     if (p_SA3D && BOXDATA(p_SA3D))
-        p_track->fmt.audio.channel_type = AUDIO_CHANNEL_TYPE_AMBISONICS;
+        p_fmt->audio.channel_type = AUDIO_CHANNEL_TYPE_AMBISONICS;
 
     return 1;
 }
 
-int SetupSpuES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
+int SetupSpuES( demux_t *p_demux, const mp4_track_t *p_track,
+                const MP4_Box_t *p_sample, es_format_t *p_fmt,
+                track_config_t *p_cfg )
 {
+    const uint32_t i_sample_type = GetSampleType( p_demux, p_sample );
+
     /* It's a little ugly but .. there are special cases */
-    switch( p_sample->i_type )
+    switch( i_sample_type )
     {
         case VLC_FOURCC('s','t','p','p'):
-            p_track->fmt.i_codec = VLC_CODEC_TTML;
+            p_fmt->i_codec = VLC_CODEC_TTML;
             break;
         case ATOM_wvtt:
-            p_track->fmt.i_codec = VLC_CODEC_WEBVTT;
+            p_fmt->i_codec = VLC_CODEC_WEBVTT;
             break;
         case ATOM_c608: /* EIA608 closed captions */
-            p_track->fmt.i_codec = VLC_CODEC_CEA608;
-            p_track->fmt.subs.cc.i_reorder_depth = -1;
+            p_fmt->i_codec = VLC_CODEC_CEA608;
+            p_fmt->subs.cc.i_reorder_depth = -1;
             break;
         case ATOM_c708: /* EIA708 closed captions */
-            p_track->fmt.i_codec = VLC_CODEC_CEA708;
-            p_track->fmt.subs.cc.i_reorder_depth = -1;
+            p_fmt->i_codec = VLC_CODEC_CEA708;
+            p_fmt->subs.cc.i_reorder_depth = -1;
             break;
 
         case( VLC_FOURCC( 't', 'e', 'x', 't' ) ):
@@ -1227,42 +1254,40 @@ int SetupSpuES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample )
             if(!p_text)
                 return 0;
 
-            if( p_sample->i_type == VLC_FOURCC( 't', 'e', 'x', 't' ) )
-                p_track->fmt.i_codec = VLC_CODEC_QTXT;
+            if( i_sample_type == VLC_FOURCC( 't', 'e', 'x', 't' ) )
+                p_fmt->i_codec = VLC_CODEC_QTXT;
             else
-                p_track->fmt.i_codec = VLC_CODEC_TX3G;
+                p_fmt->i_codec = VLC_CODEC_TX3G;
 
             if( p_text->i_data > 4 && GetDWBE(p_text->p_data) & 0xC0000000 )
             {
-                p_track->fmt.i_priority = ES_PRIORITY_SELECTABLE_MIN + 1;
-                p_track->b_forced_spu = true;
+                p_fmt->i_priority = ES_PRIORITY_SELECTABLE_MIN + 1;
+                p_cfg->b_forced_spu = true;
             }
 
-            CopyExtradata( p_text->p_data,
-                           p_text->i_data,
-                           &p_track->fmt );
+            CopyExtradata( p_text->p_data, p_text->i_data, p_fmt );
 
             /* FIXME UTF-8 doesn't work here ? */
             if( p_track->b_mac_encoding )
-                p_track->fmt.subs.psz_encoding = strdup( "MAC" );
+                p_fmt->subs.psz_encoding = strdup( "MAC" );
             else
-                p_track->fmt.subs.psz_encoding = strdup( "UTF-8" );
+                p_fmt->subs.psz_encoding = strdup( "UTF-8" );
             break;
         }
 
         default:
-            p_track->fmt.i_codec = p_sample->i_type;
+            p_fmt->i_codec = i_sample_type;
             break;
     }
 
-    SetupGlobalExtensions( p_track, p_sample );
+    SetupGlobalExtensions( p_sample, p_fmt );
 
     /* now see if esds is present and if so create a data packet
         with decoder_specific_info  */
     MP4_Box_t *p_esds = MP4_BoxGet( p_sample, "esds" );
     if ( p_esds && BOXDATA(p_esds) && BOXDATA(p_esds)->es_descriptor.p_decConfigDescr )
     {
-        SetupESDS( p_demux, p_track, BOXDATA(p_esds)->es_descriptor.p_decConfigDescr );
+        SetupESDS( p_demux, p_track, BOXDATA(p_esds)->es_descriptor.p_decConfigDescr, p_fmt );
     }
 
     return 1;
diff --git a/modules/demux/mp4/mp4.c b/modules/demux/mp4/mp4.c
index 3b18f734a2..629eb4552a 100644
--- a/modules/demux/mp4/mp4.c
+++ b/modules/demux/mp4/mp4.c
@@ -324,11 +324,12 @@ static MP4_Box_t * MP4_GetTrafByTrackID( MP4_Box_t *p_moof, const uint32_t i_id
     return p_traf;
 }
 
-static es_out_id_t * MP4_AddTrackES( es_out_t *out, mp4_track_t *p_track )
+static es_out_id_t * MP4_CreateES( es_out_t *out, const es_format_t *p_fmt,
+                                   bool b_forced_spu )
 {
-    es_out_id_t *p_es = es_out_Add( out, &p_track->fmt );
+    es_out_id_t *p_es = es_out_Add( out, p_fmt );
     /* Force SPU which isn't selected/defaulted */
-    if( p_track->fmt.i_cat == SPU_ES && p_es && p_track->b_forced_spu )
+    if( p_fmt->i_cat == SPU_ES && p_es && b_forced_spu )
         es_out_Control( out, ES_OUT_SET_ES_DEFAULT, p_es );
 
     return p_es;
@@ -2783,6 +2784,67 @@ static void TrackGetESSampleRate( demux_t *p_demux,
                      UINT16_MAX);
 }
 
+static void TrackConfigInit( track_config_t *p_cfg )
+{
+    memset( p_cfg, 0, sizeof(*p_cfg) );
+}
+
+static void TrackConfigApply( const track_config_t *p_cfg,
+                              mp4_track_t *p_track )
+{
+    if( p_cfg->i_timescale_override )
+        p_track->i_timescale = p_cfg->i_timescale_override;
+     if( p_cfg->i_sample_size_override )
+        p_track->i_sample_size = p_cfg->i_sample_size_override;
+     p_track->p_asf = p_cfg->p_asf;
+     memcpy( p_track->rgi_chans_reordering, p_cfg->rgi_chans_reordering,
+             AOUT_CHAN_MAX * sizeof(p_cfg->rgi_chans_reordering[0]) );
+     p_track->b_chans_reorder = p_cfg->b_chans_reorder;
+     p_track->b_forced_spu = p_cfg->b_forced_spu;
+     p_track->i_block_flags = p_cfg->i_block_flags;
+}
+
+static int TrackFillConfig( demux_t *p_demux, const mp4_track_t *p_track,
+                            const MP4_Box_t *p_sample,unsigned i_chunk,
+                            es_format_t *p_fmt, track_config_t *p_cfg )
+{
+    /* */
+    switch( p_track->fmt.i_cat )
+    {
+    case VIDEO_ES:
+        if ( p_sample->i_handler != ATOM_vide ||
+             !SetupVideoES( p_demux, p_track, p_sample, p_fmt, p_cfg ) )
+            return VLC_EGENERIC;
+
+        /* Set frame rate */
+        TrackGetESSampleRate( p_demux,
+                              &p_fmt->video.i_frame_rate,
+                              &p_fmt->video.i_frame_rate_base,
+                              p_track, p_sample->i_index, i_chunk );
+        break;
+
+    case AUDIO_ES:
+        if ( p_sample->i_handler != ATOM_soun ||
+             !SetupAudioES( p_demux, p_track, p_sample, p_fmt, p_cfg ) )
+            return VLC_EGENERIC;
+        break;
+
+    case SPU_ES:
+        if ( ( p_sample->i_handler != ATOM_text &&
+               p_sample->i_handler != ATOM_subt &&
+               p_sample->i_handler != ATOM_sbtl &&
+               p_sample->i_handler != ATOM_clcp ) ||
+             !SetupSpuES( p_demux, p_track, p_sample, p_fmt, p_cfg ) )
+           return VLC_EGENERIC;
+        break;
+
+    default:
+        break;
+    }
+
+    return VLC_SUCCESS;
+}
+
 /*
  * TrackCreateES:
  * Create ES and PES to init decoder if needed, for a track starting at i_chunk
@@ -2809,9 +2871,8 @@ static int TrackCreateES( demux_t *p_demux, mp4_track_t *p_track,
         return VLC_EGENERIC;
     }
 
-    MP4_Box_t *p_sample = MP4_BoxGet(  p_track->p_stsd, "[%d]",
-                            i_sample_description_index - 1 );
-
+    const MP4_Box_t *p_sample = MP4_BoxGet( p_track->p_stsd, "[%d]",
+                                            i_sample_description_index - 1 );
     if( !p_sample ||
         ( !p_sample->data.p_payload && p_track->fmt.i_cat != SPU_ES ) )
     {
@@ -2820,75 +2881,54 @@ static int TrackCreateES( demux_t *p_demux, mp4_track_t *p_track,
         return VLC_EGENERIC;
     }
 
-    MP4_Box_t   *p_frma;
-    if( ( p_frma = MP4_BoxGet( p_sample, "sinf/frma" ) ) && p_frma->data.p_frma )
-    {
-        msg_Warn( p_demux, "Original Format Box: %4.4s", (char *)&p_frma->data.p_frma->i_type );
+    track_config_t cfg;
+    TrackConfigInit( &cfg );
+    es_format_t *p_fmt = &p_track->fmt;
 
-        p_sample->i_type = p_frma->data.p_frma->i_type;
-    }
-
-    p_track->p_sample = p_sample;
     p_track->fmt.i_id = p_track->i_track_ID;
 
-    /* */
-    switch( p_track->fmt.i_cat )
+    if( TrackFillConfig( p_demux, p_track, p_sample, i_chunk,
+                         p_fmt, &cfg ) != VLC_SUCCESS )
     {
-    case VIDEO_ES:
-        if ( p_sample->i_handler != ATOM_vide ||
-             !SetupVideoES( p_demux, p_track, p_sample ) )
-            return VLC_EGENERIC;
-
-        /* Set frame rate */
-        TrackGetESSampleRate( p_demux,
-                              &p_track->fmt.video.i_frame_rate,
-                              &p_track->fmt.video.i_frame_rate_base,
-                              p_track, i_sample_description_index, i_chunk );
+        return VLC_EGENERIC;
+    }
 
-        p_sys->f_fps = (float)p_track->fmt.video.i_frame_rate /
-                       (float)p_track->fmt.video.i_frame_rate_base;
+    TrackConfigApply( &cfg, p_track );
 
-        break;
-
-    case AUDIO_ES:
-        if ( p_sample->i_handler != ATOM_soun ||
-             !SetupAudioES( p_demux, p_track, p_sample ) )
-            return VLC_EGENERIC;
-        if( p_sys->p_meta )
-        {
-            audio_replay_gain_t *p_arg = &p_track->fmt.audio_replay_gain;
-            const char *psz_meta = vlc_meta_GetExtra( p_sys->p_meta, "replaygain_track_gain" );
-            if( psz_meta )
-            {
-                double f_gain = us_atof( psz_meta );
-                p_arg->pf_gain[AUDIO_REPLAY_GAIN_TRACK] = f_gain;
-                p_arg->pb_gain[AUDIO_REPLAY_GAIN_TRACK] = f_gain != 0;
-            }
-            psz_meta = vlc_meta_GetExtra( p_sys->p_meta, "replaygain_track_peak" );
-            if( psz_meta )
+    switch( p_fmt->i_cat )
+    {
+        case VIDEO_ES:
+            p_sys->f_fps = (float)p_fmt->video.i_frame_rate /
+                    (float)p_fmt->video.i_frame_rate_base;
+            break;
+        case AUDIO_ES:
+            if( p_sys->p_meta )
             {
-                double f_gain = us_atof( psz_meta );
-                p_arg->pf_peak[AUDIO_REPLAY_GAIN_TRACK] = f_gain;
-                p_arg->pb_peak[AUDIO_REPLAY_GAIN_TRACK] = f_gain > 0;
+                audio_replay_gain_t *p_arg = &p_fmt->audio_replay_gain;
+                const char *psz_meta = vlc_meta_GetExtra( p_sys->p_meta, "replaygain_track_gain" );
+                if( psz_meta )
+                {
+                    double f_gain = us_atof( psz_meta );
+                    p_arg->pf_gain[AUDIO_REPLAY_GAIN_TRACK] = f_gain;
+                    p_arg->pb_gain[AUDIO_REPLAY_GAIN_TRACK] = f_gain != 0;
+                }
+                psz_meta = vlc_meta_GetExtra( p_sys->p_meta, "replaygain_track_peak" );
+                if( psz_meta )
+                {
+                    double f_gain = us_atof( psz_meta );
+                    p_arg->pf_peak[AUDIO_REPLAY_GAIN_TRACK] = f_gain;
+                    p_arg->pb_peak[AUDIO_REPLAY_GAIN_TRACK] = f_gain > 0;
+                }
             }
-        }
-        break;
-
-    case SPU_ES:
-        if ( ( p_sample->i_handler != ATOM_text &&
-               p_sample->i_handler != ATOM_subt &&
-               p_sample->i_handler != ATOM_sbtl &&
-               p_sample->i_handler != ATOM_clcp ) ||
-             !SetupSpuES( p_demux, p_track, p_sample ) )
-           return VLC_EGENERIC;
-        break;
-
-    default:
-        break;
+            break;
+        default:
+            break;
     }
 
+    p_track->p_sample = p_sample;
+
     if( pp_es )
-        *pp_es = MP4_AddTrackES( p_demux->out, p_track );
+        *pp_es = MP4_CreateES( p_demux->out, p_fmt, p_track->b_forced_spu );
 
     return ( !pp_es || *pp_es ) ? VLC_SUCCESS : VLC_EGENERIC;
 }
@@ -3320,7 +3360,6 @@ static void MP4_TrackSetup( demux_t *p_demux, mp4_track_t *p_track,
                 msg_Warn( p_demux, "Malformed track SDP message: %s", sdp_media_type );
                 return;
             }
-            p_track->p_sdp = p_sdp;
             break;
 
         case( ATOM_tx3g ):
diff --git a/modules/demux/mp4/mp4.h b/modules/demux/mp4/mp4.h
index 90a9f70675..1d363b3399 100644
--- a/modules/demux/mp4/mp4.h
+++ b/modules/demux/mp4/mp4.h
@@ -80,6 +80,19 @@ enum
     USEAS_TIMECODE = 1 << 1,
 };
 
+typedef struct /* passed by non-const pointer to SetupVideoES/SetupAudioES/SetupSpuES */
+{
+    uint32_t i_timescale_override; /* NOTE(review): presumably 0 = keep the track's own timescale -- confirm */
+    uint32_t i_sample_size_override; /* NOTE(review): presumably 0 = keep the container sample size -- confirm */
+    const MP4_Box_t *p_asf; /* read-only box reference; NOTE(review): presumably ASF stream info -- confirm */
+    uint8_t     rgi_chans_reordering[AOUT_CHAN_MAX]; /* reordering table, AOUT_CHAN_MAX entries */
+    bool        b_chans_reorder; /* NOTE(review): presumably true when the table above must be applied -- confirm */
+
+    bool b_forced_spu; /* forced track selection (never done by default/priority) */
+
+    uint32_t    i_block_flags; /* NOTE(review): presumably flags applied to emitted blocks -- confirm */
+} track_config_t;
+
  /* Contain all needed information for read all track with vlc */
 typedef struct
 {
@@ -148,14 +161,6 @@ typedef struct
 
     stime_t i_time; // track scaled
 
-    /* rrtp reception hint track */
-    MP4_Box_t *p_sdp;                         /* parsed for codec and other info */
-    RTP_timstamp_synchronization_t sync_mode; /* whether track is already in sync */
-
-    /* First recorded RTP timestamp offset.
-     * Needed for rrtp synchronization */
-    int32_t         i_tsro_offset;
-
     struct
     {
         /* for moof parsing */
@@ -184,10 +189,12 @@ typedef struct
     asf_track_info_t asfinfo;
 } mp4_track_t;
 
-int SetupVideoES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample );
-int SetupAudioES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample );
-int SetupSpuES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample );
-int SetupCCES( demux_t *p_demux, mp4_track_t *p_track, MP4_Box_t *p_sample );
+int SetupVideoES( demux_t *p_demux, const mp4_track_t *p_track,
+                  const MP4_Box_t *p_sample, es_format_t *, track_config_t *); /* p_track and p_sample are read-only; the es_format_t and track_config_t pointers are writable */
+int SetupAudioES( demux_t *p_demux, const mp4_track_t *p_track,
+                  const MP4_Box_t *p_sample, es_format_t *, track_config_t * ); /* same parameter contract as SetupVideoES */
+int SetupSpuES( demux_t *p_demux, const mp4_track_t *p_track,
+                const MP4_Box_t *p_sample, es_format_t *, track_config_t * ); /* same parameter contract; NOTE(review): return convention not visible here -- confirm in essetup.c */
 void SetupMeta( vlc_meta_t *p_meta, const MP4_Box_t *p_udta );
 
 /* format of RTP reception hint track sample constructor */



More information about the vlc-commits mailing list