[vlc-commits] demux: subtitle: workaround multibytes regression
Francois Cartegnie
git at videolan.org
Sat Jul 22 01:44:30 CEST 2017
vlc | branch: master | Francois Cartegnie <fcvlcdev at free.fr> | Fri Jul 21 17:04:22 2017 +0200| [f9eac7933383323fcfe53051fa8c36b5fe8b89ee] | committer: Francois Cartegnie
demux: subtitle: workaround multibytes regression
As long as readline can't peek
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=f9eac7933383323fcfe53051fa8c36b5fe8b89ee
---
modules/demux/Makefile.am | 2 +-
modules/demux/subtitle.c | 101 ++++++++++++++++++++++++++++++++++++++--------
2 files changed, 86 insertions(+), 17 deletions(-)
diff --git a/modules/demux/Makefile.am b/modules/demux/Makefile.am
index b267c7d20d..b2e9443c3f 100644
--- a/modules/demux/Makefile.am
+++ b/modules/demux/Makefile.am
@@ -56,7 +56,7 @@ demux_LTLIBRARIES += libaiff_plugin.la
libmjpeg_plugin_la_SOURCES = demux/mjpeg.c demux/mxpeg_helper.h
demux_LTLIBRARIES += libmjpeg_plugin.la
-libsubtitle_plugin_la_SOURCES = demux/subtitle.c demux/subtitle_helper.h
+libsubtitle_plugin_la_SOURCES = demux/subtitle.c
libsubtitle_plugin_la_LIBADD = $(LIBM)
demux_LTLIBRARIES += libsubtitle_plugin.la
diff --git a/modules/demux/subtitle.c b/modules/demux/subtitle.c
index 3a4137d852..ebba2602d6 100644
--- a/modules/demux/subtitle.c
+++ b/modules/demux/subtitle.c
@@ -43,8 +43,6 @@
#include <vlc_demux.h>
#include <vlc_charset.h>
-#include "subtitle_helper.h"
-
/*****************************************************************************
* Module descriptor
*****************************************************************************/
@@ -318,19 +316,88 @@ static int Open ( vlc_object_t *p_this )
#ifndef NDEBUG
const uint64_t i_start_pos = vlc_stream_Tell( p_demux->s );
#endif
- uint64_t i_read_offset = 0;
+ size_t i_peek;
+ const uint8_t *p_peek;
+ if( vlc_stream_Peek( p_demux->s, &p_peek, 16 ) < 16 )
+ return VLC_EGENERIC;
+
+ enum
+ {
+ UTF8BOM,
+ UTF16LE,
+ UTF16BE,
+ NOBOM,
+ } e_bom = NOBOM;
+ const char *psz_bom = NULL;
+
+ i_peek = 4096;
/* Detect Unicode while skipping the UTF-8 Byte Order Mark */
- bool unicode = false;
- const uint8_t *p_data;
- if( vlc_stream_Peek( p_demux->s, &p_data, 3 ) >= 3
- && !memcmp( p_data, "\xEF\xBB\xBF", 3 ) )
- {
- unicode = true;
- i_read_offset = 3; /* skip BOM */
- msg_Dbg( p_demux, "detected Unicode Byte Order Mark" );
+ if( !memcmp( p_peek, "\xEF\xBB\xBF", 3 ) )
+ {
+ e_bom = UTF8BOM;
+ psz_bom = "UTF-8";
+ }
+ else if( !memcmp( p_peek, "\xFF\xFE", 2 ) )
+ {
+ e_bom = UTF16LE;
+ psz_bom = "UTF-16LE";
+ i_peek *= 2;
+ }
+ else if( !memcmp( p_peek, "\xFE\xFF", 2 ) )
+ {
+ e_bom = UTF16BE;
+ psz_bom = "UTF-16BE";
+ i_peek *= 2;
}
+ if( e_bom != NOBOM )
+ msg_Dbg( p_demux, "detected %s Byte Order Mark", psz_bom );
+
+ i_peek = vlc_stream_Peek( p_demux->s, &p_peek, i_peek );
+ if( unlikely(i_peek < 16) )
+ return VLC_EGENERIC;
+
+ stream_t *p_probestream = NULL;
+ if( e_bom != UTF8BOM && e_bom != NOBOM )
+ {
+ if( i_peek > 16 )
+ {
+ vlc_iconv_t handle = vlc_iconv_open( "UTF-8", psz_bom );
+ if( handle )
+ {
+ char *p_outbuf = malloc( i_peek );
+ if( p_outbuf )
+ {
+ const char *p_inbuf = (const char *) p_peek;
+ char *psz_converted = p_outbuf;
+ const size_t i_outbuf_size = i_peek;
+ size_t i_inbuf_remain = i_peek;
+ size_t i_outbuf_remain = i_peek;
+ if ( VLC_ICONV_ERR != vlc_iconv( handle,
+ &p_inbuf, &i_inbuf_remain,
+ &p_outbuf, &i_outbuf_remain ) )
+ {
+ p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *) psz_converted,
+ i_outbuf_size - i_outbuf_remain,
+ false ); /* free p_outbuf on release */
+ }
+ else free( p_outbuf );
+ }
+ vlc_iconv_close( handle );
+ }
+ }
+ }
+ else
+ {
+ const size_t i_skip = (e_bom == UTF8BOM) ? 3 : 0;
+ p_probestream = vlc_stream_MemoryNew( p_demux, (uint8_t *) &p_peek[i_skip],
+ i_peek - i_skip, true );
+ }
+
+ if( p_probestream == NULL )
+ return VLC_EGENERIC;
+
/* Probe if unknown type */
if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
{
@@ -343,7 +410,7 @@ static int Open ( vlc_object_t *p_this )
int i_dummy;
char p_dummy;
- if( (s = peek_Readline( p_demux->s, &i_read_offset )) == NULL )
+ if( (s = vlc_stream_ReadLine( p_probestream ) ) == NULL )
break;
if( strcasestr( s, "<SAMI>" ) )
@@ -497,6 +564,8 @@ static int Open ( vlc_object_t *p_this )
free( s );
}
+ vlc_stream_Delete( p_probestream );
+
/* Quit on unknown subtitles */
if( p_sys->props.i_type == SUB_TYPE_UNKNOWN )
{
@@ -522,8 +591,8 @@ static int Open ( vlc_object_t *p_this )
msg_Dbg( p_demux, "loading all subtitles..." );
- if( unicode && /* skip BOM */
- vlc_stream_Seek( p_demux->s, 3 ) != VLC_SUCCESS )
+ if( e_bom == UTF8BOM && /* skip BOM */
+ vlc_stream_Read( p_demux->s, NULL, 3 ) != 3 )
{
Close( p_this );
return VLC_EGENERIC;
@@ -588,8 +657,8 @@ static int Open ( vlc_object_t *p_this )
p_demux->psz_location );
}
- if( unicode )
- fmt.subs.psz_encoding = strdup( "UTF-8" );
+ if( psz_bom )
+ fmt.subs.psz_encoding = strdup( psz_bom );
char *psz_description = var_InheritString( p_demux, "sub-description" );
if( psz_description && *psz_description )
fmt.psz_description = psz_description;
More information about the vlc-commits
mailing list