[vlc-commits] subsdec: Fix multiple html parsing issues
Hugo Beauzée-Luyssen
git at videolan.org
Tue Jul 28 16:01:59 CEST 2015
vlc | branch: master | Hugo Beauzée-Luyssen <hugo at beauzee.fr> | Tue Jul 21 16:49:46 2015 +0200| [b65a9f3666d2bb317aecfd29de8c544f581d8802] | committer: Jean-Baptiste Kempf
subsdec: Fix multiple html parsing issues
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=b65a9f3666d2bb317aecfd29de8c544f581d8802
---
modules/codec/subsdec.c | 46 +++++++++++++++++++++++++++++++++++++++-------
1 file changed, 39 insertions(+), 7 deletions(-)
diff --git a/modules/codec/subsdec.c b/modules/codec/subsdec.c
index 0465d29..73389ab 100644
--- a/modules/codec/subsdec.c
+++ b/modules/codec/subsdec.c
@@ -648,7 +648,7 @@ static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
static bool AppendCharacter( text_segment_t* p_segment, char c )
{
char* tmp;
- if ( asprintf( &tmp, "%s%c", p_segment->psz_text, c ) < 0 )
+ if ( asprintf( &tmp, "%s%c", p_segment->psz_text ? p_segment->psz_text : "", c ) < 0 )
return false;
free( p_segment->psz_text );
p_segment->psz_text = tmp;
@@ -671,7 +671,7 @@ static char* ConsumeAttribute( const char** ppsz_subtitle, char** psz_attribute_
psz_subtitle++;
attr_len++;
}
- if ( !*psz_subtitle )
+ if ( !*psz_subtitle || attr_len == 0 )
return NULL;
psz_attribute_name = malloc( attr_len + 1 );
if ( unlikely( !psz_attribute_name ) )
@@ -682,13 +682,19 @@ static char* ConsumeAttribute( const char** ppsz_subtitle, char** psz_attribute_
// Skip over to the attribute value
while ( *psz_subtitle && *psz_subtitle != '=' )
psz_subtitle++;
+ // Skip the '=' sign
+ psz_subtitle++;
// Acknowledge the delimiter if any
while ( *psz_subtitle && isspace( *psz_subtitle) )
psz_subtitle++;
if ( *psz_subtitle == '\'' || *psz_subtitle == '"' )
+ {
+ // Save the delimiter and skip it
delimiter = *psz_subtitle;
+ psz_subtitle++;
+ }
else
delimiter = 0;
@@ -698,24 +704,42 @@ static char* ConsumeAttribute( const char** ppsz_subtitle, char** psz_attribute_
attr_len = 0;
while ( *psz_subtitle && ( ( delimiter != 0 && *psz_subtitle != delimiter ) ||
- ( delimiter == 0 && !isalpha( *psz_subtitle ) ) ) )
+ ( delimiter == 0 && ( isalnum( *psz_subtitle ) || *psz_subtitle == '#' ) ) ) )
{
psz_subtitle++;
attr_len++;
}
- if ( !*psz_subtitle || unlikely( !( *psz_attribute_value = malloc( attr_len + 1 ) ) ) )
+ if ( unlikely( !( *psz_attribute_value = malloc( attr_len + 1 ) ) ) )
{
free( psz_attribute_name );
return NULL;
}
strncpy( *psz_attribute_value, psz_subtitle - attr_len, attr_len );
(*psz_attribute_value)[attr_len] = 0;
+ // Finally, skip over the final delimiter
+ if (delimiter != 0 && *psz_subtitle)
+ psz_subtitle++;
*ppsz_subtitle = psz_subtitle;
return psz_attribute_name;
}
static int GetColor( const char* psz_color )
{
+ if ( *psz_color == '#' )
+ return strtol( psz_color + 1, NULL, 16 );
+ // Check if the string can be converted as an hex number
+ bool ok = true;
+ for (int i = 0; psz_color[i]; ++i )
+ {
+ if ( !isxdigit( psz_color[i] ) )
+ {
+ ok = false;
+ break;
+ }
+ }
+ if ( ok )
+ return strtol( psz_color, NULL, 16 );
+
for( int i = 0; p_html_colors[i].psz_name != NULL; i++ )
{
if( !strcasecmp( psz_color, p_html_colors[i].psz_name ) )
@@ -757,6 +781,8 @@ static text_style_t* DuplicateAndPushStyle(style_stack_t** pp_stack)
static void PopStyle(style_stack_t** pp_stack)
{
style_stack_t* p_old = *pp_stack;
+ if ( !p_old )
+ return;
*pp_stack = p_old->p_next;
// Don't free the style, it is now owned by the text_segment_t
free( p_old );
@@ -778,9 +804,9 @@ static text_segment_t* NewTextSegmentPopStyle( text_segment_t* p_segment, style_
text_segment_t* p_new = text_segment_New( NULL );
if ( unlikely( p_new == NULL ) )
return NULL;
- PopStyle( pp_stack );
// We shouldn't have an empty stack since this happens when closing a tag,
// but better be safe than sorry if (/when) we encounter a broken subtitle file.
+ PopStyle( pp_stack );
text_style_t* p_dup = *pp_stack ? text_style_Duplicate( (*pp_stack)->p_style ) : text_style_New();
p_new->style = p_dup;
p_segment->p_next = p_new;
@@ -899,10 +925,13 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
// Skip potential spaces & end tag
while ( *psz_subtitle && *psz_subtitle != '>' )
psz_subtitle++;
+ if ( *psz_subtitle == '>' )
+ psz_subtitle++;
}
else if( !strncmp( psz_subtitle, "</", 2 ))
{
size_t tag_length = 0;
+ psz_subtitle += 2;
const char* p_old_pos = psz_subtitle;
while ( *psz_subtitle && *psz_subtitle != '>' )
{
@@ -918,13 +947,16 @@ static text_segment_t* ParseSubtitles( int *pi_align, const char *psz_subtitle )
// A closing tag for one of the tags we handle, meaning
// we pushed a style onto the stack earlier
p_segment = NewTextSegmentPopStyle( p_segment, &p_stack );
+ // Also skip the '>'
+ psz_subtitle++;
}
else
{
- // Unknown closing tag, just append the '<', and go on.
+ // Unknown closing tag, just append the "</", and go on.
// This will make the unknown tag appear as text
AppendCharacter( p_segment, '<' );
- psz_subtitle = p_old_pos + 1;
+ AppendCharacter( p_segment, '/' );
+ psz_subtitle = p_old_pos + 2;
}
}
else
More information about the vlc-commits
mailing list