[vlc-devel] Re: [patch] "xtag" xml parser improvement
Derk-Jan Hartman
d.hartman at student.utwente.nl
Tue Jan 24 20:05:09 CET 2006
Do you happen to know of any XML that we can test this against?
DJ
On 24-jan-2006, at 19:27, Daniel Stränger wrote:
> hi all!
> i've made some enhancements to the xtag parser.
> it now ignores
> - comments <!-- ... -->
> - processing instructions <? ... ?>
> - doctype declarations <!DOCTYPE ... >
> and can handle
> - CDATA element content <![CDATA[ ... ]]>
>
> cheers, daniel
>
> Index: modules/misc/xml/xtag.c
> ===================================================================
> --- modules/misc/xml/xtag.c (Revision 14010)
> +++ modules/misc/xml/xtag.c (Arbeitskopie)
> @@ -165,19 +165,28 @@
> static xml_reader_t *ReaderCreate( xml_t *p_xml, stream_t *s )
> {
> xml_reader_t *p_reader;
> - char *p_buffer;
> + char *p_buffer, *p_new;
> int i_size, i_pos = 0, i_buffer = 2048;
> XTag *p_root;
>
> /* Open and read file */
> p_buffer = malloc( i_buffer );
> - if( p_buffer == NULL ) return NULL;
> + if( p_buffer == NULL ) {
> + msg_Err( p_xml, "ENOMEM: alloc buffer" );
> + return NULL;
> + }
>
> while( ( i_size = stream_Read( s, &p_buffer[i_pos], 2048 ) )
> == 2048 )
> {
> i_pos += i_size;
> i_buffer += i_size;
> - p_buffer = realloc( p_buffer, i_buffer );
> + p_new = realloc( p_buffer, i_buffer );
> + if (!p_new) {
> + msg_Err( p_xml, "ENOMEM: realloc buffer" );
> + free( p_buffer );
> + return NULL;
> + }
> + p_buffer = p_new;
> }
> p_buffer[ i_pos + i_size ] = 0; /* 0 terminated string */
>
> @@ -367,6 +376,7 @@
> #define X_SLASH 1<<6
> #define X_QMARK 1<<7
> #define X_DASH 1<<8
> +#define X_EMARK 1<<9
>
> static int xtag_cin( char c, int char_class )
> {
> @@ -377,8 +387,9 @@
> if( char_class & X_SQUOTE ) if( c == '\'' ) return VLC_TRUE;
> if( char_class & X_EQUAL ) if( c == '=' ) return VLC_TRUE;
> if( char_class & X_SLASH ) if( c == '/' ) return VLC_TRUE;
> - if( char_class & X_QMARK ) if( c == '!' ) return VLC_TRUE;
> + if( char_class & X_QMARK ) if( c == '?' ) return VLC_TRUE;
> if( char_class & X_DASH ) if( c == '-' ) return VLC_TRUE;
> + if( char_class & X_EMARK ) if( c == '!' ) return VLC_TRUE;
>
> return VLC_FALSE;
> }
> @@ -548,44 +559,21 @@
> char *name;
> char *pcdata;
> char *s;
> + int xi;
>
> if( !parser->valid ) return NULL;
>
> -#if 0 /* Do we really want all the whitespace pcdata ? */
> - xtag_skip_whitespace( parser );
> -#endif
> -
> - if( (pcdata = xtag_slurp_to( parser, X_OPENTAG, X_NONE )) !=
> NULL )
> - {
> - tag = malloc( sizeof(*tag) );
> - tag->name = NULL;
> - tag->pcdata = pcdata;
> - tag->parent = parser->current_tag;
> - tag->attributes = NULL;
> - tag->children = NULL;
> - tag->current_child = NULL;
> -
> - return tag;
> - }
> -
> s = parser->start;
>
> - /* if this starts a close tag, return NULL and let the parent
> take it */
> - if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_SLASH ) )
> - return NULL;
> -
> /* if this starts a comment tag, skip until end */
> - if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) &&
> + if( (parser->end - parser->start) > 7 &&
> + xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_EMARK ) &&
> xtag_cin( s[2], X_DASH ) && xtag_cin( s[3], X_DASH ) )
> {
> - int xi;
> -
> parser->start = s = &s[4];
> -
> while( (xi = xtag_index( parser, X_DASH )) >= 0 )
> {
> parser->start = s = &s[xi+1];
> -
> if( xtag_cin( s[0], X_DASH ) && xtag_cin( s[1],
> X_CLOSETAG ) )
> {
> parser->start = &s[2];
> @@ -593,21 +581,81 @@
> return xtag_parse_tag( parser );
> }
> }
> + return NULL;
> + }
>
> + /* ignore processing instructions '<?' ... '?>' */
> + if( (parser->end - parser->start) > 4 &&
> + xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) )
> + {
> + parser->start = s = &s[2];
> + while ((xi = xtag_index( parser, X_QMARK )) >= 0) {
> + if (xtag_cin( s[xi+1], X_CLOSETAG )) {
> + parser->start = &s[xi+2];
> + xtag_skip_whitespace( parser );
> + return xtag_parse_tag( parser );
> + }
> + }
> return NULL;
> }
>
> - /* FIXME: if this starts a DOCTYPE tag, skip until end */
> - if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_QMARK ) )
> + /* ignore doctype '<!DOCTYPE' ... '>' */
> + if ( (parser->end - parser->start) > 8 &&
> + !strncmp( s, "<!DOCTYPE", 9 ) ) {
> + xi = xtag_index( parser, X_CLOSETAG );
> + if ( xi > 0 ) {
> + parser->start = s = &s[xi+1];
> + xtag_skip_whitespace( parser );
> + return xtag_parse_tag( parser );
> + }
> + else {
> + return NULL;
> + }
> + }
> +
> + if( (pcdata = xtag_slurp_to( parser, X_OPENTAG, X_NONE )) !=
> NULL )
> {
> - int xi = xtag_index( parser, X_CLOSETAG );
> - if( xi <= 0 ) return NULL;
> + tag = malloc( sizeof(*tag) );
> + tag->name = NULL;
> + tag->pcdata = pcdata;
> + tag->parent = parser->current_tag;
> + tag->attributes = NULL;
> + tag->children = NULL;
> + tag->current_child = NULL;
>
> - parser->start = &s[xi+1];
> - xtag_skip_whitespace( parser );
> - return xtag_parse_tag( parser );
> + return tag;
> }
>
> + /* if this starts a close tag, return NULL and let the parent
> take it */
> + if( xtag_cin( s[0], X_OPENTAG ) && xtag_cin( s[1], X_SLASH ) )
> + return NULL;
> +
> + /* parse CDATA content */
> + if ( (parser->end - parser->start) > 8 &&
> + !strncmp( s, "<![CDATA[", 9 ) ) {
> + parser->start = s = &s[9];
> + while (parser->end - s > 2) {
> + if (strncmp( s, "]]>", 3 ) == 0) {
> + if ( !(tag = malloc( sizeof(*tag))) ) return NULL;
> + if ( !(pcdata = malloc( sizeof(char)*(s - parser-
> >start + 1))) ) return NULL;
> + strncpy( pcdata, parser->start, s - parser->start );
> + pcdata[s - parser->start]='\0';
> + parser->start = s = &s[3];
> + tag->name = NULL;
> + tag->pcdata = pcdata;
> + tag->parent = parser->current_tag;
> + tag->attributes = NULL;
> + tag->children = NULL;
> + tag->current_child = NULL;
> + return tag;
> + }
> + else {
> + s++;
> + }
> + }
> + return NULL;
> + }
> +
> if( !xtag_assert_and_pass( parser, X_OPENTAG ) ) return NULL;
>
> name = xtag_slurp_to( parser, X_WHITESPACE|X_SLASH|X_CLOSETAG,
> X_NONE );
> @@ -670,12 +718,13 @@
>
> xtag_skip_whitespace( parser );
> xtag_assert_and_pass( parser, X_CLOSETAG );
> -
> + xtag_skip_whitespace( parser );
> }
> else
> {
> xtag_assert_and_pass( parser, X_SLASH );
> xtag_assert_and_pass( parser, X_CLOSETAG );
> + xtag_skip_whitespace( parser );
> }
>
> return tag;
--
This is the vlc-devel mailing-list, see http://www.videolan.org/vlc/
To unsubscribe, please read http://developers.videolan.org/lists.html
More information about the vlc-devel mailing list