[vlc-commits] asx: convert document to XML

Pierre Lamot git at videolan.org
Sun Dec 10 23:34:01 CET 2017


vlc/vlc-3.0 | branch: master | Pierre Lamot <pierre at videolabs.io> | Wed Dec  6 16:06:55 2017 +0100| [46849d49d5bad782ce1849cde406302dd4e2f802] | committer: Jean-Baptiste Kempf

asx: convert document to XML

ASX isn't XML compliant, esp.:
   - it is case insensitive
   - attribute values are not required to be escaped

Really close #14062 (and all the duplicates)

Signed-off-by: Jean-Baptiste Kempf <jb at videolan.org>
(cherry picked from commit 570ccc373402645dfc82b43025c630704621dfb0)
Signed-off-by: Jean-Baptiste Kempf <jb at videolan.org>

> http://git.videolan.org/gitweb.cgi/vlc/vlc-3.0.git/?a=commit;h=46849d49d5bad782ce1849cde406302dd4e2f802
---

 modules/demux/playlist/asx.c | 193 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 183 insertions(+), 10 deletions(-)

diff --git a/modules/demux/playlist/asx.c b/modules/demux/playlist/asx.c
index 6c33b79d8e..8472cb5d70 100644
--- a/modules/demux/playlist/asx.c
+++ b/modules/demux/playlist/asx.c
@@ -37,6 +37,7 @@
 #include <vlc_xml.h>
 #include <vlc_strings.h>
 #include <vlc_charset.h>
+#include <vlc_memstream.h>
 
 #include <assert.h>
 #include <ctype.h>
@@ -352,7 +353,172 @@ end:
     free( psz_description );
 }
 
-static stream_t* UTF8Stream( stream_t *p_demux )
+/// Returns true only if psz_str has none of < > ' " and holds at least one '&', each one forming &name; &#DEC; or &#xHEX;
+static bool isXmlEncoded(const char* psz_str)
+{
+    // any raw markup or quote character means the text is not already encoded
+    if( strpbrk(psz_str, "<>'\"") != NULL )
+        return false;
+
+    bool is_escaped = false;
+    while (psz_str != NULL) // psz_str is never set to NULL below; the loop exits through the returns
+    {
+        const char* psz_amp = strchr(psz_str, '&');
+        if( psz_amp == NULL )
+            return is_escaped; // no further '&': true iff at least one entity was validated
+        const char* psz_end = strchr(psz_amp, ';');
+        if(  psz_end == NULL )
+            return false; // '&' with no ';' anywhere after it
+
+        else if(psz_amp[1] == '#')
+        {
+            if( psz_amp[2] == 'x' ) // hexadecimal form &#xHEX; (only a lowercase 'x' is recognised)
+            {
+                const char* psz_ptr = &psz_amp[3];
+                if( psz_ptr  ==  psz_end )
+                    return false; // "&#x;" has no digits
+                for (  ; psz_ptr < psz_end; psz_ptr++)
+                    if( ! isxdigit( *psz_ptr ) ) // NOTE(review): plain char passed to <ctype.h>; consider an unsigned char cast for bytes >= 0x80
+                        return false;
+            }
+            else // decimal form &#DEC;
+            {
+                const char* psz_ptr = &(psz_amp[2]);
+                if( psz_ptr ==  psz_end )
+                    return false; // "&#;" has no digits
+                for (  ; psz_ptr < psz_end; psz_ptr++)
+                    if( ! isdigit( *psz_ptr ) )
+                        return false;
+            }
+        }
+        else // named form &name; where name is checked with isalnum() only, not against a list of known entities
+        {
+            const char* psz_ptr = &(psz_amp[1]);
+            if( psz_ptr ==  psz_end )
+                return false; // "&;" has an empty name
+            for (  ; psz_ptr < psz_end; psz_ptr++)
+                if( ! isalnum( *psz_ptr ) )
+                    return false;
+        }
+        is_escaped = true;
+        psz_str = psz_end; // continue the search for the next '&' from this entity's ';'
+    }
+    return is_escaped; // NOTE(review): appears unreachable, every path out of the loop is a return
+}
+
+static void memstream_puts_xmlencoded(struct vlc_memstream* p_stream, const char* psz_begin, const char* psz_end)
+{
+    char *psz_tmp = NULL; // NUL-terminated copy of [psz_begin, psz_end), or of the whole string when psz_end is NULL
+    if(psz_end == NULL)
+        psz_tmp = strdup( psz_begin );
+    else
+        psz_tmp = strndup( psz_begin, psz_end - psz_begin );
+
+    if( isXmlEncoded( psz_tmp ) ) // NOTE(review): psz_tmp is not checked for NULL before being used here
+        vlc_memstream_puts( p_stream, psz_tmp ); // already looks entity-encoded: written to the stream unchanged
+    else
+    {
+        char *psz_tmp_encoded = vlc_xml_encode( psz_tmp ); // otherwise the encoded form is written instead
+        vlc_memstream_puts( p_stream, psz_tmp_encoded ); // NOTE(review): psz_tmp_encoded is not checked for NULL either
+        free( psz_tmp_encoded );
+    }
+    free(psz_tmp);
+}
+
+/**
+ * ASX is not required to be strict XML. This builds and returns a new string (NULL on memstream failure) in which
+ *  - everything inside a tag but outside quotes is uppercased, and <!-- --> comments are dropped
+ *  - text between tags and quoted strings go through memstream_puts_xmlencoded() (escaped when required)
+ */
+static char* ASXToXML( char* psz_source )
+{
+    bool b_in_string= false;
+    char *psz_source_cur = psz_source;
+    char *psz_source_old = psz_source;
+    char c_string_delim; // assigned when a quote opens a string; only read while b_in_string is true
+
+    struct vlc_memstream stream_out;
+    if( vlc_memstream_open( &stream_out ) != 0 )
+        return NULL;
+
+    while ( psz_source_cur != NULL && *psz_source_cur != '\0' )
+    {
+        psz_source_old = psz_source_cur;
+        //search tag start
+        if( ( psz_source_cur = strchr( psz_source_cur, '<' ) ) == NULL )
+        {
+            memstream_puts_xmlencoded(&stream_out, psz_source_old, NULL); // no more tags: emit the trailing text and stop
+            //vlc_memstream_puts( &stream_out, psz_source_old );
+            break;
+        }
+
+        memstream_puts_xmlencoded(&stream_out, psz_source_old, psz_source_cur); // text located before this '<'
+        psz_source_old = psz_source_cur;
+
+        //skip comments, no need to copy them to the output.
+        if( strncmp( psz_source_cur, "<!--", 4 ) == 0 )
+        {
+            psz_source_cur += 4;
+            psz_source_cur =  strstr( psz_source_cur, "-->" );
+            if( psz_source_cur == NULL)
+                break; // unterminated comment: the rest of the input is not copied
+            else
+            {
+                psz_source_cur += 3;
+                continue;
+            }
+        }
+        else
+        {
+            vlc_memstream_putc( &stream_out, '<' );
+            psz_source_cur++;
+        }
+
+        for (  ; *psz_source_cur != '\0'; psz_source_cur++ ) // copy the tag body up to and including its closing '>'
+        {
+            if( b_in_string == false )
+            {
+                if( *psz_source_cur == '>')
+                {
+                    vlc_memstream_putc( &stream_out, '>' );
+                    psz_source_cur++;
+                    break;
+                }
+                if( *psz_source_cur == '"' || *psz_source_cur == '\'' )
+                {
+                    c_string_delim = *psz_source_cur; // the string ends at the next occurrence of the same quote character
+                    b_in_string = true;
+                    vlc_memstream_putc( &stream_out, c_string_delim );
+                }
+                else
+                {
+                    //convert tag and attributes to upper case
+                    vlc_memstream_putc( &stream_out, vlc_ascii_toupper( *psz_source_cur ) );
+                }
+            }
+            else
+            {
+                psz_source_old = psz_source_cur; // first character after the opening quote
+                psz_source_cur = strchr( psz_source_cur, c_string_delim );
+                if( psz_source_cur == NULL )
+                    break; // no closing quote: the string is not copied, and the NULL cursor also ends the outer loop
+
+                memstream_puts_xmlencoded(&stream_out, psz_source_old, psz_source_cur);
+                vlc_memstream_putc( &stream_out, c_string_delim );
+                b_in_string = false;
+            }
+        }
+    }
+    if( vlc_memstream_close( &stream_out ) != 0 )
+    {
+        free( stream_out.ptr );
+        return NULL;
+    }
+
+    return stream_out.ptr;
+}
+
+static stream_t* PreparseStream( stream_t *p_demux )
 {
     stream_t *s = p_demux->p_source;
     uint64_t streamSize;
@@ -379,17 +545,24 @@ static stream_t* UTF8Stream( stream_t *p_demux )
          i_read += i_ret;
      } while ( streamSize > 0 );
      psz_source[i_read] = 0;
-     if ( IsUTF8( psz_source ) )
-         return vlc_stream_MemoryNew( p_demux, (uint8_t*)psz_source, i_read, false );
 
-     char *psz_utf8 = FromLatin1( psz_source );
-     if( psz_utf8 == NULL )
-     {
-         free( psz_source );
+    char* psz_source_xml = ASXToXML( psz_source );
+    free( psz_source );
+    if( psz_source_xml == NULL )
          return NULL;
-     }
+
+    if( IsUTF8( psz_source_xml ) )
+        return vlc_stream_MemoryNew( p_demux, (uint8_t*)psz_source_xml, strlen(psz_source_xml), false );
+
+    char *psz_utf8 = FromLatin1( psz_source_xml );
+    if( psz_utf8 == NULL )
+    {
+        free( psz_source_xml );
+        return NULL;
+    }
+
      stream_t * p_stream = vlc_stream_MemoryNew( p_demux, (uint8_t*)psz_utf8, strlen(psz_utf8), false );
-     free( psz_source );
+     free( psz_source_xml );
      return p_stream;
 }
 
@@ -409,7 +582,7 @@ static int ReadDir( stream_t *p_demux, input_item_node_t *p_subitems )
 
     xml_reader_t *p_xml_reader = NULL;
     input_item_t *p_current_input = GetCurrentItem( p_demux );
-    stream_t* p_stream = UTF8Stream( p_demux );
+    stream_t* p_stream = PreparseStream( p_demux );
 
     bool b_first_node = false;
     int i_type;



More information about the vlc-commits mailing list