[vlc-devel] commit: Zip: fix handling of special characters (#2467) ( Jean-Philippe Andre )

git version control git at videolan.org
Tue May 19 18:16:53 CEST 2009


vlc | branch: 1.0-bugfix | Jean-Philippe Andre <jpeg at via.ecp.fr> | Wed May 13 01:56:17 2009 +0800| [2c113555e9fe760a4cdd27f67f82f8029bd6ce24] | committer: Jean-Philippe Andre 

Zip: fix handling of special characters (#2467)

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=2c113555e9fe760a4cdd27f67f82f8029bd6ce24
---

 modules/access/zip/zip.h       |    3 +
 modules/access/zip/zipaccess.c |   66 +++++++++++++++++++++++-----
 modules/access/zip/zipstream.c |   94 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 140 insertions(+), 23 deletions(-)

diff --git a/modules/access/zip/zip.h b/modules/access/zip/zip.h
index c78a6ca..d52f8d5 100644
--- a/modules/access/zip/zip.h
+++ b/modules/access/zip/zip.h
@@ -58,6 +58,9 @@ void StreamClose( vlc_object_t* );
 int AccessOpen( vlc_object_t *p_this );
 void AccessClose( vlc_object_t *p_this );
 
+/** Common function */
+bool isAllowedChar( char c );
+
 /** **************************************************************************
  * zipIO function headers : how to use vlc_stream to read the zip
  * Note: static because the implementations differ
diff --git a/modules/access/zip/zipaccess.c b/modules/access/zip/zipaccess.c
index 3689e12..1c5d678 100644
--- a/modules/access/zip/zipaccess.c
+++ b/modules/access/zip/zipaccess.c
@@ -22,7 +22,7 @@
  *****************************************************************************/
 
 /** @todo:
- * - implement crypto (using url zip://user:password@path-to-archive#ZIP#file
+ * - implement crypto (using url zip://user:password@path-to-archive|file
  * - read files in zip with long name (use unz_file_info.size_filename)
  * - multi-volume archive support ?
  */
@@ -31,8 +31,6 @@
 # include "config.h"
 #endif
 
-#ifdef HAVE_ZLIB_H
-
 #include "zip.h"
 #include <vlc_access.h>
 
@@ -53,6 +51,47 @@ static int AccessControl( access_t *p_access, int i_query, va_list args );
 static ssize_t AccessRead( access_t *, uint8_t *, size_t );
 static int AccessSeek( access_t *, int64_t );
 static int OpenFileInZip( access_t *p_access, int i_pos );
+static char *unescapeXml( const char *psz_text );
+
+/** **************************************************************************
+ * \brief Decode a string produced by escapeToXml() (zipstream.c)
+ * Every '?XX' sequence (XX = two hex digits) becomes the byte 0xXX.
+ * \return a newly allocated string the caller must free(), or NULL if the
+ *         input is malformed, holds a disallowed character, or malloc fails
+ *****************************************************************************/
+static char *unescapeXml( const char *psz_text )
+{
+    static const char psz_hex[] = "0123456789abcdefABCDEF";
+    char *psz_ret = malloc( strlen( psz_text ) + 1 );
+    if( !psz_ret ) return NULL;
+
+    char *psz_tmp = psz_ret;
+    for( const char *psz_iter = psz_text; *psz_iter; ++psz_iter, ++psz_tmp )
+    {
+        if( *psz_iter == '?' )
+        {
+            /* Require exactly two hex digits after '?'. Checking them first
+             * also keeps us from stepping past the terminating NUL. */
+            unsigned int i_value;
+            if( !psz_iter[1] || !strchr( psz_hex, psz_iter[1] )
+             || !psz_iter[2] || !strchr( psz_hex, psz_iter[2] )
+             || sscanf( psz_iter + 1, "%2x", &i_value ) != 1 )
+                goto error;
+            *psz_tmp = (char) i_value;
+            psz_iter += 2;
+        }
+        else if( isAllowedChar( *psz_iter ) )
+            *psz_tmp = *psz_iter;
+        else
+            goto error; /* raw character outside the allowed set */
+    }
+    *psz_tmp = '\0';
+    return psz_ret;
+
+error:
+    free( psz_ret );
+    return NULL;
+}
 
 /** **************************************************************************
  * \brief Open access
@@ -78,8 +117,19 @@ int AccessOpen( vlc_object_t *p_this )
         return VLC_EGENERIC;
 
     *psz_sep = '\0';
-    psz_pathToZip = unescape_URI_duplicate( psz_path );
-    p_sys->psz_fileInzip = strdup( psz_sep + 1 );
+    psz_pathToZip = unescapeXml( psz_path );
+    if( !psz_pathToZip )
+    {
+        /* Maybe this was not an encoded string */
+        msg_Dbg( p_access, "this is not an encoded url. Trying file '%s'",
+                 psz_path );
+        psz_pathToZip = strdup( psz_path );
+    }
+    p_sys->psz_fileInzip = unescapeXml( psz_sep + 1 );
+    if( !p_sys->psz_fileInzip )
+    {
+        p_sys->psz_fileInzip = strdup( psz_sep + 1 );
+    }
 
     /* Define IO functions */
     zlib_filefunc_def *p_func = (zlib_filefunc_def*)
@@ -409,9 +459,3 @@ static int ZCALLBACK ZipIO_Error( void* opaque, void* stream )
     //msg_Dbg( p_access, "error" );
     return 0;
 }
-
-
-
-#else
-# error Can not compile zip demuxer without zlib support
-#endif
diff --git a/modules/access/zip/zipstream.c b/modules/access/zip/zipstream.c
index c29d11f..0250312 100644
--- a/modules/access/zip/zipstream.c
+++ b/modules/access/zip/zipstream.c
@@ -360,8 +360,6 @@ static int CreatePlaylist( stream_t *s, char **pp_buffer )
         goto exit;
     }
 
-    // msg_Dbg( s, "%d files in Zip", vlc_array_count( p_filenames ) );
-
     /* Close archive */
     unzClose( file );
     s->p_sys->zipFile = NULL;
@@ -470,6 +468,81 @@ static int GetFilesInZip( stream_t *p_this, unzFile file,
  *****************************************************************************/
 
 /** **************************************************************************
+ * \brief Tell whether a character may be written unescaped in the Xml.
+ * Accepted chars are: a-z, A-Z, 0-9, ':', '/', '\', '.', ' ' and '_'
+ *****************************************************************************/
+bool isAllowedChar( char c )
+{
+    if( c >= 'a' && c <= 'z' ) return true;
+    if( c >= 'A' && c <= 'Z' ) return true;
+    if( c >= '0' && c <= '9' ) return true;
+    /* Remaining candidates: the few punctuation characters let through */
+    return c == ':' || c == '/' || c == '\\'
+        || c == '.' || c == ' ' || c == '_';
+}
+
+/** **************************************************************************
+ * \brief Escape string to be XML valid
+ * Allowed chars are defined by the above function isAllowedChar()
+ * Invalid chars are escaped using non-standard '?XX' notation, where XX is
+ * the byte value written as two lowercase hex digits.
+ * NOTE: We cannot trust VLC internal Web encoding functions
+ *       because they are not able to encode and decode some rare utf-8
+ *       characters properly. Also, we don't control exactly when they are
+ *       called (from this module).
+ * \param ppsz_encoded receives the newly allocated escaped string, which
+ *        the caller must free(); it is set to NULL on failure
+ * \param psz_url NUL-terminated string to escape
+ * \return VLC_SUCCESS, or VLC_ENOMEM if the allocation failed
+ *****************************************************************************/
+static int escapeToXml( char **ppsz_encoded, const char *psz_url )
+{
+    /* Never hand back an uninitialised pointer, even on failure */
+    *ppsz_encoded = NULL;
+
+    /* Count the characters that have to be escaped */
+    size_t i_len = strlen( psz_url );
+    size_t i_num = 0;
+    for( const char *psz_iter = psz_url; *psz_iter; ++psz_iter )
+    {
+        if( !isAllowedChar( *psz_iter ) )
+        {
+            i_num++;
+        }
+    }
+
+    /* Every escaped character grows from one byte to three ('?XX'), so the
+     * result needs two extra bytes per escape plus the terminating NUL.
+     * A string with nothing to escape is simply copied by the loop below. */
+    char *psz_ret = malloc( i_len + 2 * i_num + 1 );
+    if( !psz_ret ) return VLC_ENOMEM;
+
+    /* Copy string, replacing invalid characters */
+    char *psz_tmp = psz_ret;
+    for( const char *psz_iter = psz_url; *psz_iter; ++psz_iter )
+    {
+        if( isAllowedChar( *psz_iter ) )
+        {
+            *psz_tmp++ = *psz_iter;
+        }
+        else
+        {
+            /* snprintf() emits two hex digits and a NUL; the NUL is
+             * overwritten by the next character or the final terminator */
+            *psz_tmp++ = '?';
+            snprintf( psz_tmp, 3, "%02x",
+                      (unsigned) (unsigned char) *psz_iter );
+            psz_tmp += 2;
+        }
+    }
+    *psz_tmp = '\0';
+
+    /* Return success */
+    *ppsz_encoded = psz_ret;
+    return VLC_SUCCESS;
+}
+
+/** **************************************************************************
  * \brief Write the XSPF playlist given the list of files
  *****************************************************************************/
 static int WriteXSPF( char **pp_buffer, vlc_array_t *p_filenames,
@@ -488,9 +561,10 @@ static int WriteXSPF( char **pp_buffer, vlc_array_t *p_filenames,
     /* Root node */
     node *playlist = new_node( psz_zip );
 
-    /* Web-Encode the URI and append '!' */
-    char *psz_pathtozip = vlc_UrlEncode( psz_zippath );
-    if( astrcatf( &psz_pathtozip, ZIP_SEP ) < 0 ) return -1;
+    /* Encode the URI and append ZIP_SEP */
+    char *psz_pathtozip;
+    escapeToXml( &psz_pathtozip, psz_zippath );
+    if( astrcatf( &psz_pathtozip, "%s", ZIP_SEP ) < 0 ) return -1;
 
     int i_track = 0;
     for( int i = 0; i < vlc_array_count( p_filenames ); ++i )
@@ -514,12 +588,9 @@ static int WriteXSPF( char **pp_buffer, vlc_array_t *p_filenames,
 
             /* Build full MRL */
             char *psz_path = strdup( psz_pathtozip );
-            if( astrcatf( &psz_path, psz_name ) < 0 ) return -1;
-
-            /* Double url-encode */
-            char *psz_tmp = psz_path;
-            psz_path = vlc_UrlEncode( psz_tmp );
-            free( psz_tmp );
+            char *psz_escapedName;
+            escapeToXml( &psz_escapedName, psz_name );
+            if( astrcatf( &psz_path, "%s", psz_escapedName ) < 0 ) return -1;
 
             /* Track information */
             if( astrcatf( pp_buffer,
@@ -771,7 +842,6 @@ static int ZCALLBACK ZipIO_Error( void* opaque, void* stream )
 {
     (void)opaque;
     (void)stream;
-    //msg_Dbg( p_access, "error" );
     return 0;
 }
 




More information about the vlc-devel mailing list