[vlc-commits] commit: DVB: resync with TS demux ( Rémi Denis-Courmont )

git at videolan.org git at videolan.org
Tue Oct 26 19:27:49 CEST 2010


vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Tue Oct 26 20:27:40 2010 +0300| [67a62cc7bbb9f0241f14183d89da58f7e0aada41] | committer: Rémi Denis-Courmont 

DVB: resync with TS demux

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=67a62cc7bbb9f0241f14183d89da58f7e0aada41
---

 modules/access/dvb/en50221.c |   98 +++++++++++++++++++++--------------------
 1 files changed, 50 insertions(+), 48 deletions(-)

diff --git a/modules/access/dvb/en50221.c b/modules/access/dvb/en50221.c
index 6a24fce..8e5bfd9 100644
--- a/modules/access/dvb/en50221.c
+++ b/modules/access/dvb/en50221.c
@@ -2338,107 +2338,109 @@ void en50221_End( access_t * p_access )
      * program. */
 }
 
-static inline void *FixUTF8( char *p )
-{
-    EnsureUTF8( p );
-    return p;
-}
-
+/* FIXME same as EITConvertToUTF8 from TS demux */
 char *dvbsi_to_utf8( const char *psz_instring, size_t i_length )
 {
-    const char *psz_encoding, *psz_stringstart;
-    char *psz_outstring, *psz_tmp;
-    char psz_encbuf[12];
-    size_t i_in, i_out;
-    vlc_iconv_t iconv_handle;
+    const char *psz_encoding;
+    char psz_encbuf[sizeof( "ISO_8859-123" )];
+    size_t offset = 1;
+
     if( i_length < 1 ) return NULL;
-    if( psz_instring[0] < 0 || psz_instring[0] >= 0x20 )
+    if( psz_instring[0] >= 0x20 )
     {
-        psz_stringstart = psz_instring;
-        psz_encoding = "ISO_8859-1"; /* should be ISO6937 according to spec, but this seems to be the one used */
-    } else switch( psz_instring[0] )
+        psz_encoding = "ISO_6937";
+        offset = 0;
+    }
+    else switch( psz_instring[0] )
     {
     case 0x01:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-5";
         break;
     case 0x02:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-6";
         break;
     case 0x03:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-7";
         break;
     case 0x04:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-8";
         break;
     case 0x05:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-9";
         break;
     case 0x06:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-10";
         break;
     case 0x07:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-11";
         break;
     case 0x08:
-        psz_stringstart = &psz_instring[1]; /*possibly reserved?*/
         psz_encoding = "ISO_8859-12";
         break;
     case 0x09:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-13";
         break;
     case 0x0a:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-14";
         break;
     case 0x0b:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "ISO_8859-15";
         break;
     case 0x10:
-        if( i_length < 3 || psz_instring[1] != '\0' || psz_instring[2] > 0x0f
-            || psz_instring[2] == 0 )
-            return FixUTF8(strndup(psz_instring,i_length));
-        sprintf( psz_encbuf, "ISO_8859-%d", psz_instring[2] );
-        psz_stringstart = &psz_instring[3];
-        psz_encoding = psz_encbuf;
+#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
+        if( i_length < 3 || psz_instring[1] != 0x00 || psz_instring[2] > 15
+         || psz_instring[2] == 0 )
+        {
+            psz_encoding = "UTF-8";
+            offset = 0;
+        }
+        else
+        {
+            sprintf( psz_encbuf, "ISO_8859-%u", psz_instring[2] );
+            psz_encoding = psz_encbuf;
+            offset = 3;
+        }
         break;
     case 0x11:
-        psz_stringstart = &psz_instring[1];
+#warning Is there a BOM or do we use a fixed endianess?
         psz_encoding = "UTF-16";
         break;
     case 0x12:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "KSC5601-1987";
         break;
     case 0x13:
-        psz_stringstart = &psz_instring[1];
-        psz_encoding = "GB2312";/*GB-2312-1980 */
+        psz_encoding = "GB2312"; /* GB-2312-1980 */
         break;
     case 0x14:
-        psz_stringstart = &psz_instring[1];
         psz_encoding = "BIG-5";
         break;
     case 0x15:
-        return FixUTF8(strndup(&psz_instring[1],i_length-1));
+        psz_encoding = "UTF-8";
         break;
     default:
         /* invalid */
-        return FixUTF8(strndup(psz_instring,i_length));
-    }
-    iconv_handle = vlc_iconv_open( "UTF-8", psz_encoding );
-    i_in = i_length - (psz_stringstart - psz_instring );
-    i_out = i_in * 6;
-    psz_outstring = psz_tmp = (char*)xmalloc( i_out + 1 );
-    vlc_iconv( iconv_handle, &psz_stringstart, &i_in, &psz_tmp, &i_out );
-    vlc_iconv_close( iconv_handle );
-    *psz_tmp = '\0';
-    return psz_outstring;
+        psz_encoding = "UTF-8";
+        offset = 0;
+    }
+
+    psz_instring += offset;
+    i_length -= offset;
+
+    char *psz = FromCharset( psz_encoding, psz_instring, i_length );
+    if( psz == NULL )
+    {    /* Invalid character set (e.g. ISO_8859-12) */
+        psz = strndup( (const char *)psz_instring, i_length );
+        if( unlikely(psz == NULL) )
+            return NULL;
+        EnsureUTF8( psz );
+    }
+
+    /* Convert EIT-coded CR/LFs */
+    for(char *p = strstr( psz, "\xc2\x8a" ); p != NULL;
+        p = strstr( p, "\xc2\x8a" ))
+    {
+        p[0] = ' ';
+        p[1] = '\n';
+    }
+    return psz;
 }



More information about the vlc-commits mailing list