[vlc-commits] commit: DVB: resync with TS demux ( Rémi Denis-Courmont )
git at videolan.org
git at videolan.org
Tue Oct 26 19:27:49 CEST 2010
vlc | branch: master | Rémi Denis-Courmont <remi at remlab.net> | Tue Oct 26 20:27:40 2010 +0300| [67a62cc7bbb9f0241f14183d89da58f7e0aada41] | committer: Rémi Denis-Courmont
DVB: resync with TS demux
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=67a62cc7bbb9f0241f14183d89da58f7e0aada41
---
modules/access/dvb/en50221.c | 98 +++++++++++++++++++++--------------------
1 files changed, 50 insertions(+), 48 deletions(-)
diff --git a/modules/access/dvb/en50221.c b/modules/access/dvb/en50221.c
index 6a24fce..8e5bfd9 100644
--- a/modules/access/dvb/en50221.c
+++ b/modules/access/dvb/en50221.c
@@ -2338,107 +2338,109 @@ void en50221_End( access_t * p_access )
* program. */
}
-static inline void *FixUTF8( char *p )
-{
- EnsureUTF8( p );
- return p;
-}
-
+/* FIXME: same as EITConvertToUTF8 from the TS demux */
char *dvbsi_to_utf8( const char *psz_instring, size_t i_length )
{
- const char *psz_encoding, *psz_stringstart;
- char *psz_outstring, *psz_tmp;
- char psz_encbuf[12];
- size_t i_in, i_out;
- vlc_iconv_t iconv_handle;
+ const char *psz_encoding;
+ char psz_encbuf[sizeof( "ISO_8859-123" )];
+ size_t offset = 1;
+
if( i_length < 1 ) return NULL;
- if( psz_instring[0] < 0 || psz_instring[0] >= 0x20 )
+ if( psz_instring[0] >= 0x20 )
{
- psz_stringstart = psz_instring;
- psz_encoding = "ISO_8859-1"; /* should be ISO6937 according to spec, but this seems to be the one used */
- } else switch( psz_instring[0] )
+ psz_encoding = "ISO_6937";
+ offset = 0;
+ }
+ else switch( psz_instring[0] )
{
case 0x01:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-5";
break;
case 0x02:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-6";
break;
case 0x03:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-7";
break;
case 0x04:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-8";
break;
case 0x05:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-9";
break;
case 0x06:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-10";
break;
case 0x07:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-11";
break;
case 0x08:
- psz_stringstart = &psz_instring[1]; /*possibly reserved?*/
psz_encoding = "ISO_8859-12";
break;
case 0x09:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-13";
break;
case 0x0a:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-14";
break;
case 0x0b:
- psz_stringstart = &psz_instring[1];
psz_encoding = "ISO_8859-15";
break;
case 0x10:
- if( i_length < 3 || psz_instring[1] != '\0' || psz_instring[2] > 0x0f
- || psz_instring[2] == 0 )
- return FixUTF8(strndup(psz_instring,i_length));
- sprintf( psz_encbuf, "ISO_8859-%d", psz_instring[2] );
- psz_stringstart = &psz_instring[3];
- psz_encoding = psz_encbuf;
+#warning Is Latin-10 (psz_instring[2] == 16) really illegal?
+ if( i_length < 3 || psz_instring[1] != 0x00 || psz_instring[2] > 15
+ || psz_instring[2] == 0 )
+ {
+ psz_encoding = "UTF-8";
+ offset = 0;
+ }
+ else
+ {
+ sprintf( psz_encbuf, "ISO_8859-%u", psz_instring[2] );
+ psz_encoding = psz_encbuf;
+ offset = 3;
+ }
break;
case 0x11:
- psz_stringstart = &psz_instring[1];
+#warning Is there a BOM or do we use a fixed endianess?
psz_encoding = "UTF-16";
break;
case 0x12:
- psz_stringstart = &psz_instring[1];
psz_encoding = "KSC5601-1987";
break;
case 0x13:
- psz_stringstart = &psz_instring[1];
- psz_encoding = "GB2312";/*GB-2312-1980 */
+ psz_encoding = "GB2312"; /* GB-2312-1980 */
break;
case 0x14:
- psz_stringstart = &psz_instring[1];
psz_encoding = "BIG-5";
break;
case 0x15:
- return FixUTF8(strndup(&psz_instring[1],i_length-1));
+ psz_encoding = "UTF-8";
break;
default:
/* invalid */
- return FixUTF8(strndup(psz_instring,i_length));
- }
- iconv_handle = vlc_iconv_open( "UTF-8", psz_encoding );
- i_in = i_length - (psz_stringstart - psz_instring );
- i_out = i_in * 6;
- psz_outstring = psz_tmp = (char*)xmalloc( i_out + 1 );
- vlc_iconv( iconv_handle, &psz_stringstart, &i_in, &psz_tmp, &i_out );
- vlc_iconv_close( iconv_handle );
- *psz_tmp = '\0';
- return psz_outstring;
+ psz_encoding = "UTF-8";
+ offset = 0;
+ }
+
+ psz_instring += offset;
+ i_length -= offset;
+
+ char *psz = FromCharset( psz_encoding, psz_instring, i_length );
+ if( psz == NULL )
+ { /* Invalid character set (e.g. ISO_8859-12) */
+ psz = strndup( (const char *)psz_instring, i_length );
+ if( unlikely(psz == NULL) )
+ return NULL;
+ EnsureUTF8( psz );
+ }
+
+ /* Convert EIT-coded CR/LFs */
+ for(char *p = strstr( psz, "\xc2\x8a" ); p != NULL;
+ p = strstr( p, "\xc2\x8a" ))
+ {
+ p[0] = ' ';
+ p[1] = '\n';
+ }
+ return psz;
}
More information about the vlc-commits mailing list