No subject
Wed Aug 5 14:32:43 CEST 2015
This patch follows on to an earlier one I made that displayed USF
subtitles in styled text. This one adds support for SubViewer (excluding
the metadata header information for which I can't find a specification),
MicroDVD, SubRip and VPlayer - i.e. all the generic subtitle formats
except SAMI.
SAMI isn't straightforward, and the existing demux code strips out any
tags which might have once been present in its context. It isn't a trivial
job to pass the stylesheet information which can be contained in that
format. A major hurdle in passing it lies in the fact that its XML is not
well-formed.
Because I couldn't find specs for any of these formats except SAMI (and
ASS and USF) I made an executive decision to restrict the list of
supported tags to the same ones used by Matroska in the USF DTD -- as
explained in the comments in the code. If anyone has any corrections to
apply to this assumption I'd like to hear from them, though.
SSA/ASS is not included in this patch, but only because I haven't gotten
around to it yet.
This also contains some bugfixes on the earlier submission.
Bernie
Jean-Baptiste Kempf <jb at videolan.org>
24/03/2007 01:01 PM
To
Bernie Purcell <b.purcell at adbglobal.com>
cc
Subject
Re: Fw: problem mailing to mailing list
On Fri, Mar 23, 2007, Bernie Purcell wrote :
> Hi Jean-Baptiste,
The mailing lists are back.
Can you send your patch in a text format ? :þ
Thanks a lot,
--=20
Jean-Baptiste Kempf
--=_alternative 007F9B40CA2572A9_=
Content-Type: text/html; charset="ISO-8859-1"
Content-Transfer-Encoding: quoted-printable
<br><font size=3D2 face=3D"sans-serif">Hi Jean-Baptiste,</font>
<br>
<br><font size=3D2 face=3D"sans-serif">Sorry, the last patch file submission
did come out in a messed up format - sorry. Hope this one works:</font>
<br>
<br>
<br><font size=3D2 face=3D"sans-serif">From original mail:</font>
<br><font size=3D2 face=3D"sans-serif">This patch follows on to an earlier
one I made that displayed USF subtitles in styled text. This one adds suppo=
rt
for SubViewer (excluding the metadata header information for which I can't
find a specification), MicroDVD, SubRip and VPlayer - i.e. all the generic
subtitle formats except SAMI.<br>
SAMI isn't straightforward, and the existing demux code strips out any
tags which might have once been present in its context. It isn't a trivial
job to pass the stylesheet information which can be contained in that forma=
t.
A major hurdle in passing it lies in the fact that its XML is not well-form=
ed.<br>
Because I couldn't find specs for any of these formats except SAMI (and
ASS and USF) I made an executive decision to restrict the list of supported
tags to the same ones used by Matroska in the USF DTD -- as explained in
the comments in the code. If anyone has any corrections to apply to this
assumption I'd like to hear from them, though.<br>
SSA/ASS is not included in this patch, but only because I haven't gotten
around to it yet.<br>
<br>
This also contains some bugfixes on the earlier submission.</font>
<br>
<br><font size=3D2 face=3D"sans-serif">Bernie</font>
<br>
<br>
<br>
<table width=3D100%>
<tr valign=3Dtop>
<td width=3D40%><font size=3D1 face=3D"sans-serif"><b>Jean-Baptiste Kempf &=
lt;jb at videolan.org></b>
</font>
<p><font size=3D1 face=3D"sans-serif">24/03/2007 01:01 PM</font>
<td width=3D59%>
<table width=3D100%>
<tr>
<td>
<div align=3Dright><font size=3D1 face=3D"sans-serif">To</font></div>
<td valign=3Dtop><font size=3D1 face=3D"sans-serif">Bernie Purcell <b.pu=
rcell at adbglobal.com></font>
<tr>
<td>
<div align=3Dright><font size=3D1 face=3D"sans-serif">cc</font></div>
<td valign=3Dtop>
<tr>
<td>
<div align=3Dright><font size=3D1 face=3D"sans-serif">Subject</font></div>
<td valign=3Dtop><font size=3D1 face=3D"sans-serif">Re: Fw: problem mailing=
to
mailing list</font></table>
<br>
<table>
<tr valign=3Dtop>
<td>
<td></table>
<br></table>
<br>
<br>
<br><font size=3D2><tt>On Fri, Mar 23, 2007, Bernie Purcell wrote :<br>
> Hi Jean-Baptiste,<br>
<br>
The mailing lists are back.<br>
Can you send your patch in a text format ? :=FE<br>
<br>
Thanks a lot,<br>
<br>
-- <br>
Jean-Baptiste Kempf<br>
</tt></font>
<br>
--=_alternative 007F9B40CA2572A9_=--
--=_mixed 007F9B40CA2572A9_=
Content-Type: text/plain; name="styling.svn-diff.txt"
Content-Disposition: attachment; filename="styling.svn-diff.txt"
Content-Transfer-Encoding: quoted-printable
Index: modules/misc/freetype.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- modules/misc/freetype.c (revision 19381)
+++ modules/misc/freetype.c (working copy)
@@ -1481,7 +1481,7 @@
}
else
{
- PushFont( &p=5Ffonts, FC=5FDEFAULT=5FFONT, 24, 0xffffff, 0 );
+ PushFont( &p=5Ffonts, FC=5FDEFAULT=5FFONT, p=5Fsys->i=5Ffont=5Fsiz=
e, 0xffffff, 0 );
}
=20
while ( ( xml=5FReaderRead( p=5Fxml=5Freader ) =3D=3D 1 ) && ( rv =3D=
=3D VLC=5FSUCCESS ) )
Index: modules/codec/subsdec.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- modules/codec/subsdec.c (revision 19381)
+++ modules/codec/subsdec.c (working copy)
@@ -77,7 +77,8 @@
static void ParseSSAString ( decoder=5Ft *, char *, subpicture=5Ft=
* );
static void ParseUSFString ( decoder=5Ft *, char *, subpicture=5Ft=
* );
static void ParseColor ( decoder=5Ft *, char *, int *, int * );
-static void StripTags ( char * );
+static char *StripTags ( char * );
+static char *CreateHtmlSubtitle ( char * );
=20
#define DEFAULT=5FNAME "Default"
#define MAX=5FLINE 8192
@@ -451,10 +452,10 @@
p=5Fspu->i=5Fy =3D 10;
=20
/* Remove formatting from string */
- StripTags( psz=5Fsubtitle );
=20
- p=5Fspu->p=5Fregion->psz=5Ftext =3D psz=5Fsubtitle;
- p=5Fspu->p=5Fregion->psz=5Fhtml =3D NULL;
+ p=5Fspu->p=5Fregion->psz=5Ftext =3D StripTags( psz=5Fsubtitle );
+ p=5Fspu->p=5Fregion->psz=5Fhtml =3D CreateHtmlSubtitle( psz=5Fsubt=
itle );
+
p=5Fspu->i=5Fstart =3D p=5Fblock->i=5Fpts;
p=5Fspu->i=5Fstop =3D p=5Fblock->i=5Fpts + p=5Fblock->i=5Flength;
p=5Fspu->b=5Fephemer =3D (p=5Fblock->i=5Flength =3D=3D 0);
@@ -474,8 +475,9 @@
p=5Fspu->b=5Fabsolute =3D VLC=5FFALSE;
p=5Fspu->i=5Foriginal=5Fpicture=5Fwidth =3D p=5Fsys->i=5Foriginal=
=5Fwidth;
p=5Fspu->i=5Foriginal=5Fpicture=5Fheight =3D p=5Fsys->i=5Foriginal=
=5Fheight;
- if( psz=5Fsubtitle ) free( psz=5Fsubtitle );
}
+ if( psz=5Fsubtitle ) free( psz=5Fsubtitle );
+
return p=5Fspu;
}
=20
@@ -501,8 +503,7 @@
p=5Fstyle =3D p=5Fsys->pp=5Fssa=5Fstyles[i];
}
=20
- /* The StripTags() function doesn't handle HTML tags that have attribu=
te/values with
- * them, or properly translate <br/> sequences into newlines, or handl=
e &' sequences
+ /* The StripTags() function doesn't do everything we need (eg. <br/> t=
ag )
* so do it here ourselves.
*/
psz=5Ftext=5Fstart =3D malloc( strlen( psz=5Fsubtitle ));
@@ -514,7 +515,7 @@
{
if( !strncasecmp( psz=5Fsubtitle, "<br/>", 5 ))
*psz=5Ftext++ =3D '\n';
- else if( strncasecmp( psz=5Fsubtitle, "<text ", 6 ))
+ else if( !strncasecmp( psz=5Fsubtitle, "<text ", 6 ))
{
char *psz=5Fstyle =3D strcasestr( psz=5Fsubtitle, "style=
=3D\"" );
=20
@@ -523,7 +524,7 @@
int i=5Flen;
=20
psz=5Fstyle +=3D strspn( psz=5Fstyle, "\"" ) + 1;
- i=5Flen =3D strspn( psz=5Fstyle, "\"" );
+ i=5Flen =3D strcspn( psz=5Fstyle, "\"" );
=20
psz=5Fstyle[ i=5Flen ] =3D '\0';
=20
@@ -1166,45 +1167,294 @@
return;
}
=20
-static void StripTags( char *psz=5Ftext )
+/* Function now handles tags which has attribute values, and tries
+ * to deal with &' commands too. It no longer modifies the string
+ * in place, so that the original text can be reused
+ */
+static char *StripTags( char *psz=5Fsubtitle )
{
- int i=5Fleft=5Fmoves =3D 0;
- vlc=5Fbool=5Ft b=5Finside=5Ftag =3D VLC=5FFALSE;
- int i =3D 0;
- int i=5Ftag=5Fstart =3D -1;
- while( psz=5Ftext[ i ] )
+ char *psz=5Ftext=5Fstart;
+
+ psz=5Ftext=5Fstart =3D malloc( strlen( psz=5Fsubtitle ) + 1 );
+
+ if( psz=5Ftext=5Fstart !=3D NULL )
{
- if( !b=5Finside=5Ftag )
+ char *psz=5Ftext =3D psz=5Ftext=5Fstart;
+
+ while( *psz=5Fsubtitle )
{
- if( psz=5Ftext[ i ] =3D=3D '<' )
+ if( *psz=5Fsubtitle =3D=3D '<' )
{
- b=5Finside=5Ftag =3D VLC=5FTRUE;
- i=5Ftag=5Fstart =3D i;
+ psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ">" );
}
- psz=5Ftext[ i - i=5Fleft=5Fmoves ] =3D psz=5Ftext[ i ];
+ else if( *psz=5Fsubtitle =3D=3D '&' )
+ {
+ if( !strncasecmp( psz=5Fsubtitle, "<", 4 ))
+ {
+ *psz=5Ftext++ =3D '<';
+ psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ";" );
+ }
+ else if( !strncasecmp( psz=5Fsubtitle, ">", 4 ))
+ {
+ *psz=5Ftext++ =3D '>';
+ psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ";" );
+ }
+ else if( !strncasecmp( psz=5Fsubtitle, "&", 5 ))
+ {
+ *psz=5Ftext++ =3D '&';
+ psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ";" );
+ }
+ else
+ {
+ /* Assume it is just a normal ampersand */
+ *psz=5Ftext++ =3D '&';
+ }
+ }
+ else
+ {
+ *psz=5Ftext++ =3D *psz=5Fsubtitle;
+ }
+
+ psz=5Fsubtitle++;
}
- else
+ *psz=5Ftext =3D '\0';
+ psz=5Ftext=5Fstart =3D realloc( psz=5Ftext=5Fstart, strlen( psz=5F=
text=5Fstart ) + 1 );
+ }
+ return psz=5Ftext=5Fstart;
+}
+
+/* Try to respect any style tags present in the subtitle string. The main
+ * problem here is a lack of adequate specs for the subtitle formats.
+ * SSA/ASS and USF are both detail spec'ed -- but they are handled elsewhe=
re.
+ * SAMI has a detailed spec, but extensive rework is needed in the demux
+ * code to prevent all this style information being excised, as it present=
ly
+ * does.
+ * That leaves the others - none of which were (I guess) originally intend=
ed
+ * to be carrying style information. Over time people have used them that =
way.
+ * In the absence of specifications from which to work, the tags supported
+ * have been restricted to the simple set permitted by the USF DTD, ie. :
+ * Basic: <br>, <i>, <b>, <u>
+ * Extended: <font>
+ * Attributes: face
+ * family
+ * size
+ * color
+ * outline-color
+ * shadow-color
+ * outline-level
+ * shadow-level
+ * back-color
+ * alpha
+ * There is also the further restriction that the subtitle be well-formed
+ * as an XML entity, ie. the HTML sentence:
+ * <b><i>Bold and Italics</b></i>
+ * doesn't qualify because the tags aren't nested one inside the other.
+ * <text> tags are automatically added to the output to ensure
+ * well-formedness.
+ * If the text doesn't qualify for any reason, a NULL string is
+ * returned, and the rendering engine will fall back to the
+ * plain text version of the subtitle.
+ */
+static char *CreateHtmlSubtitle( char *psz=5Fsubtitle )
+{
+ char psz=5FtagStack[ 100 ];
+ size=5Ft i=5Fbuf=5Fsize =3D strlen( psz=5Fsubtitle ) + 100;
+ char *psz=5Fhtml=5Fstart =3D malloc( i=5Fbuf=5Fsize );
+
+ psz=5FtagStack[ 0 ] =3D '\0';
+
+ if( psz=5Fhtml=5Fstart !=3D NULL )
+ {
+ char *psz=5Fhtml =3D psz=5Fhtml=5Fstart;
+
+ strcpy( psz=5Fhtml, "<text>" );
+ psz=5Fhtml +=3D 6;
+
+ while( *psz=5Fsubtitle )
{
- if( ( psz=5Ftext[ i ] =3D=3D ' ' ) ||
- ( psz=5Ftext[ i ] =3D=3D '\t' ) ||
- ( psz=5Ftext[ i ] =3D=3D '\n' ) ||
- ( psz=5Ftext[ i ] =3D=3D '\r' ) )
+ if( *psz=5Fsubtitle =3D=3D '\n' )
{
- b=5Finside=5Ftag =3D VLC=5FFALSE;
- i=5Ftag=5Fstart =3D -1;
+ strcpy( psz=5Fhtml, "<br/>" );
+ psz=5Fhtml +=3D 5;
+ psz=5Fsubtitle++;
}
- else if( psz=5Ftext[ i ] =3D=3D '>' )
+ else if( *psz=5Fsubtitle =3D=3D '<' )
{
- i=5Fleft=5Fmoves +=3D i - i=5Ftag=5Fstart + 1;
- i=5Ftag=5Fstart =3D -1;
- b=5Finside=5Ftag =3D VLC=5FFALSE;
+ if( !strncasecmp( psz=5Fsubtitle, "<br/>", 5 ))
+ {
+ strcpy( psz=5Fhtml, "<br/>" );
+ psz=5Fhtml +=3D 5;
+ psz=5Fsubtitle +=3D 5;
+ }
+ else if( !strncasecmp( psz=5Fsubtitle, "<b>", 3 ) )
+ {
+ strcpy( psz=5Fhtml, "<b>" );
+ strcat( psz=5FtagStack, "b" );
+ psz=5Fhtml +=3D 3;
+ psz=5Fsubtitle +=3D 3;
+ }
+ else if( !strncasecmp( psz=5Fsubtitle, "<i>", 3 ) )
+ {
+ strcpy( psz=5Fhtml, "<i>" );
+ strcat( psz=5FtagStack, "i" );
+ psz=5Fhtml +=3D 3;
+ psz=5Fsubtitle +=3D 3;
+ }
+ else if( !strncasecmp( psz=5Fsubtitle, "<u>", 3 ) )
+ {
+ strcpy( psz=5Fhtml, "<u>" );
+ strcat( psz=5FtagStack, "u" );
+ psz=5Fhtml +=3D 3;
+ psz=5Fsubtitle +=3D 3;
+ }
+ else if( !strncasecmp( psz=5Fsubtitle, "<font ", 6 ))
+ {
+ char *psz=5Fattribs[] =3D { "face=3D\"", "family=3D\""=
, "size=3D\"",
+ "color=3D\"", "outline-color=3D\"", "shadow-co=
lor=3D\"",
+ "outline-level=3D\"", "shadow-level=3D\"", "ba=
ck-color=3D\"",
+ "alpha=3D\"", NULL };
+
+ strcpy( psz=5Fhtml, "<font " );
+ strcat( psz=5FtagStack, "f" );
+ psz=5Fhtml +=3D 6;
+ psz=5Fsubtitle +=3D 6;
+
+ while( *psz=5Fsubtitle !=3D '>' )
+ {
+ int k;
+
+ for( k=3D0; psz=5Fattribs[ k ]; k++ )
+ {
+ int i=5Flen =3D strlen( psz=5Fattribs[ k ] );
+
+ if( !strncasecmp( psz=5Fsubtitle, psz=5Fattrib=
s[ k ], i=5Flen ))=20
+ {
+ i=5Flen +=3D strcspn( psz=5Fsubtitle + i=
=5Flen, "\"" ) + 1;
+
+ strncpy( psz=5Fhtml, psz=5Fsubtitle, i=5Fl=
en );
+ psz=5Fhtml +=3D i=5Flen;
+ psz=5Fsubtitle +=3D i=5Flen;
+ break;
+ }
+ }
+ if( psz=5Fattribs[ k ] =3D=3D NULL )
+ {
+ // Jump over unrecognised tag
+ int i=5Flen =3D strcspn( psz=5Fsubtitle, "\"" =
) + 1;
+
+ i=5Flen +=3D strcspn( psz=5Fsubtitle + i=5Flen=
, "\"" ) + 1;
+ psz=5Fsubtitle +=3D i=5Flen;
+ }
+ while (*psz=5Fsubtitle =3D=3D ' ')
+ *psz=5Fhtml++ =3D *psz=5Fsubtitle++;
+ }
+ *psz=5Fhtml++ =3D *psz=5Fsubtitle++;
+ }
+ else if( !strncmp( psz=5Fsubtitle, "</", 2 ))
+ {
+ vlc=5Fbool=5Ft b=5Fmatch =3D VLC=5FFALSE;
+ int i=5Flen =3D strlen( psz=5FtagStack )=
- 1;
+ char *psz=5FlastTag =3D NULL;
+
+ if( i=5Flen >=3D 0 )
+ {
+ psz=5FlastTag =3D psz=5FtagStack + i=5Flen;
+ i=5Flen =3D 0;
+
+ switch( *psz=5FlastTag )
+ {
+ case 'b':
+ b=5Fmatch =3D !strncasecmp( psz=5Fsubtitle=
, "</b>", 4 );
+ i=5Flen =3D 4;
+ break;
+ case 'i':
+ b=5Fmatch =3D !strncasecmp( psz=5Fsubtitle=
, "</i>", 4 );
+ i=5Flen =3D 4;
+ break;
+ case 'u':
+ b=5Fmatch =3D !strncasecmp( psz=5Fsubtitle=
, "</u>", 4 );
+ i=5Flen =3D 4;
+ break;
+ case 'f':
+ b=5Fmatch =3D !strncasecmp( psz=5Fsubtitle=
, "</font>", 7 );
+ i=5Flen =3D 7;
+ break;
+ }
+ }
+ if( ! b=5Fmatch )
+ {
+ /* Not well formed -- kill everything */
+ free( psz=5Fhtml=5Fstart );
+ psz=5Fhtml=5Fstart =3D NULL;
+ break;
+ }
+ *psz=5FlastTag =3D '\0';
+ strncpy( psz=5Fhtml, psz=5Fsubtitle, i=5Flen );
+ psz=5Fhtml +=3D i=5Flen;
+ psz=5Fsubtitle +=3D i=5Flen;
+ }
+ else
+ {
+ psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ">" );
+ }
}
+ else if( *psz=5Fsubtitle =3D=3D '&' )
+ {
+ if( !strncasecmp( psz=5Fsubtitle, "<", 4 ))
+ {
+ strcpy( psz=5Fhtml, "<" );
+ psz=5Fhtml +=3D 4;
+ psz=5Fsubtitle +=3D 4;
+ }
+ else if( !strncasecmp( psz=5Fsubtitle, ">", 4 ))
+ {
+ strcpy( psz=5Fhtml, ">" );
+ psz=5Fhtml +=3D 4;
+ psz=5Fsubtitle +=3D 4;
+ }
+ else if( !strncasecmp( psz=5Fsubtitle, "&", 5 ))
+ {
+ strcpy( psz=5Fhtml, "&" );
+ psz=5Fhtml +=3D 5;
+ psz=5Fsubtitle +=3D 5;
+ }
+ else
+ {
+ strcpy( psz=5Fhtml, "&" );
+ psz=5Fhtml +=3D 5;
+ psz=5Fsubtitle++;
+ }
+ }
else
{
- psz=5Ftext[ i - i=5Fleft=5Fmoves ] =3D psz=5Ftext[ i ];
+ *psz=5Fhtml++ =3D *psz=5Fsubtitle++;
}
+
+ if( psz=5Fhtml - psz=5Fhtml=5Fstart > i=5Fbuf=5Fsize - 10 )
+ {
+ int i=5Flen =3D psz=5Fhtml - psz=5Fhtml=5Fstart;
+
+ i=5Fbuf=5Fsize +=3D 100;
+ psz=5Fhtml=5Fstart =3D realloc( psz=5Fhtml=5Fstart, i=5Fbu=
f=5Fsize );
+ psz=5Fhtml =3D psz=5Fhtml=5Fstart + i=5Flen;
+ *psz=5Fhtml =3D '\0';
+ }
}
- i++;
+ strcpy( psz=5Fhtml, "</text>" );
+ psz=5Fhtml +=3D 7;
+
+ if( psz=5FtagStack[ 0 ] !=3D '\0' )
+ {
+ /* Not well formed -- kill everything */
+ free( psz=5Fhtml=5Fstart );
+ psz=5Fhtml=5Fstart =3D NULL;
+ }
+ else
+ {
+ /* Shrink the memory requirements */
+ psz=5Fhtml=5Fstart =3D realloc( psz=5Fhtml=5Fstart, psz=5Fhtm=
l - psz=5Fhtml=5Fstart + 1 );
+ }
}
- psz=5Ftext[ i - i=5Fleft=5Fmoves ] =3D '\0';
+ return psz=5Fhtml=5Fstart;
}
+
--=_mixed 007F9B40CA2572A9_=--
--
This is the vlc-devel mailing-list, see http://www.videolan.org/vlc/
To unsubscribe, please read http://developers.videolan.org/lists.html
More information about the vlc-devel
mailing list