No subject


Wed Aug 5 14:32:43 CEST 2015


This patch follows on from an earlier one I made that displayed USF
subtitles in styled text. This one adds support for SubViewer (excluding
the metadata header information, for which I can't find a specification),
MicroDVD, SubRip and VPlayer - i.e. all the generic subtitle formats
except SAMI.
SAMI isn't straightforward, and the existing demux code strips out any
tags which might have once been present in its context. It isn't a trivial
job to pass the stylesheet information which can be contained in that
format. A major hurdle in passing it lies in the fact that its XML is not
well-formed.
Because I couldn't find specs for any of these formats except SAMI (and
ASS and USF), I made an executive decision to restrict the list of
supported tags to the same ones used by Matroska in the USF DTD -- as
explained in the comments in the code. If anyone has any corrections to
apply to this assumption I'd like to hear from them, though.
SSA/ASS is not included in this patch, but only because I haven't gotten
around to it yet.

This also contains some bugfixes on the earlier submission.

Bernie



Jean-Baptiste Kempf <jb at videolan.org>
24/03/2007 01:01 PM

To
Bernie Purcell <b.purcell at adbglobal.com>
cc

Subject
Re: Fw: problem mailing to mailing list






On Fri, Mar 23, 2007, Bernie Purcell wrote :
>    Hi Jean-Baptiste,

The mailing lists are back.
Can you send your patch in a text format? :þ

Thanks a lot,

--=20
Jean-Baptiste Kempf


--=_alternative 007F9B40CA2572A9_=
Content-Type: text/html; charset="ISO-8859-1"
Content-Transfer-Encoding: quoted-printable


<br><font size=3D2 face=3D"sans-serif">Hi Jean-Baptiste,</font>
<br>
<br><font size=3D2 face=3D"sans-serif">Sorry, the last patch file submission
came out in a messed-up format. Hope this one works:</font>
<br>
<br>
<br><font size=3D2 face=3D"sans-serif">From original mail:</font>
<br><font size=3D2 face=3D"sans-serif">This patch follows on to an earlier
one I made that displayed USF subtitles in styled text. This one adds suppo=
rt
for SubViewer (excluding the metadata header information for which I can't
find a specification), MicroDVD, SubRip and VPlayer - ie. all the generics
subtitle formats except SAMI.<br>
SAMI isn't straightforward, and the existing demux code strips out any
tags which might have once been present in its context. It isn't a trivial
job to pass the stylesheet information which can be contained in that forma=
t.
A major hurdle in passing it lies in the fact that its XML is not well-form=
ed.<br>
Because I couldn't find specs for any of these formats except SAMI (and
ASS and USF) I made an executive decision to restrict the list of supported
tags to the same ones used by Matroska in the USF DTD -- as explained in
the comments in the code. If anyone has any corrections to apply to this
assumption I'd like to hear from them, though.<br>
SSA/ASS is not included in this patch, but only because I haven't gotten
around to it yet.<br>
<br>
This also contains some bugfixes on the earlier submission.</font>
<br>
<br><font size=3D2 face=3D"sans-serif">Bernie</font>
<br>
<br>
<br>
<table width=3D100%>
<tr valign=3Dtop>
<td width=3D40%><font size=3D1 face=3D"sans-serif"><b>Jean-Baptiste Kempf &=
lt;jb at videolan.org></b>
</font>
<p><font size=3D1 face=3D"sans-serif">24/03/2007 01:01 PM</font>
<td width=3D59%>
<table width=3D100%>
<tr>
<td>
<div align=3Dright><font size=3D1 face=3D"sans-serif">To</font></div>
<td valign=3Dtop><font size=3D1 face=3D"sans-serif">Bernie Purcell <b.pu=
rcell at adbglobal.com></font>
<tr>
<td>
<div align=3Dright><font size=3D1 face=3D"sans-serif">cc</font></div>
<td valign=3Dtop>
<tr>
<td>
<div align=3Dright><font size=3D1 face=3D"sans-serif">Subject</font></div>
<td valign=3Dtop><font size=3D1 face=3D"sans-serif">Re: Fw: problem mailing=
 to
mailing list</font></table>
<br>
<table>
<tr valign=3Dtop>
<td>
<td></table>
<br></table>
<br>
<br>
<br><font size=3D2><tt>On Fri, Mar 23, 2007, Bernie Purcell wrote :<br>
>    Hi Jean-Baptiste,<br>
<br>
The mailing lists are back.<br>
Can you send your patch in a text format ? :=FE<br>
<br>
Thanks a lot,<br>
<br>
-- <br>
Jean-Baptiste Kempf<br>
</tt></font>
<br>
--=_alternative 007F9B40CA2572A9_=--
--=_mixed 007F9B40CA2572A9_=
Content-Type: text/plain; name="styling.svn-diff.txt"
Content-Disposition: attachment; filename="styling.svn-diff.txt"
Content-Transfer-Encoding: quoted-printable

Index: modules/misc/freetype.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- modules/misc/freetype.c	(revision 19381)
+++ modules/misc/freetype.c	(working copy)
@@ -1481,7 +1481,7 @@
     }
     else
     {
-        PushFont( &p=5Ffonts, FC=5FDEFAULT=5FFONT, 24, 0xffffff, 0 );
+        PushFont( &p=5Ffonts, FC=5FDEFAULT=5FFONT, p=5Fsys->i=5Ffont=5Fsiz=
e, 0xffffff, 0 );
     }
=20
     while ( ( xml=5FReaderRead( p=5Fxml=5Freader ) =3D=3D 1 ) && ( rv =3D=
=3D VLC=5FSUCCESS ) )
Index: modules/codec/subsdec.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- modules/codec/subsdec.c	(revision 19381)
+++ modules/codec/subsdec.c	(working copy)
@@ -77,7 +77,8 @@
 static void         ParseSSAString ( decoder=5Ft *, char *, subpicture=5Ft=
 * );
 static void         ParseUSFString ( decoder=5Ft *, char *, subpicture=5Ft=
 * );
 static void         ParseColor     ( decoder=5Ft *, char *, int *, int * );
-static void         StripTags      ( char * );
+static char        *StripTags      ( char * );
+static char        *CreateHtmlSubtitle ( char * );
=20
 #define DEFAULT=5FNAME "Default"
 #define MAX=5FLINE 8192
@@ -451,10 +452,10 @@
         p=5Fspu->i=5Fy =3D 10;
=20
         /* Remove formatting from string */
-        StripTags( psz=5Fsubtitle );
=20
-        p=5Fspu->p=5Fregion->psz=5Ftext =3D psz=5Fsubtitle;
-        p=5Fspu->p=5Fregion->psz=5Fhtml =3D NULL;
+        p=5Fspu->p=5Fregion->psz=5Ftext =3D StripTags( psz=5Fsubtitle );
+        p=5Fspu->p=5Fregion->psz=5Fhtml =3D CreateHtmlSubtitle( psz=5Fsubt=
itle );
+
         p=5Fspu->i=5Fstart =3D p=5Fblock->i=5Fpts;
         p=5Fspu->i=5Fstop =3D p=5Fblock->i=5Fpts + p=5Fblock->i=5Flength;
         p=5Fspu->b=5Fephemer =3D (p=5Fblock->i=5Flength =3D=3D 0);
@@ -474,8 +475,9 @@
         p=5Fspu->b=5Fabsolute =3D VLC=5FFALSE;
         p=5Fspu->i=5Foriginal=5Fpicture=5Fwidth =3D p=5Fsys->i=5Foriginal=
=5Fwidth;
         p=5Fspu->i=5Foriginal=5Fpicture=5Fheight =3D p=5Fsys->i=5Foriginal=
=5Fheight;
-        if( psz=5Fsubtitle ) free( psz=5Fsubtitle );
     }
+    if( psz=5Fsubtitle ) free( psz=5Fsubtitle );
+
     return p=5Fspu;
 }
=20
@@ -501,8 +503,7 @@
             p=5Fstyle =3D p=5Fsys->pp=5Fssa=5Fstyles[i];
     }
=20
-    /* The StripTags() function doesn't handle HTML tags that have attribu=
te/values with
-     * them, or properly translate <br/> sequences into newlines, or handl=
e &' sequences
+    /* The StripTags() function doesn't do everything we need (eg. <br/> t=
ag )
      * so do it here ourselves.
      */
     psz=5Ftext=5Fstart =3D malloc( strlen( psz=5Fsubtitle ));
@@ -514,7 +515,7 @@
         {
             if( !strncasecmp( psz=5Fsubtitle, "<br/>", 5 ))
                 *psz=5Ftext++ =3D '\n';
-            else if( strncasecmp( psz=5Fsubtitle, "<text ", 6 ))
+            else if( !strncasecmp( psz=5Fsubtitle, "<text ", 6 ))
             {
                 char *psz=5Fstyle =3D strcasestr( psz=5Fsubtitle, "style=
=3D\"" );
=20
@@ -523,7 +524,7 @@
                     int i=5Flen;
=20
                     psz=5Fstyle +=3D strspn( psz=5Fstyle, "\"" ) + 1;
-                    i=5Flen =3D strspn( psz=5Fstyle, "\"" );
+                    i=5Flen =3D strcspn( psz=5Fstyle, "\"" );
=20
                     psz=5Fstyle[ i=5Flen ] =3D '\0';
=20
@@ -1166,45 +1167,294 @@
     return;
 }
=20
-static void StripTags( char *psz=5Ftext )
+/* Function now handles tags which have attribute values, and tries
+ * to deal with '&' entity sequences too. It no longer modifies the
+ * string in place, so that the original text can be reused.
+ */
+static char *StripTags( char *psz=5Fsubtitle )
 {
-    int i=5Fleft=5Fmoves =3D 0;
-    vlc=5Fbool=5Ft b=5Finside=5Ftag =3D VLC=5FFALSE;
-    int i =3D 0;
-    int i=5Ftag=5Fstart =3D -1;
-    while( psz=5Ftext[ i ] )
+    char *psz=5Ftext=5Fstart;
+
+    psz=5Ftext=5Fstart =3D malloc( strlen( psz=5Fsubtitle ) + 1 );
+
+    if( psz=5Ftext=5Fstart !=3D NULL )
     {
-        if( !b=5Finside=5Ftag )
+        char *psz=5Ftext =3D psz=5Ftext=5Fstart;
+
+        while( *psz=5Fsubtitle )
         {
-            if( psz=5Ftext[ i ] =3D=3D '<' )
+            if( *psz=5Fsubtitle =3D=3D '<' )
             {
-                b=5Finside=5Ftag =3D VLC=5FTRUE;
-                i=5Ftag=5Fstart =3D i;
+                psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ">" );
             }
-            psz=5Ftext[ i - i=5Fleft=5Fmoves ] =3D psz=5Ftext[ i ];
+            else if( *psz=5Fsubtitle =3D=3D '&' )
+            {
+                if( !strncasecmp( psz=5Fsubtitle, "<", 4 ))
+                {
+                    *psz=5Ftext++ =3D '<';
+                    psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ";" );
+                }
+                else if( !strncasecmp( psz=5Fsubtitle, ">", 4 ))
+                {
+                    *psz=5Ftext++ =3D '>';
+                    psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ";" );
+                }
+                else if( !strncasecmp( psz=5Fsubtitle, "&", 5 ))
+                {
+                    *psz=5Ftext++ =3D '&';
+                    psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ";" );
+                }
+                else
+                {
+                    /* Assume it is just a normal ampersand */
+                    *psz=5Ftext++ =3D '&';
+                }
+            }
+            else
+            {
+                *psz=5Ftext++ =3D *psz=5Fsubtitle;
+            }
+
+            psz=5Fsubtitle++;
         }
-        else
+        *psz=5Ftext =3D '\0';
+        psz=5Ftext=5Fstart =3D realloc( psz=5Ftext=5Fstart, strlen( psz=5F=
text=5Fstart ) + 1 );
+    }
+    return psz=5Ftext=5Fstart;
+}
+
+/* Try to respect any style tags present in the subtitle string. The main
+ * problem here is a lack of adequate specs for the subtitle formats.
+ * SSA/ASS and USF both have detailed specs -- but are handled elsewhere.
+ * SAMI has a detailed spec, but extensive rework is needed in the demux
+ * code to prevent all this style information being excised, as it
+ * presently is.
+ * That leaves the others - none of which were (I guess) originally meant
+ * to carry style information. Over time people have used them that way.
+ * In the absence of specifications from which to work, the tags supported
+ * have been restricted to the simple set permitted by the USF DTD, ie. :
+ *  Basic: <br>, <i>, <b>, <u>
+ *  Extended: <font>
+ *    Attributes: face
+ *                family
+ *                size
+ *                color
+ *                outline-color
+ *                shadow-color
+ *                outline-level
+ *                shadow-level
+ *                back-color
+ *                alpha
+ * There is also the further restriction that the subtitle be well-formed
+ * as an XML entity, ie. the HTML sentence:
+ *        <b><i>Bold and Italics</b></i>
+ * doesn't qualify because the tags aren't nested one inside the other.
+ * <text> tags are automatically added to the output to ensure
+ * well-formedness.
+ * If the text doesn't qualify for any reason, a NULL string is
+ * returned, and the rendering engine will fall back to the
+ * plain text version of the subtitle.
+ */
+static char *CreateHtmlSubtitle( char *psz=5Fsubtitle )
+{
+    char    psz=5FtagStack[ 100 ];
+    size=5Ft  i=5Fbuf=5Fsize     =3D strlen( psz=5Fsubtitle ) + 100;
+    char   *psz=5Fhtml=5Fstart =3D malloc( i=5Fbuf=5Fsize );
+
+    psz=5FtagStack[ 0 ] =3D '\0';
+
+    if( psz=5Fhtml=5Fstart !=3D NULL )
+    {
+        char *psz=5Fhtml =3D psz=5Fhtml=5Fstart;
+
+        strcpy( psz=5Fhtml, "<text>" );
+        psz=5Fhtml +=3D 6;
+
+        while( *psz=5Fsubtitle )
         {
-            if( ( psz=5Ftext[ i ] =3D=3D ' ' ) ||
-                ( psz=5Ftext[ i ] =3D=3D '\t' ) ||
-                ( psz=5Ftext[ i ] =3D=3D '\n' ) ||
-                ( psz=5Ftext[ i ] =3D=3D '\r' ) )
+            if( *psz=5Fsubtitle =3D=3D '\n' )
             {
-                b=5Finside=5Ftag =3D VLC=5FFALSE;
-                i=5Ftag=5Fstart =3D -1;
+                strcpy( psz=5Fhtml, "<br/>" );
+                psz=5Fhtml +=3D 5;
+                psz=5Fsubtitle++;
             }
-            else if( psz=5Ftext[ i ] =3D=3D '>' )
+            else if( *psz=5Fsubtitle =3D=3D '<' )
             {
-                i=5Fleft=5Fmoves +=3D i - i=5Ftag=5Fstart + 1;
-                i=5Ftag=5Fstart =3D -1;
-                b=5Finside=5Ftag =3D VLC=5FFALSE;
+                if( !strncasecmp( psz=5Fsubtitle, "<br/>", 5 ))
+                {
+                    strcpy( psz=5Fhtml, "<br/>" );
+                    psz=5Fhtml +=3D 5;
+                    psz=5Fsubtitle +=3D 5;
+                }
+                else if( !strncasecmp( psz=5Fsubtitle, "<b>", 3 ) )
+                {
+                    strcpy( psz=5Fhtml, "<b>" );
+                    strcat( psz=5FtagStack, "b" );
+                    psz=5Fhtml +=3D 3;
+                    psz=5Fsubtitle +=3D 3;
+                }
+                else if( !strncasecmp( psz=5Fsubtitle, "<i>", 3 ) )
+                {
+                    strcpy( psz=5Fhtml, "<i>" );
+                    strcat( psz=5FtagStack, "i" );
+                    psz=5Fhtml +=3D 3;
+                    psz=5Fsubtitle +=3D 3;
+                }
+                else if( !strncasecmp( psz=5Fsubtitle, "<u>", 3 ) )
+                {
+                    strcpy( psz=5Fhtml, "<u>" );
+                    strcat( psz=5FtagStack, "u" );
+                    psz=5Fhtml +=3D 3;
+                    psz=5Fsubtitle +=3D 3;
+                }
+                else if( !strncasecmp( psz=5Fsubtitle, "<font ", 6 ))
+                {
+                    char *psz=5Fattribs[] =3D { "face=3D\"", "family=3D\""=
, "size=3D\"",
+                            "color=3D\"", "outline-color=3D\"", "shadow-co=
lor=3D\"",
+                            "outline-level=3D\"", "shadow-level=3D\"", "ba=
ck-color=3D\"",
+                            "alpha=3D\"", NULL };
+
+                    strcpy( psz=5Fhtml, "<font " );
+                    strcat( psz=5FtagStack, "f" );
+                    psz=5Fhtml +=3D 6;
+                    psz=5Fsubtitle +=3D 6;
+
+                    while( *psz=5Fsubtitle !=3D '>' )
+                    {
+                        int  k;
+
+                        for( k=3D0; psz=5Fattribs[ k ]; k++ )
+                        {
+                            int i=5Flen =3D strlen( psz=5Fattribs[ k ] );
+
+                            if( !strncasecmp( psz=5Fsubtitle, psz=5Fattrib=
s[ k ], i=5Flen ))=20
+                            {
+                                i=5Flen +=3D strcspn( psz=5Fsubtitle + i=
=5Flen, "\"" ) + 1;
+
+                                strncpy( psz=5Fhtml, psz=5Fsubtitle, i=5Fl=
en );
+                                psz=5Fhtml +=3D i=5Flen;
+                                psz=5Fsubtitle +=3D i=5Flen;
+                                break;
+                            }
+                        }
+                        if( psz=5Fattribs[ k ] =3D=3D NULL )
+                        {
+                            // Jump over unrecognised tag
+                            int i=5Flen =3D strcspn( psz=5Fsubtitle, "\"" =
) + 1;
+
+                            i=5Flen +=3D strcspn( psz=5Fsubtitle + i=5Flen=
, "\"" ) + 1;
+                            psz=5Fsubtitle +=3D i=5Flen;
+                        }
+                        while (*psz=5Fsubtitle =3D=3D ' ')
+                            *psz=5Fhtml++ =3D *psz=5Fsubtitle++;
+                    }
+                    *psz=5Fhtml++ =3D *psz=5Fsubtitle++;
+                }
+                else if( !strncmp( psz=5Fsubtitle, "</", 2 ))
+                {
+                    vlc=5Fbool=5Ft  b=5Fmatch     =3D VLC=5FFALSE;
+                    int         i=5Flen       =3D strlen( psz=5FtagStack )=
 - 1;
+                    char       *psz=5FlastTag =3D NULL;
+
+                    if( i=5Flen >=3D 0 )
+                    {
+                        psz=5FlastTag =3D psz=5FtagStack + i=5Flen;
+                        i=5Flen =3D 0;
+
+                        switch( *psz=5FlastTag )
+                        {
+                            case 'b':
+                                b=5Fmatch =3D !strncasecmp( psz=5Fsubtitle=
, "</b>", 4 );
+                                i=5Flen   =3D 4;
+                                break;
+                            case 'i':
+                                b=5Fmatch =3D !strncasecmp( psz=5Fsubtitle=
, "</i>", 4 );
+                                i=5Flen   =3D 4;
+                                break;
+                            case 'u':
+                                b=5Fmatch =3D !strncasecmp( psz=5Fsubtitle=
, "</u>", 4 );
+                                i=5Flen   =3D 4;
+                                break;
+                            case 'f':
+                                b=5Fmatch =3D !strncasecmp( psz=5Fsubtitle=
, "</font>", 7 );
+                                i=5Flen   =3D 7;
+                                break;
+                        }
+                    }
+                    if( ! b=5Fmatch )
+                    {
+                        /* Not well formed -- kill everything */
+                        free( psz=5Fhtml=5Fstart );
+                        psz=5Fhtml=5Fstart =3D NULL;
+                        break;
+                    }
+                    *psz=5FlastTag =3D '\0';
+                    strncpy( psz=5Fhtml, psz=5Fsubtitle, i=5Flen );
+                    psz=5Fhtml +=3D i=5Flen;
+                    psz=5Fsubtitle +=3D i=5Flen;
+                }
+                else
+                {
+                    psz=5Fsubtitle +=3D strcspn( psz=5Fsubtitle, ">" );
+                }
             }
+            else if( *psz=5Fsubtitle =3D=3D '&' )
+            {
+                if( !strncasecmp( psz=5Fsubtitle, "<", 4 ))
+                {
+                    strcpy( psz=5Fhtml, "<" );
+                    psz=5Fhtml +=3D 4;
+                    psz=5Fsubtitle +=3D 4;
+                }
+                else if( !strncasecmp( psz=5Fsubtitle, ">", 4 ))
+                {
+                    strcpy( psz=5Fhtml, ">" );
+                    psz=5Fhtml +=3D 4;
+                    psz=5Fsubtitle +=3D 4;
+                }
+                else if( !strncasecmp( psz=5Fsubtitle, "&", 5 ))
+                {
+                    strcpy( psz=5Fhtml, "&" );
+                    psz=5Fhtml +=3D 5;
+                    psz=5Fsubtitle +=3D 5;
+                }
+                else
+                {
+                    strcpy( psz=5Fhtml, "&" );
+                    psz=5Fhtml +=3D 5;
+                    psz=5Fsubtitle++;
+                }
+            }
             else
             {
-                psz=5Ftext[ i - i=5Fleft=5Fmoves ] =3D psz=5Ftext[ i ];
+                *psz=5Fhtml++ =3D *psz=5Fsubtitle++;
             }
+
+            if( psz=5Fhtml - psz=5Fhtml=5Fstart > i=5Fbuf=5Fsize - 10 )
+            {
+                int i=5Flen =3D psz=5Fhtml - psz=5Fhtml=5Fstart;
+
+                i=5Fbuf=5Fsize +=3D 100;
+                psz=5Fhtml=5Fstart =3D realloc( psz=5Fhtml=5Fstart, i=5Fbu=
f=5Fsize );
+                psz=5Fhtml =3D psz=5Fhtml=5Fstart + i=5Flen;
+                *psz=5Fhtml =3D '\0';
+            }
         }
-        i++;
+        strcpy( psz=5Fhtml, "</text>" );
+        psz=5Fhtml +=3D 7;
+
+        if( psz=5FtagStack[ 0 ] !=3D '\0' )
+        {
+            /* Not well formed -- kill everything */
+            free( psz=5Fhtml=5Fstart );
+            psz=5Fhtml=5Fstart =3D NULL;
+        }
+        else
+        {
+            /* Shrink the memory requirements */
+            psz=5Fhtml=5Fstart =3D realloc( psz=5Fhtml=5Fstart,  psz=5Fhtm=
l - psz=5Fhtml=5Fstart + 1 );
+        }
     }
-    psz=5Ftext[ i - i=5Fleft=5Fmoves ] =3D '\0';
+    return psz=5Fhtml=5Fstart;
 }
+
--=_mixed 007F9B40CA2572A9_=--

-- 
This is the vlc-devel mailing-list, see http://www.videolan.org/vlc/
To unsubscribe, please read http://developers.videolan.org/lists.html



More information about the vlc-devel mailing list