[vlc-devel] [PATCH] libass: srt support

Mathieu Velten matmaul at gmail.com
Sun Sep 11 23:36:44 CEST 2011


Thanks for your review.
Comments below :

2011/9/11 Jean-Baptiste Kempf <jb at videolan.org>:
> On Sun, Sep 11, 2011 at 03:38:10PM +0200, Mathieu Velten wrote :
>> This is a patch which add srt parsing to the libass renderer (using
>> parsing code from mplayer) : it allows to handle correctly all the TAG
>> srts out there by converting "standard" HTML-like tags into ass tags.
>
> Hmm, ok.
>
>> Since libass has higher priority and it now supports srt it becomes
>> the default decoder for srt, I don't know if this is ok for you.
> libass costs a lot more CPU than the normal freetype one, so I am not
> sure...
>
>
>> +/* Default fonts */
>> +#ifdef __APPLE__
>> +# define FC_DEFAULT_FONT "Arial Black"
>> +#elif defined( WIN32 )
>> +# define FC_DEFAULT_FONT "Arial"
>> +#elif defined( HAVE_MAEMO )
>> +# define FC_DEFAULT_FONT "Nokia Sans"
>> +#else
>> +# define FC_DEFAULT_FONT "Sans"
>> +#endif
>> +
>>  /*****************************************************************************
>>   * Module descriptor
>>   *****************************************************************************/
>>  static int  Create ( vlc_object_t * );
>>  static void Destroy( vlc_object_t * );
>>
>> +#ifdef HAVE_FONTCONFIG
>> +#define FONT_LONGTEXT N_("Font family for the font you want to use")
>> +#else
>> +#define FONT_LONGTEXT N_("Fontfile for the font you want to use")
>> +#endif
>> +
>> +#define FONT_TEXT N_("Font")
>> +#define LINESPACING_TEXT N_("Line spacing")
>> +#define LINESPACING_LONGTEXT N_("")
>> +#define BORDERSTYLE_TEXT N_("Border style")
>> +#define BORDERSTYLE_LONGTEXT N_("")
>> +#define OUTLINE_TEXT N_("Outline")
>> +#define OUTLINE_LONGTEXT N_("")
>> +#define SHADOW_TEXT N_("Shadow")
>> +#define SHADOW_LONGTEXT N_("")
>> +#define LEFTMARGIN_TEXT N_("Left margin")
>> +#define RIGHTMARGIN_TEXT N_("Right margin")
>> +#define VERTICALMARGIN_TEXT N_("Vertical margin")
>> +#define MARGIN_LONGTEXT N_("")
>> +#define VERTICALALIGNMENT_TEXT N_("Vertical alignment")
>> +#define HORIZONTALALIGNMENT_TEXT N_("Horizontal alignment")
>> +#define ALIGNMENT_LONGTEXT N_("")
>> +#define BOLD_TEXT N_("Bold")
>> +#define ITALIC_TEXT N_("Italic")
>> +#define FONTSIZE_TEXT N_("Font size")
>> +#define FONTSIZE_LONGTEXT N_("")
>> +#define TEXTCOLOR_TEXT N_("Text color")
>> +#define TEXTTRANSPARENCY_TEXT N_("Text transparency")
>> +#define BORDERCOLOR_TEXT N_("Border color")
>> +#define BORDERTRANSPARENCY_TEXT N_("Border transparency")
>> +#define SHADOWCOLOR_TEXT N_("Shadow color")
>> +#define SHADOWTRANSPARENCY_TEXT N_("Shadow transparency")
>> +#define COLOR_LONGTEXT N_("")
>> +#define TRANSPARENCY_LONGTEXT N_("0 = totally opaque, 255 = transparent")
>> +#define FORCE_STYLE_TEXT N_("Override default style for ASS subtitles")
>> +
>> +#define BORDER_OUTLINE     1
>> +#define BORDER_OPAQUE_BOX  3
>> +
>> +static int const pi_border_style[] = { BORDER_OUTLINE, BORDER_OPAQUE_BOX };
>> +static const char *const ppsz_border_style_text[] = {
>> +    N_("Outline"), N_("Opaque box") };
>> +
>> +static int const pi_vertical_alignment[] = { VALIGN_TOP, VALIGN_CENTER, VALIGN_SUB };
>> +static const char *const ppsz_vertical_alignment_text[] = {
>> +    N_("Top"), N_("Center"), N_("Sub") };
>> +
>> +static int const pi_horizontal_alignment[] = { HALIGN_LEFT, HALIGN_CENTER, HALIGN_RIGHT };
>> +static const char *const ppsz_horizontal_alignment_text[] = {
>> +    N_("Left"), N_("Center"), N_("Right") };
>> +
>> +static const int pi_color_values[] = {
>> +  0x000000, 0x808080, 0xC0C0C0, 0xFFFFFF, 0x800000,
>> +  0xFF0000, 0xFF00FF, 0xFFFF00, 0x008080, 0x008000, 0x008080,
>> +  0x00FF00, 0x800080, 0x000080, 0x0000FF, 0x00FFFF };
>> +
>> +static const char *const ppsz_color_descriptions[] = {
>> +  N_("Black"), N_("Gray"), N_("Silver"), N_("White"), N_("Maroon"),
>> +  N_("Red"), N_("Fuchsia"), N_("Yellow"), N_("Olive"), N_("Green"), N_("Teal"),
>> +  N_("Lime"), N_("Purple"), N_("Navy"), N_("Blue"), N_("Aqua") };
>> +
>> +
>>  vlc_module_begin ()
>>      set_shortname( N_("Subtitles (advanced)"))
>>      set_description( N_("Subtitle renderers using libass") )
>>      set_capability( "decoder", 100 )
>>      set_category( CAT_INPUT )
>>      set_subcategory( SUBCAT_INPUT_SCODEC )
>> +
>> +    add_font( "libass-font", FC_DEFAULT_FONT, FONT_TEXT, FONT_LONGTEXT,
>> +              false )
>> +
>> +    add_integer( "libass-fontsize", 18, FONTSIZE_TEXT,
>> +                 FONTSIZE_LONGTEXT, false )
>> +
>> +    add_bool( "libass-bold", true, BOLD_TEXT, BOLD_TEXT, false )
>> +
>> +    add_bool( "libass-italic", false, ITALIC_TEXT, ITALIC_TEXT, false )
>> +
>> +    /* hook to the color values list, with default 0xffffff = white */
>> +    add_integer_with_range( "libass-text-color", 0xFFFFFF, 0x000000, 0xFFFFFF, TEXTCOLOR_TEXT,
>> +                 COLOR_LONGTEXT, false )
>> +        change_integer_list( pi_color_values, ppsz_color_descriptions )
>> +    add_integer_with_range( "libass-text-transparency", 0x00, 0x00, 0xFF, TEXTTRANSPARENCY_TEXT,
>> +                 TRANSPARENCY_LONGTEXT, false )
>> +    add_integer_with_range( "libass-border-color", 0x000000, 0x000000, 0xFFFFFF, BORDERCOLOR_TEXT,
>> +                 COLOR_LONGTEXT, false )
>> +        change_integer_list( pi_color_values, ppsz_color_descriptions )
>> +    add_integer_with_range( "libass-border-transparency", 0x00, 0x00, 0xFF, BORDERTRANSPARENCY_TEXT,
>> +                 TRANSPARENCY_LONGTEXT, false )
>> +    add_integer_with_range( "libass-shadow-color", 0x000000, 0x000000, 0xFFFFFF, SHADOWCOLOR_TEXT,
>> +                 COLOR_LONGTEXT, false )
>> +        change_integer_list( pi_color_values, ppsz_color_descriptions )
>> +    add_integer_with_range( "libass-shadow-transparency", 0x80, 0x00, 0xFF, SHADOWTRANSPARENCY_TEXT,
>> +                 TRANSPARENCY_LONGTEXT, false )
>> +
>> +    add_integer( "libass-border-style", 2, BORDERSTYLE_TEXT,
>> +                 BORDERSTYLE_LONGTEXT, false )
>> +        change_integer_list( pi_border_style, ppsz_border_style_text )
>> +
>> +    add_float_with_range( "libass-outline", 1.0, 0.0, 4.0,
>> +                          OUTLINE_TEXT, OUTLINE_LONGTEXT, false )
>> +
>> +    add_float_with_range( "libass-shadow", 1.0, 0.0, 4.0,
>> +                          SHADOW_TEXT, SHADOW_LONGTEXT, false )
>> +
>> +    add_integer( "libass-line-spacing", 0, LINESPACING_TEXT,
>> +                 LINESPACING_LONGTEXT, false )
>> +
>> +    add_integer( "libass-left-margin", 20, LEFTMARGIN_TEXT,
>> +                 MARGIN_LONGTEXT, false )
>> +    add_integer( "libass-right-margin", 20, RIGHTMARGIN_TEXT,
>> +                 MARGIN_LONGTEXT, false )
>> +    add_integer( "libass-vertical-margin", 20, VERTICALMARGIN_TEXT,
>> +                 MARGIN_LONGTEXT, false )
>> +
>> +    add_integer( "libass-horizontal-alignment", HALIGN_CENTER, HORIZONTALALIGNMENT_TEXT,
>> +                 ALIGNMENT_LONGTEXT, false )
>> +        change_integer_list( pi_horizontal_alignment, ppsz_horizontal_alignment_text )
>> +
>> +    add_integer( "libass-vertical-alignment", VALIGN_SUB, VERTICALALIGNMENT_TEXT,
>> +                 ALIGNMENT_LONGTEXT, false )
>> +        change_integer_list( pi_vertical_alignment, ppsz_vertical_alignment_text )
>> +
>> +    add_bool( "libass-force-style", false, FORCE_STYLE_TEXT, FORCE_STYLE_TEXT, false )
>> +
>
> NO WAY.
> Duplicating options that are used in freetype is just a no-go.
>

Ok, I will reuse freetype variables.

>
>> +    p_sys->s_fontfamily = var_CreateGetString( p_dec, "libass-font" );
>> +    p_sys->i_font_size = var_CreateGetInteger( p_dec, "libass-fontsize" );
>> +    p_sys->b_bold = var_CreateGetBool( p_dec, "libass-bold" );
>> +    p_sys->b_italic = var_CreateGetBool( p_dec, "libass-italic" );
>> +    p_sys->i_text_color = var_CreateGetInteger( p_dec, "libass-text-color" ) << 8 | var_CreateGetInteger( p_dec, "libass-text-transparency" );
>> +    p_sys->i_border_color = var_CreateGetInteger( p_dec, "libass-border-color" ) << 8 | var_CreateGetInteger( p_dec, "libass-border-transparency" );
>> +    p_sys->i_shadow_color = var_CreateGetInteger( p_dec, "libass-shadow-color" ) << 8 | var_CreateGetInteger( p_dec, "libass-shadow-transparency" );
>> +    p_sys->i_border_style = var_CreateGetInteger( p_dec, "libass-border-style" );
>> +    p_sys->f_outline = var_CreateGetFloat( p_dec, "libass-outline" );
>> +    p_sys->f_shadow = var_CreateGetFloat( p_dec, "libass-shadow" );
>> +    p_sys->i_left_margin = var_CreateGetInteger( p_dec, "libass-left-margin" );
>> +    p_sys->i_right_margin = var_CreateGetInteger( p_dec, "libass-right-margin" );
>> +    p_sys->i_vertical_margin = var_CreateGetInteger( p_dec, "libass-vertical-margin" );
>> +    p_sys->i_horizontal_alignment = var_CreateGetInteger( p_dec, "libass-horizontal-alignment" );
>> +    p_sys->i_vertical_alignment = var_CreateGetInteger( p_dec, "libass-vertical-alignment" );
>> +    p_sys->b_force_style = var_CreateGetBool( p_dec, "libass-force-style" );
>
> THis shouldn't be var_CreateGet* I think.

I don't really get the difference between inherit createGet and get.

>
>> +#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
>
> Seriously? not __MIN ?

See below.

>
>> +    int i;
>> +    ASS_Event* evt = track->events + track->n_events - 1;
>> +
>> +    for (i = 0; i<track->n_events - 1; ++i) // ignoring last event, it is the one we are comparing with
>
> VLC is C99.

I tried to keep the code as close as possible from the mplayer code to
ease a possible update. I will update my patch accordingly.

>
>> +/*
>> + *      SubRip
>> + *
>> + *      Support basic tags (italic, bold, underline, strike-through)
>> + *      and font tag with size, color and face attributes.
>> + *
>> + */
>> +
>> +struct font_tag {
>> +    struct bstr face;
>> +    int size;
>> +    uint32_t color;
>> +};
>> +
>> +static const struct tag_conv {
>> +    const char *from;
>> +    const char *to;
>> +} subrip_basic_tags[] = {
>> +    {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
>> +    {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
>> +    {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
>> +    {"<s>", "{\\s1}"}, {"</s>", "{\\s0}"},
>> +/* don't escape {} because it breaks ass in srt tags
>> +    {"{", "\\{"}, {"}", "\\}"}, */
>> +    {"\n", "\\N"}
>> +};
>> +
>> +static const struct {
>> +    const char *s;
>> +    uint32_t v;
>> +} subrip_web_colors[] = {
>> +    /* 16 named HTML colors in BGR format */
>> +    {"red",     0x0000ff}, {"blue",   0xff0000}, {"lime",   0x00ff00},
>> +    {"aqua",    0xffff00}, {"purple", 0x800080}, {"yellow", 0x00ffff},
>> +    {"fuchsia", 0xff00ff}, {"white",  0xffffff}, {"gray",   0x808080},
>> +    {"maroon",  0x000080}, {"olive",  0x008080}, {"black",  0x000000},
>> +    {"silver",  0xc0c0c0}, {"teal",   0x808000}, {"green",  0x008000},
>> +    {"navy",    0x800000}
>> +};
>
>
> Are you sure there are no regressions with the SubRip test suite?

I don't know what this is and where to find it.

>
>> +void subassconvert_subrip(const char *orig, char *dest, size_t dest_buffer_size)
>> +{
>> +    /* line is not const to avoid warnings with strtol, etc.
>> +     * orig content won't be changed */
>> +    char *line = (char *)orig;
>> +    struct line new_line = {
>> +        .buf     = dest,
>> +        .bufsize = dest_buffer_size,
>> +    };
>> +    struct font_tag font_stack[SUBRIP_MAX_STACKED_FONT_TAGS];
>> +    int sp = 0;
>> +
>> +    font_stack[0] = (struct font_tag){}; // type with all defaults
>> +    while (*line && new_line.len < new_line.bufsize - 1) {
>> +        char *orig_line = line;
>> +        int i;
>> +
>> +        for (i = 0; i < FF_ARRAY_ELEMS(subrip_basic_tags); i++) {
>> +            const struct tag_conv *tag = &subrip_basic_tags[i];
>> +            int from_len = strlen(tag->from);
>> +            if (strncmp(line, tag->from, from_len) == 0) {
>> +                append_text(&new_line, "%s", tag->to);
>> +                line += from_len;
>> +            }
>> +        }
>> +
>> +        if (strncmp(line, "</font>", 7) == 0) {
>> +            /* Closing font tag */
>> +            line += 7;
>> +
>> +            if (sp > 0) {
>> +                struct font_tag *tag      = &font_stack[sp];
>> +                struct font_tag *last_tag = &tag[-1];
>> +                sp--;
>> +
>> +                if (tag->size) {
>> +                    if (!last_tag->size)
>> +                        append_text(&new_line, "{\\fs}");
>> +                    else if (last_tag->size != tag->size)
>> +                        append_text(&new_line, "{\\fs%d}", last_tag->size);
>> +                }
>> +
>> +                if (tag->color & SUBRIP_FLAG_COLOR) {
>> +                    if (!(last_tag->color & SUBRIP_FLAG_COLOR))
>> +                        append_text(&new_line, "{\\c}");
>> +                    else if (last_tag->color != tag->color)
>> +                        append_text(&new_line, "{\\c&H%06X&}",
>> +                                    last_tag->color & 0xffffff);
>> +                }
>> +
>> +                if (tag->face.len) {
>> +                    if (!last_tag->face.len)
>> +                        append_text(&new_line, "{\\fn}");
>> +                    else if (bstrcmp(last_tag->face, tag->face) != 0)
>> +                        append_text(&new_line, "{\\fn%.*s}",
>> +                                    BSTR_P(last_tag->face));
>> +                }
>> +            }
>> +        } else if (strncmp(line, "<font ", 6) == 0
>> +                   && sp + 1 < FF_ARRAY_ELEMS(font_stack)) {
>> +            /* Opening font tag */
>> +            char *potential_font_tag_start = line;
>> +            int len_backup = new_line.len;
>> +            struct font_tag *tag = &font_stack[sp + 1];
>> +            int has_valid_attr = 0;
>> +            int has_comma = 0;
>> +
>> +            *tag = tag[-1]; // keep values from previous tag
>> +            line += 6;
>> +
>> +            while (*line && *line != '>') {
>> +                if ((has_comma = strncmp(line, "size=\"", 6) == 0) || strncmp(line, "size=", 5) == 0) {
>> +                    line += 5;
>> +                    if (has_comma)
>> +                        line++;
>> +                    tag->size = strtol(line, &line, 10);
>> +                    if ((*line != '"' && *line != ' ' && *line != '>') || !tag->size)
>> +                        break;
>> +                    append_text(&new_line, "{\\fs%d}", tag->size);
>> +                    has_valid_attr = 1;
>> +                } else if ((has_comma = strncmp(line, "color=\"", 7) == 0) || strncmp(line, "color=", 6) == 0) {
>> +                    line += 6;
>> +                    if (has_comma)
>> +                        line++;
>> +                    if (*line == '#') {
>> +                        // #RRGGBB format
>> +                        line++;
>> +                        tag->color = strtol(line, &line, 16) & 0x00ffffff;
>> +                        if (*line != '"' && *line != ' ' && *line != '>')
>> +                            break;
>> +                        tag->color = ((tag->color & 0xff) << 16) |
>> +                                      (tag->color & 0xff00) |
>> +                                     ((tag->color & 0xff0000) >> 16) |
>> +                                     SUBRIP_FLAG_COLOR;
>> +                    } else {
>> +                        // Standard web colors
>> +                        int i, len = indexof(line, '"');
>> +                        if (len <= 0)
>> +                            len = indexof(line, ' ');
>> +                        if (len <= 0)
>> +                            len = indexof(line, '>');
>> +                        if (len <= 0)
>> +                            break;
>> +                        for (i = 0; i < FF_ARRAY_ELEMS(subrip_web_colors); i++) {
>> +                            const char *color = subrip_web_colors[i].s;
>> +                            if (strlen(color) == len
>> +                                && strncasecmp(line, color, len) == 0) {
>> +                                tag->color = SUBRIP_FLAG_COLOR | subrip_web_colors[i].v;
>> +                                break;
>> +                            }
>> +                        }
>> +
>> +                        line += len;
>> +
>> +                        if (i == FF_ARRAY_ELEMS(subrip_web_colors)) {
>> +                            /* mp_msg(MSGT_SUBREADER, MSGL_WARN,
>> +                                   MSGTR_SUBTITLES_SubRip_UnknownFontColor, orig); */
>> +                            append_text(&new_line, "{\\c}");
>> +                            continue;
>> +                        }
>> +                    }
>> +                    append_text(&new_line, "{\\c&H%06X&}", tag->color & 0xffffff);
>> +                    has_valid_attr = 1;
>> +                } else if ((has_comma = strncmp(line, "face=\"", 6) == 0) || strncmp(line, "face=", 5) == 0) {
>> +                    /* Font face attribute */
>> +                    int len;
>> +                    line += 5;
>> +                    if (has_comma)
>> +                        line++;
>> +                    len = indexof(line, '"');
>> +                    if (len <= 0)
>> +                        len = indexof(line, ' ');
>> +                    if (len <= 0)
>> +                        len = indexof(line, '>');
>> +                    if (len <= 0)
>> +                        break;
>> +                    tag->face.start = line;
>> +                    tag->face.len   = len;
>> +                    line += len;
>> +                    append_text(&new_line, "{\\fn%.*s}", BSTR_P(tag->face));
>> +                    has_valid_attr = 1;
>> +                }
>> +                line++;
>> +            }
>> +
>> +            if (!has_valid_attr || *line != '>') { /* Not valid font tag */
>> +                line = potential_font_tag_start;
>> +                new_line.len = len_backup;
>> +            } else {
>> +                sp++;
>> +                line++;
>> +            }
>> +        }
>> +
>> +        /* Tag conversion code didn't match */
>> +        if (line == orig_line)
>> +            new_line.buf[new_line.len++] = *line++;
>> +    }
>> +    new_line.buf[new_line.len] = 0;
>> +}
>
> Has this code been fuzzed?

Not by me, I guess it has been done before inclusion in mplayer.
How can I do that easily ? Isolate the function and launch it on
random generated string ?



More information about the vlc-devel mailing list