[vlc-devel] [vlc-commits] Add a SAPI synthetizer for Windows

Sun Oct 11 23:20:57 CEST 2015

Hi,

On 10/11/2015 09:49 PM, Jean-Baptiste Kempf wrote:
> vlc | branch: master | Jean-Baptiste Kempf <jb at videolan.org> | Sun Oct 11 18:26:11 2015 +0200| [a7eb0f0aa5c3003535a091364160c918db97c4ec] | committer: Jean-Baptiste Kempf
>
> Add a SAPI synthetizer for Windows
>
> This is the work from Moti Zilberman, modified by me to build and
> integrate in-tree
>
> It's heavily inspired on the OS X one.
>
> Be careful: you need a very recent Mingw-W64 to hope to compile it.
>
> Ref #11893
>
>> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=a7eb0f0aa5c3003535a091364160c918db97c4ec
> ---
>
>   NEWS                              |    1 +
>   modules/MODULES_LIST              |    1 +
>   modules/text_renderer/Makefile.am |    6 +
>   modules/text_renderer/sapi.cpp    |  236 +++++++++++++++++++++++++++++++++++++
>   po/POTFILES.in                    |    1 +
>   5 files changed, 245 insertions(+)
>
> diff --git a/NEWS b/NEWS
> index 2023bc9..16ea9e3 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -163,6 +163,7 @@ Misc
>    * remove ZPL playlist format
>    * Update libVLC doxygen modules
>    * Add a text-to-speech renderer for subtitles on OS X/iOS
> + * Add a text-to-speech renderer for subtitles on Windows
>
>   Removed modules
>    * Atmo video filter
> diff --git a/modules/MODULES_LIST b/modules/MODULES_LIST
> index 472ad05..a34dc6e 100644
> --- a/modules/MODULES_LIST
> +++ b/modules/MODULES_LIST
> @@ -319,6 +319,7 @@ $Id$
>    * rv32: RV32 image format conversion module
>    * samplerate: Secret Rabbit Code (libsamplerate) audio resampler
>    * sap: Interface module to read SAP/SDP announcements
> + * sapi: Windows Text to Speech Synthetizer using the SAPI 5.1 API
>    * scale: Images rescaler
>    * scaletempo: Scale audio tempo in sync with playback rate
>    * scene: scene video filter
> diff --git a/modules/text_renderer/Makefile.am b/modules/text_renderer/Makefile.am
> index 43ccb67..7634a4f 100644
> --- a/modules/text_renderer/Makefile.am
> +++ b/modules/text_renderer/Makefile.am
> @@ -42,6 +42,12 @@ libnsspeechsynthesizer_plugin_la_LDFLAGS = -Wl,-framework,Cocoa
>   text_LTLIBRARIES += libnsspeechsynthesizer_plugin.la
>   endif
>
> +libsapi_plugin_la_SOURCES = text_renderer/sapi.cpp
> +libsapi_plugin_la_LIBADD = -lole32
> +if HAVE_WIN32
> +text_LTLIBRARIES += libsapi_plugin.la
> +endif
> +
>   libsvg_plugin_la_SOURCES = text_renderer/svg.c
>   libsvg_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) $(SVG_CFLAGS)
>   libsvg_plugin_la_LDFLAGS = $(AM_LDFLAGS) -rpath '$(textdir)'
> diff --git a/modules/text_renderer/sapi.cpp b/modules/text_renderer/sapi.cpp
> new file mode 100644
> index 0000000..5df8dcf
> --- /dev/null
> +++ b/modules/text_renderer/sapi.cpp
> @@ -0,0 +1,236 @@
> +/*****************************************************************************
> + * sapi.cpp: Simple text to Speech renderer for Windows, based on SAPI
> + *****************************************************************************
> + * Copyright (c) 2015 Moti Zilberman
> + *
> + * Authors: Moti Zilberman
> + *          Jean-Baptiste Kempf
> + *
> + * The MIT License (MIT)
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in all
> + * copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *****************************************************************************/
> +
> +#ifdef HAVE_CONFIG_H
> +# include "config.h"
> +#endif
> +
> +/* VLC core API headers */
> +#include <vlc_common.h>
> +#include <vlc_plugin.h>
> +#include <vlc_filter.h>
> +#include <vlc_charset.h>
> +
> +#define INITGUID
> +
> +#include <windows.h>
> +#include <sapi.h>
> +#include <sphelper.h>
> +
> +static int Create (vlc_object_t *);
> +static void Destroy(vlc_object_t *);
> +static int RenderText(filter_t *,
> +                      subpicture_region_t *,
> +                      subpicture_region_t *,
> +                      const vlc_fourcc_t *);
> +
> +vlc_module_begin ()
> + set_description(N_("Speech synthesis for Windows"))
> +
> + set_category(CAT_VIDEO)
> + set_subcategory(SUBCAT_VIDEO_SUBPIC)
> +
> + set_capability("text renderer", 0)
> + set_callbacks(Create, Destroy)
> + add_integer("sapi-voice", -1, "Voice Index", "Voice index", false)
> +vlc_module_end ()
> +
> +struct filter_sys_t
> +{
> +    ISpVoice* cpVoice;
> +    char* lastString;
> +};
> +
> +/* MTA functions */
> +static int TryEnterMTA(vlc_object_t *obj)
> +{
> +    HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
> +    if (unlikely(FAILED(hr)))
> +    {
> +        msg_Err (obj, "cannot initialize COM (error 0x%lx)", hr);
> +        return -1;
> +    }
> +    return 0;
> +}
> +#define TryEnterMTA(o) TryEnterMTA(VLC_OBJECT(o))
> +
> +static void EnterMTA(void)
> +{
> +    HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
> +    if (unlikely(FAILED(hr)))
> +        abort();
> +}
> +
> +static void LeaveMTA(void)
> +{
> +    CoUninitialize();
> +}
> +
> +static int Create (vlc_object_t *p_this)
> +{
> +    filter_t *p_filter = (filter_t *)p_this;
> +    filter_sys_t *p_sys;
> +    HRESULT hr;
> +
> +    if (TryEnterMTA(p_this))
> +        return VLC_EGENERIC;
> +
> +    p_filter->p_sys = p_sys = (filter_sys_t*) malloc(sizeof(filter_sys_t));
> +    if (!p_sys)
> +        goto error;
> +
> +    p_sys->cpVoice = nullptr;
> +    p_sys->lastString = nullptr;

Do you really want to require C++11? As far as I can see, nullptr is
pretty much the only C++ feature used here (besides methods).

> +
> +    hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_INPROC_SERVER, IID_ISpVoice, (void**) &p_sys->cpVoice);
> +    if (SUCCEEDED(hr)) {
> +        ISpObjectToken*        cpVoiceToken = nullptr;
> +        IEnumSpObjectTokens*   cpEnum = nullptr;
> +        ULONG ulCount = 0;
> +
> +        hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum);
> +        if (SUCCEEDED(hr))
> +        {
> +            // Get the number of voices.
> +            hr = cpEnum->GetCount(&ulCount);
> +            if (SUCCEEDED (hr))
> +            {
> +                int voiceIndex = var_InheritInteger(p_this, "sapi-voice");
> +                if (voiceIndex > -1)
> +                {
> +                    if ((unsigned)voiceIndex <= ulCount) {
> +                        hr = cpEnum->Item(voiceIndex, &cpVoiceToken);
> +                        if (SUCCEEDED(hr)) {
> +                            hr = p_sys->cpVoice->SetVoice(cpVoiceToken);
> +                            if (SUCCEEDED(hr)) {
> +                                msg_Dbg(p_this, "Selected voice %d", voiceIndex);
> +                            }
> +                            else {
> +                                msg_Err(p_this, "Failed to set voice %d", voiceIndex);
> +                            }
> +                            cpVoiceToken->Release();
> +                            cpVoiceToken = nullptr;
> +                        }
> +                    }
> +                    else
> +                        msg_Err(p_this, "Voice index exceeds available count");
> +                }
> +            }
> +            cpEnum->Release();
> +            cpEnum = nullptr;

Why the assignment?

> +
> +            /* Set Output */
> +            hr = p_sys->cpVoice->SetOutput(nullptr, TRUE);
> +        }
> +    }
> +    else
> +    {
> +        msg_Err(p_filter, "Could not create SpVoice");
> +        goto error;
> +    }
> +
> +    LeaveMTA();
> +
> +    p_filter->pf_render = RenderText;
> +
> +    return VLC_SUCCESS;
> +
> +error:
> +    LeaveMTA();
> +    free(p_sys);
> +    return VLC_EGENERIC;
> +}
> +
> +static void Destroy(vlc_object_t *p_this)
> +{
> +    filter_t *p_filter = (filter_t *)p_this;
> +    filter_sys_t *p_sys = p_filter->p_sys;
> +
> +    if (p_sys->cpVoice) {
> +        p_sys->cpVoice->Release();
> +        p_sys->cpVoice = nullptr;

Same question here and below about the assignment

> +    }
> +
> +    if (p_sys->lastString) {

Unnecessary if: free() on a null pointer is a no-op.

> +        free(p_sys->lastString);
> +        p_sys->lastString = nullptr;
> +    }
> +
> +    free(p_sys);
> +}
> +
> +static int RenderText(filter_t *p_filter,
> +        subpicture_region_t *p_region_out,
> +        subpicture_region_t *p_region_in,
> +        const vlc_fourcc_t *p_chroma_list)
> +{
> +    VLC_UNUSED(p_region_out);
> +    VLC_UNUSED(p_chroma_list);

You could omit the parameter name instead

> +
> +    filter_sys_t *p_sys = p_filter->p_sys;
> +    text_segment_t *p_segment = p_region_in->p_text;
> +
> +    if (!p_segment)
> +        return VLC_EGENERIC;
> +
> +

Is that really an error? If not, this should probably be merged with the
loop below.

> +    for (const text_segment_t *s = p_segment; s != nullptr; s = s->p_next ) {
> +        if (!s->psz_text )
> +            continue;
> +
> +        if (strlen(s->psz_text) == 0)
> +            continue;
> +
> +        try {
> +            if (p_sys->lastString && !strcmp(p_sys->lastString, s->psz_text))
> +                continue;
> +
> +            if (!strcmp(s->psz_text, "\n"))
> +                continue;
> +
> +            p_sys->lastString = strdup(s->psz_text);

This appears to be leaking: the previous lastString is overwritten
without being freed.

> +            if (p_sys->lastString) {
> +                msg_Dbg(p_filter, "Speaking '%s'", s->psz_text);
> +
> +                EnterMTA();
> +                wchar_t* wideText = ToWide(s->psz_text);
> +                HRESULT hr = p_sys->cpVoice->Speak(wideText, SPF_ASYNC, nullptr);
> +                free(wideText);
> +                if (!SUCCEEDED(hr)) {
> +                    msg_Err(p_filter, "Speak() error");
> +                }
> +                LeaveMTA();
> +            }
> +        }
> +        catch (...) {
> +            msg_Err(p_filter, "Caught an exception!");

If you're assuming that the underlying code may throw exceptions, you
should enclose your allocations and Enter/LeaveMTA calls in an RAII object.

> +        }
> +    }
> +
> +    return VLC_SUCCESS;
> +}
> diff --git a/po/POTFILES.in b/po/POTFILES.in
> index 49e4d25..8077a0c 100644
> --- a/po/POTFILES.in
> +++ b/po/POTFILES.in
> @@ -1063,6 +1063,7 @@ modules/stream_out/transcode/transcode.c
>   modules/text_renderer/freetype.c
>   modules/text_renderer/nsspeechsynthesizer.m
>   modules/text_renderer/quartztext.c
> +modules/text_renderer/sapi.cpp
>   modules/text_renderer/svg.c
>   modules/text_renderer/tdummy.c
>   modules/text_renderer/win32text.c
>
> _______________________________________________
> vlc-commits mailing list
> vlc-commits at videolan.org
> https://mailman.videolan.org/listinfo/vlc-commits
>