[vlc-devel] [PATCH 2/4] ytdl: access module for YoutubeDL

Thomas Guillem thomas at gllm.fr
Tue Sep 22 10:29:06 CEST 2020



On Tue, Sep 22, 2020, at 10:23, Rémi Denis-Courmont wrote:
> Hi,
> 
> For each HTTP URL, this checks if an extractor exists.
> 
> In the normal case, the URL is already a playable media and it won't match, so we will just fallback to the normal access. This takes some processing, just like the Lua parsers. But it's not even close to being as terrible as the 3 BD and DVD modules which systematically try to open and parse every file URI.

I didn't know that BD/DVD could slow down the access opening. We should open a ticket (if not already done) and try to improve it.

For information, lua parsers are terribly slow on android. We spent a lot of time trying to improve or just disable it for some case (mp3 parsing take 50ms without lua versus 1seconds with lua).

I would love to have youtube-dl on any platforms via your patches (but it's another story to put python3 on Android), but I'm also afraid of normal http URI slowdown (when ytdl parsing fails).


> 
> In the special case that it's a web page needing extraction, then it wouldn't have worked without the new module, so it's vain to even make a comparison.
> 
> 
> As for the security of YoutubeDL, it's out of scope. It's not even part of contribs. Besides, Python is far less prone to security issues than C. There are far more dangerous libraries that we ship and use and yet don't fuzz ourselves.
> 
> If you found a security issue with the code in the patch, please say so.
> 
> Le 22 septembre 2020 08:50:48 GMT+03:00, Steve Lhomme <robux4 at ycbcr.xyz> a écrit :
>> On 2020-09-21 18:02, Rémi Denis-Courmont wrote:
>>> This passes every HTTP(S) URL through YoutubeDL to extract playlists
>> 
>> That sounds like a terrible idea. What is the extra time and memory 
>> consumption added to every HTTP call ?
>> 
>> Also that means the python script becomes a very sensitive part of VLC. 
>> Is it going to be fuzzed ? What about the Youtube-DL that users may 
>> download from questionable locations, since we don't ship it (nor fuzz it) ?
>> 
>>> or media. If YoutubeDL fails, it falls back to the normal HTTP(S)
>>> access. This includes if YoutubeDL or Python 3 are not found.  NEWS                       |   1 +
>>>   modules/access/Makefile.am |   5 +
>>>   modules/access/ytdl.c      | 200 +++++++++++++++++++++++++++++++++++++
>>>   po/POTFILES.in             |   1 +
>>>   4 files changed, 207 insertions(+)
>>>   create mode 100644 modules/access/ytdl.c
>>> 
>>> diff --git a/NEWS b/NEWS
>>> index 9af6a77b8f..7eb1a72567 100644
>>> --- a/NEWS
>>> +++ b/NEWS
>>> @@ -65,6 +65,7 @@ Access:
>>>    * Audio CD data tracks are now correctly detected and skipped
>>>    * Deprecates Audio CD CDDB lookups in favor of more accurate Musicbrainz
>>>    * Improved CD-TEXT and added Shift-JIS encoding support
>>> + * Support for YoutubeDL (where available).
>>>   
>>>   Access output:
>>>    * Added support for the RIST (Reliable Internet Stream Transport) Protocol
>>> diff --git a/modules/access/Makefile.am b/modules/access/Makefile.am
>>> index 9cd3098fc2..f7dc76d6d3 100644
>>> --- a/modules/access/Makefile.am
>>> +++ b/modules/access/Makefile.am
>>> @@ -436,6 +436,11 @@ libaccess_mtp_plugin_la_LDFLAGS = $(AM_LDFLAGS) -rpath '$(accessdir)'
>>>   access_LTLIBRARIES += $(LTLIBaccess_mtp)
>>>   EXTRA_LTLIBRARIES += libaccess_mtp_plugin.la
>>>   
>>> +libaccess_ytdl_plugin_la_SOURCES = access/ytdl.c
>>> +if !HAVE_WIN32
>>> +access_LTLIBRARIES += libaccess_ytdl_plugin.la
>>> +endif
>>> +
>>>   ### SRT ###
>>>   
>>>   libaccess_srt_plugin_la_SOURCES = access/srt.c access/srt_common.c access/srt_common.h dummy.cpp
>>> diff --git a/modules/access/ytdl.c b/modules/access/ytdl.c
>>> new file mode 100644
>>> index 0000000000..4a47f4491e
>>> --- /dev/null
>>> +++ b/modules/access/ytdl.c
>>> @@ -0,0 +1,200 @@
>>> +/*****************************************************************************
>>> + * ytdl.c:
>>> + *****************************************************************************
>>> + * Copyright (C) 2019-2020 Rémi Denis-Courmont
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify it
>>> + * under the terms of the GNU Lesser General Public License as published by
>>> + * the Free Software Foundation; either version 2.1 of the License, or
>>> + * (at your option) any later version.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>>> + * GNU Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public License
>>> + * along with this program; if not, write to the Free Software Foundation,
>>> + * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
>>> + *****************************************************************************/
>>> +
>>> +#ifdef HAVE_CONFIG_H
>>> +# include "config.h"
>>> +#endif
>>> +
>>> +#define PY_SSIZE_T_CLEAN
>>> +
>>> +#include <assert.h>
>>> +#include <errno.h>
>>> +#include <signal.h>
>>> +#include <stdlib.h>
>>> +
>>> +#include <vlc_common.h>
>>> +#include <vlc_access.h>
>>> +#include <vlc_fs.h>
>>> +#include <vlc_input_item.h>
>>> +#include <vlc_interrupt.h>
>>> +#include <vlc_plugin.h>
>>> +#include <vlc_spawn.h>
>>> +
>>> +
>>> +struct ytdl_sys {
>>> +    pid_t pid;
>>> +    int fd;
>>> +    char first_byte;
>>> +};
>>> +
>>> +static size_t readall(int fd, void *buf, size_t len)
>>> +{
>>> +    size_t total = 0;
>>> +
>>> +    while (len > 0 && !vlc_killed()) {
>>> +        ssize_t val = vlc_read_i11e(fd, buf, len);
>>> +
>>> +        if (val == 0)
>>> +            break; /* EOS */
>>> +
>>> +        if (val < 0) {
>>> +            if (errno != EINTR)
>>> +                break; /* fatal error */
>>> +            continue;
>>> +        }
>>> +
>>> +        buf = (char *)buf + val;
>>> +        len -= val;
>>> +        total += val;
>>> +    }
>>> +
>>> +    return total;
>>> +}
>>> +
>>> +static ssize_t Read(stream_t *s, void *buf, size_t len)
>>> +{
>>> +    struct ytdl_sys *sys = s->p_sys;
>>> +    ssize_t val = vlc_read_i11e(sys->fd, buf, len);
>>> +
>>> +    if (val < 0) {
>>> +        if (errno == EINTR || errno == EAGAIN)
>>> +            return -1;
>>> +
>>> +        msg_Err(s, "read error: %s", vlc_strerror_c(errno));
>>> +        val = 0;
>>> +    }
>>> +
>>> +    return val;
>>> +}
>>> +
>>> +static int Control(stream_t *s, int query, va_list args)
>>> +{
>>> +    switch (query)
>>> +    {
>>> +        case STREAM_CAN_SEEK:
>>> +        case STREAM_CAN_FASTSEEK:
>>> +        case STREAM_CAN_PAUSE:
>>> +        case STREAM_CAN_CONTROL_PACE:
>>> +            *va_arg(args, bool *) = false;
>>> +            break;
>>> +
>>> +        case STREAM_GET_TYPE:
>>> +            *va_arg(args, int *) = ITEM_TYPE_PLAYLIST;
>>> +            break;
>>> +
>>> +        case STREAM_GET_PTS_DELAY:
>>> +            *va_arg(args, vlc_tick_t *) =
>>> +                 VLC_TICK_FROM_MS(var_InheritInteger(s, "network-caching"));
>>> +            break;
>>> +
>>> +        default:
>>> +            return VLC_EGENERIC;
>>> +
>>> +    }
>>> +
>>> +    return VLC_SUCCESS;
>>> +}
>>> +
>>> +static ssize_t ReadFirstByte(stream_t *s, void *buf, size_t len)
>>> +{
>>> +    struct ytdl_sys *sys = s->p_sys;
>>> +
>>> +    if (unlikely(len == 0))
>>> +        return 0;
>>> +
>>> +    *(char *)buf = sys->first_byte;
>>> +    s->pf_read = Read;
>>> +    return 1;
>>> +}
>>> +
>>> +static void Close(vlc_object_t *obj)
>>> +{
>>> +    stream_t *s = (stream_t *)obj;
>>> +    struct ytdl_sys *sys = s->p_sys;
>>> +
>>> +    kill(sys->pid, SIGTERM);
>>> +    vlc_close(sys->fd);
>>> +    vlc_waitpid(sys->pid);
>>> +}
>>> +
>>> +static int Open(vlc_object_t *obj)
>>> +{
>>> +    stream_t *s = (stream_t *)obj;
>>> +    ssize_t val;
>>> +
>>> +    if (!var_InheritBool(s, "ytdl"))
>>> +        return VLC_EGENERIC;
>>> +
>>> +    struct ytdl_sys *sys = vlc_obj_malloc(obj, sizeof (*sys));
>>> +    if (unlikely(sys == NULL))
>>> +        return VLC_EGENERIC;
>>> +
>>> +    char *path = config_GetSysPath(VLC_PKG_DATA_DIR, "ytdl-extract.py");
>>> +    if (unlikely(path == NULL))
>>> +        return VLC_EGENERIC;
>>> +
>>> +    int fds[2];
>>> +
>>> +    if (vlc_pipe(fds)) {
>>> +        free(path);
>>> +        return VLC_EGENERIC;
>>> +    }
>>> +
>>> +    s->p_sys = sys;
>>> +    sys->fd = fds[0];
>>> +
>>> +    int fdv[] = { -1, fds[1], 2, -1 };
>>> +    const char *argv[] = { path, s->psz_url, NULL };
>>> +
>>> +    val = vlc_spawn(&sys->pid, path, fdv, argv);
>>> +    vlc_close(fds[1]);
>>> +
>>> +    if (val) {
>>> +        msg_Dbg(obj, "cannot start %s: %s", path, vlc_strerror_c(val));
>>> +        free(path);
>>> +        vlc_close(fds[0]);
>>> +        return VLC_EGENERIC;
>>> +    }
>>> +
>>> +    free(path);
>>> +
>>> +    if (readall(sys->fd, &sys->first_byte, 1) <= 0) {
>>> +        /* Location not handled */
>>> +        msg_Dbg(s, "cannot extract infos");
>>> +        Close(obj);
>>> +        return VLC_EGENERIC;
>>> +    }
>>> +
>>> +    s->pf_read = ReadFirstByte;
>>> +    s->pf_control = Control;
>>> +    return VLC_SUCCESS;
>>> +}
>>> +
>>> +vlc_module_begin()
>>> +    set_shortname("YT-DL")
>>> +    set_description("YoutubeDL")
>>> +    set_category(CAT_INPUT)
>>> +    set_subcategory(SUBCAT_INPUT_STREAM_FILTER)
>>> +    set_capability("access", 10)
>>> +    add_shortcut("http", "https")
>>> +    set_callbacks(Open, Close)
>>> +    add_bool("ytdl", true, N_("Enable YT-DL"), N_("Enable YT-DL"), true)
>>> +        change_safe()
>>> +vlc_module_end()
>>> diff --git a/po/POTFILES.in b/po/POTFILES.in
>>> index d5ce933408..9d49cb59d8 100644
>>> --- a/po/POTFILES.in
>>> +++ b/po/POTFILES.in
>>> @@ -212,6 +212,7 @@ modules/access/vcd/vcd.c
>>>   modules/access/vdr.c
>>>   modules/access/vnc.c
>>>   modules/access/wasapi.c
>>> +modules/access/ytdl.c
>>>   modules/access_output/dummy.c
>>>   modules/access_output/file.c
>>>   modules/access_output/http.c
>>> -- 
>>> 2.28.0vlc-devel mailing list
>>> To unsubscribe or modify your subscription options:
>>> https://mailman.videolan.org/listinfo/vlc-devel
>> vlc-devel mailing list
>> To unsubscribe or modify your subscription options:
>> https://mailman.videolan.org/listinfo/vlc-devel
> 
> -- 
> Envoyé de mon appareil Android avec Courriel K-9 Mail. Veuillez excuser ma brièveté. 
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/vlc-devel/attachments/20200922/dfbe2588/attachment.html>


More information about the vlc-devel mailing list