[vlc-devel] [PATCH 1/2] vout/opengl: add direct rendering support (OpenGL 4.4)

Hugo Beauzée-Luyssen hugo at beauzee.fr
Tue Jan 3 17:18:57 CET 2017


On 01/03/2017 04:45 PM, Thomas Guillem wrote:
> This commit adds support for direct rendering with YUV/RGB software chromas.
> This is done using Pixel Buffer Objects (PBOs: buffer objects that are used
> for asynchronous pixel transfer operations) [1][2]. PBOs have been available
> for a long time in OpenGL, and since version 3.0 in OpenGL ES.
>
> But there is a problem: VLC software decoders and video filters might need to
> read picture buffers while they're being displayed. Therefore, the basic use
> case of PBOs can't work (since you need to unmap the buffer before displaying
> it).
>
> To solve this issue, we need to use persistently mapped buffers [3]. This can be
> done using the glBufferStorage() function with the GL_MAP_PERSISTENT_BIT flag.
>
> Unfortunately, this new API has only been available since OpenGL 4.4, and as an
> extension since OpenGL ES 3.1 (so no Android, macOS or iOS support).
>
> References:
> [1]: https://www.khronos.org/opengl/wiki/Pixel_Buffer_Object
> [2]: http://www.songho.ca/opengl/gl_pbo.html
> [3]: https://www.khronos.org/opengl/wiki/Buffer_Object_Streaming
> ---
>  NEWS                                      |   1 +
>  modules/video_output/opengl/converters.c  | 300 +++++++++++++++++++++++++++++-
>  modules/video_output/opengl/internal.h    |  22 +++
>  modules/video_output/opengl/vout_helper.c |  24 +++
>  4 files changed, 340 insertions(+), 7 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 8aba9cfc4d..350098fcbe 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -150,6 +150,7 @@ Video ouput:
>   * EFL Evas video output with Tizen TBM Surface support
>   * New OpenGL provider for Windows
>   * Drop OpenGL 1.x and OpenGL ES 1 support
> + * Direct rendering with OpenGL (starting OpenGL 4.4)
>
>  Text renderer:
>   * CTL support through Harfbuzz in the Freetype module
> diff --git a/modules/video_output/opengl/converters.c b/modules/video_output/opengl/converters.c
> index 8b6b4b40a6..f278bb96ae 100644
> --- a/modules/video_output/opengl/converters.c
> +++ b/modules/video_output/opengl/converters.c
> @@ -22,10 +22,11 @@
>  # include "config.h"
>  #endif
>
> -#include <vlc_memory.h>
> -
>  #include <assert.h>
> +#include <limits.h>
> +#include <stdlib.h>
>
> +#include <vlc_memory.h>
>  #include "internal.h"
>
>  #ifndef GL_RED
> @@ -35,6 +36,30 @@
>  #define GL_R16 0
>  #endif
>
> +#ifdef VLCGL_HAS_PBO
> +# ifndef GL_UNPACK_ROW_LENGTH
> +#  error "PBO without GL_UNPACK_ROW_LENGTH"

Shouldn't this be done in opengl/internal.h to conditionally enable PBO 
support instead?

> +# endif
> +#endif
> +
> +#ifndef NDEBUG
> +static __thread bool dbg_is_glthread = false;

I'm not sure __thread plays well on all compilers.

> +#define ASSERT_GLTHREAD() assert(dbg_is_glthread)
> +#else
> +#define ASSERT_GLTHREAD()
> +#endif
> +
> +#ifdef VLCGL_HAS_PBO
> +struct picture_sys_t
> +{
> +    const opengl_tex_converter_t *tc;
> +    GLuint      buffers[PICTURE_PLANE_MAX];
> +    size_t      bytes[PICTURE_PLANE_MAX];

Wouldn't it be clearer to have a single array,
struct { GLuint, size_t }[PICTURE_PLANE_MAX]?

> +    GLsync      fence;
> +    unsigned    index;
> +};
> +#endif
> +
>  struct priv
>  {
>      GLint  tex_internal;
> @@ -45,6 +70,12 @@ struct priv
>      void * texture_temp_buf;
>      size_t texture_temp_buf_size;
>  #endif
> +#ifdef VLCGL_HAS_PBO
> +    struct {
> +        picture_t *pics[VLCGL_PICTURE_MAX];
> +        unsigned long long list;
> +    } ongpu;
> +#endif
>  };
>
>  struct yuv_priv
> @@ -53,6 +84,242 @@ struct yuv_priv
>      GLfloat local_value[16];
>  };
>
> +#ifdef VLCGL_HAS_PBO
> +static int
> +pbo_map(const opengl_tex_converter_t *tc, picture_t *pic)
> +{
> +    picture_sys_t *picsys = pic->p_sys;
> +
> +    tc->api->GenBuffers(pic->i_planes, picsys->buffers);
> +
> +    const GLbitfield access = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT |
> +                              GL_MAP_PERSISTENT_BIT;
> +    for (int i = 0; i < pic->i_planes; ++i)
> +    {
> +        tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
> +        tc->api->BufferStorage(GL_PIXEL_UNPACK_BUFFER, picsys->bytes[i], NULL,
> +                               access);
> +
> +        pic->p[i].p_pixels =
> +            tc->api->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, picsys->bytes[i],
> +                                    access);
> +
> +        if (pic->p[i].p_pixels == NULL)
> +        {
> +            msg_Err(tc->parent, "could not map PBO buffers");
> +            for (i = i - 1; i >= 0; --i)
> +            {
> +                tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER,
> +                                    picsys->buffers[i]);
> +                tc->api->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
> +            }
> +            tc->api->DeleteBuffers(pic->i_planes, picsys->buffers);
> +            memset(picsys->buffers, 0, PICTURE_PLANE_MAX * sizeof(GLuint));
> +            return VLC_EGENERIC;
> +        }
> +    }
> +    return VLC_SUCCESS;
> +}
> +
> +/** Find next (bit) set */
> +static int fnsll(unsigned long long x, unsigned i)

This could be inlined

> +{
> +    if (i >= CHAR_BIT * sizeof (x))
> +        return 0;
> +    return ffsll(x & ~((1ULL << i) - 1));
> +}
> +
> +static void
> +pbo_release_gpupics(const opengl_tex_converter_t *tc, bool force)
> +{
> +    struct priv *priv = tc->priv;
> +
> +    /* Release all pictures that are not used by the GPU anymore */
> +    for (unsigned i = ffsll(priv->ongpu.list); i;
> +         i = fnsll(priv->ongpu.list, i))
> +    {
> +        assert(priv->ongpu.pics[i - 1] != NULL);
> +
> +        picture_t *pic = priv->ongpu.pics[i - 1];
> +        picture_sys_t *picsys = pic->p_sys;
> +
> +        assert(picsys->fence != NULL);
> +        GLenum wait = force ? GL_ALREADY_SIGNALED
> +                            : tc->api->ClientWaitSync(picsys->fence, 0, 0);
> +
> +        if (wait == GL_ALREADY_SIGNALED || wait == GL_CONDITION_SATISFIED)
> +        {
> +            tc->api->DeleteSync(picsys->fence);
> +            picsys->fence = NULL;
> +
> +            priv->ongpu.list &= ~(1ULL << (i - 1));
> +            priv->ongpu.pics[i - 1] = NULL;
> +            picture_Release(pic);
> +        }
> +    }
> +}
> +
> +static int
> +pbo_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
> +                  unsigned width, unsigned height, picture_t *pic)
> +{
> +    struct priv *priv = tc->priv;
> +    picture_sys_t *picsys = pic->p_sys;
> +
> +    for (int i = 0; i < pic->i_planes; i++)
> +    {
> +        tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
> +        if (picsys->fence == NULL)
> +            tc->api->FlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, 0,
> +                                            picsys->bytes[i]);
> +        glActiveTexture(GL_TEXTURE0 + i);
> +        glClientActiveTexture(GL_TEXTURE0 + i);
> +        glBindTexture(tc->tex_target, textures[i]);
> +
> +        glPixelStorei(GL_UNPACK_ROW_LENGTH,
> +                      pic->p[i].i_pitch / pic->p[i].i_pixel_pitch);
> +
> +        glTexSubImage2D(tc->tex_target, 0, 0, 0,
> +                        width * tc->desc->p[i].w.num / tc->desc->p[i].w.den,
> +                        height * tc->desc->p[i].h.num / tc->desc->p[i].h.den,
> +                        priv->tex_format, priv->tex_type, NULL);
> +    }
> +
> +    bool hold;
> +    if (picsys->fence == NULL)
> +        hold = true;
> +    else
> +    {
> +        /* The picture is already held */
> +        hold = false;
> +        tc->api->DeleteSync(picsys->fence);
> +    }
> +
> +    picsys->fence = tc->api->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
> +    if (pic->p_sys->fence == NULL)
> +    {
> +        /* Error (corner case): don't hold the picture */
> +        hold = false;
> +    }
> +
> +    pbo_release_gpupics(tc, false);
> +
> +    if (hold)
> +    {
> +        /* Hold the picture while it's used by the GPU */
> +        unsigned index = pic->p_sys->index;
> +
> +        priv->ongpu.list |= 1ULL << index;
> +        assert(priv->ongpu.pics[index] == NULL);
> +        priv->ongpu.pics[index] = pic;
> +        picture_Hold(pic);
> +    }
> +
> +    /* turn off pbo */
> +    tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
> +
> +    return VLC_SUCCESS;
> +}
> +
> +static void
> +picture_destroy_cb(picture_t *pic)
> +{
> +    picture_sys_t *picsys = pic->p_sys;
> +    const opengl_tex_converter_t *tc = picsys->tc;
> +
> +    ASSERT_GLTHREAD();
> +
> +    if (picsys->buffers[0] != 0)
> +    {
> +        for (int i = 0; i < pic->i_planes; ++i)
> +        {
> +            tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
> +            tc->api->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
> +        }
> +        tc->api->DeleteBuffers(pic->i_planes, picsys->buffers);
> +    }
> +    free(picsys);
> +    free(pic);
> +}
> +
> +static picture_pool_t *
> +tc_common_get_pool(const opengl_tex_converter_t *tc, const video_format_t *fmt,
> +                   unsigned requested_count, const GLuint *textures)
> +{
> +    struct priv *priv = tc->priv;
> +    picture_t *pictures[VLCGL_PICTURE_MAX];
> +    unsigned count;
> +    (void) textures;
> +
> +    ASSERT_GLTHREAD();
> +    priv->ongpu.list = 0;
> +
> +    for (count = 0; count < requested_count; count++)
> +    {
> +        picture_sys_t *picsys = calloc(1, sizeof(*picsys));
> +        if (unlikely(picsys == NULL))
> +            break;
> +        picsys->tc = tc;
> +        picsys->index = count;
> +        picture_resource_t rsc = {
> +            .p_sys = picsys,
> +            .pf_destroy = picture_destroy_cb,
> +        };
> +
> +        picture_t *pic = pictures[count] = picture_NewFromResource(fmt, &rsc);
> +        if (pic == NULL)
> +        {
> +            free(picsys);
> +            break;
> +        }
> +        if (picture_Setup(pic, fmt))
> +        {
> +            picture_Release(pic);
> +            break;
> +        }
> +
> +        assert(pic->i_planes > 0
> +            && (unsigned) pic->i_planes == tc->desc->plane_count);
> +
> +        for (int i = 0; i < pic->i_planes; ++i)
> +        {
> +            const plane_t *p = &pic->p[i];
> +
> +            if( p->i_pitch < 0 || p->i_lines <= 0 ||
> +                (size_t)p->i_pitch > SIZE_MAX/p->i_lines )
> +                goto error;
> +            picsys->bytes[i] = (p->i_pitch * p->i_lines) + 15 / 16 * 16;
> +            assert(picsys->bytes[i] == pictures[0]->p_sys->bytes[i]);
> +        }
> +
> +        if (pbo_map(tc, pic) != VLC_SUCCESS)
> +        {
> +            picture_Release(pic);
> +            break;
> +        }
> +    }
> +
> +    /* We need minumum 2 pbo buffers */
> +    if (count <= 1)
> +        goto error;
> +
> +    /* turn off pbo */
> +    tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
> +
> +    /* Wrap the pictures into a pool */
> +    picture_pool_t *pool = picture_pool_New(count, pictures);
> +    if (!pool)
> +        goto error;
> +    return pool;
> +
> +error:
> +    for (unsigned i = 0; i < count; i++)
> +        picture_Release(pictures[i]);
> +
> +    return NULL;
> +}
> +#endif /* VLCGL_HAS_PBO */
> +
>  static int
>  tc_common_gen_textures(const opengl_tex_converter_t *tc,
>                         const GLsizei *tex_width, const GLsizei *tex_height,
> @@ -153,6 +420,11 @@ tc_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
>                   unsigned width, unsigned height,
>                   picture_t *pic, const size_t *plane_offset)
>  {
> +#ifdef VLCGL_HAS_PBO
> +    if (pic->p_sys != NULL)
> +        return pbo_common_update(tc, textures, width, height, pic);
> +#endif
> +
>      int ret = VLC_SUCCESS;
>      for (unsigned i = 0; i < tc->desc->plane_count && ret == VLC_SUCCESS; i++)
>      {
> @@ -174,12 +446,17 @@ tc_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
>  static void
>  tc_common_release(const opengl_tex_converter_t *tc)
>  {
> -    tc->api->DeleteShader(tc->fragment_shader);
> +    if (tc->fragment_shader != 0)
> +        tc->api->DeleteShader(tc->fragment_shader);
>
>  #ifndef GL_UNPACK_ROW_LENGTH
>      struct priv *priv = tc->priv;
>      free(priv->texture_temp_buf);
>  #endif
> +#ifdef VLCGL_HAS_PBO
> +    pbo_release_gpupics(tc, true);
> +#endif
> +
>      free(tc->priv);
>  }
>
> @@ -200,11 +477,20 @@ common_init(opengl_tex_converter_t *tc, size_t priv_size, vlc_fourcc_t chroma,
>      tc->pf_update       = tc_common_update;
>      tc->pf_release      = tc_common_release;
>
> +#ifdef VLCGL_HAS_PBO
> +    if (tc->supports_pbo)
> +        tc->pf_get_pool = tc_common_get_pool;
> +#endif
> +
>      tc->tex_target      = GL_TEXTURE_2D;
>      priv->tex_internal  = tex_internal;
>      priv->tex_format    = tex_format;
>      priv->tex_type      = tex_type;
>
> +#ifndef NDEBUG
> +    dbg_is_glthread = true;
> +#endif
> +
>      return VLC_SUCCESS;
>  }
>
> @@ -258,7 +544,7 @@ opengl_tex_converter_rgba_init(const video_format_t *fmt,
>      tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
>      if (tc->fragment_shader == 0)
>      {
> -        free(tc->priv);
> +        tc_common_release(tc);
>          return VLC_EGENERIC;
>      }
>      tc->api->ShaderSource(tc->fragment_shader, 1, &code, NULL);
> @@ -432,7 +718,7 @@ opengl_tex_converter_yuv_init(const video_format_t *fmt,
>                   swap_uv ? 'z' : 'y',
>                   swap_uv ? 'y' : 'z') < 0)
>      {
> -        free(tc->priv);
> +        tc_common_release(tc);
>          return VLC_ENOMEM;
>      }
>
> @@ -448,7 +734,7 @@ opengl_tex_converter_yuv_init(const video_format_t *fmt,
>      tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
>      if (tc->fragment_shader == 0)
>      {
> -        free(tc->priv);
> +        tc_common_release(tc);
>          free(code);
>          return VLC_EGENERIC;
>      }
> @@ -515,7 +801,7 @@ opengl_tex_converter_xyz12_init(const video_format_t *fmt,
>      tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
>      if (tc->fragment_shader == 0)
>      {
> -        free(tc->priv);
> +        tc_common_release(tc);
>          return VLC_EGENERIC;
>      }
>      tc->api->ShaderSource(tc->fragment_shader, 1, &code, NULL);
> diff --git a/modules/video_output/opengl/internal.h b/modules/video_output/opengl/internal.h
> index d925561b81..020f9b5cd3 100644
> --- a/modules/video_output/opengl/internal.h
> +++ b/modules/video_output/opengl/internal.h
> @@ -33,6 +33,9 @@
>  #   define GLSL_VERSION "120"
>  #   define VLCGL_TEXTURE_COUNT 1
>  #   define VLCGL_PICTURE_MAX 128
> +#   ifdef GL_VERSION_4_4
> +#       define VLCGL_HAS_PBO
> +#   endif
>  #   define PRECISION ""
>  #endif
>
> @@ -61,6 +64,15 @@
>  #   define PFNGLGENBUFFERSPROC               typeof(glGenBuffers)*
>  #   define PFNGLBINDBUFFERPROC               typeof(glBindBuffer)*
>  #   define PFNGLBUFFERDATAPROC               typeof(glBufferData)*
> +#   ifdef VLCGL_HAS_PBO
> +#    define PFNGLBUFFERSTORAGEPROC           typeof(glBufferStorage)*
> +#    define PFNGLMAPBUFFERRANGEPROC          typeof(glMapBufferRange)*
> +#    define PFNGLFLUSHMAPPEDBUFFERRANGEPROC  typeof(glFlushMappedBufferRange)*
> +#    define PFNGLUNMAPBUFFERPROC             typeof(glUnmapBuffer)*
> +#    define PFNGLFENCESYNCPROC               typeof(glFenceSync)*
> +#    define PFNGLDELETESYNCPROC              typeof(glDeleteSync)*
> +#    define PFNGLCLIENTWAITSYNCPROC          typeof(glClientWaitSync)*
> +#   endif
>  #   define PFNGLDELETEBUFFERSPROC            typeof(glDeleteBuffers)*
>  #if defined(__APPLE__)
>  #   import <CoreFoundation/CoreFoundation.h>
> @@ -104,6 +116,15 @@ typedef struct {
>      PFNGLGENBUFFERSPROC    GenBuffers;
>      PFNGLBINDBUFFERPROC    BindBuffer;
>      PFNGLBUFFERDATAPROC    BufferData;
> +#ifdef VLCGL_HAS_PBO
> +    PFNGLBUFFERSTORAGEPROC          BufferStorage;
> +    PFNGLMAPBUFFERRANGEPROC         MapBufferRange;
> +    PFNGLFLUSHMAPPEDBUFFERRANGEPROC FlushMappedBufferRange;
> +    PFNGLUNMAPBUFFERPROC            UnmapBuffer;
> +    PFNGLFENCESYNCPROC              FenceSync;
> +    PFNGLDELETESYNCPROC             DeleteSync;
> +    PFNGLCLIENTWAITSYNCPROC         ClientWaitSync;
> +#endif
>      PFNGLDELETEBUFFERSPROC DeleteBuffers;
>
>  #if defined(_WIN32)
> @@ -137,6 +158,7 @@ typedef int (*opengl_tex_converter_init_cb)(const video_format_t *fmt,
>   */
>  struct opengl_tex_converter_t
>  {
> +    bool supports_pbo;
>      /* Pointer to object parent, set by the caller of the init cb */
>      vlc_object_t *parent;
>      /* Function pointer to shaders commands, set by the caller of the init cb */
> diff --git a/modules/video_output/opengl/vout_helper.c b/modules/video_output/opengl/vout_helper.c
> index 4d0b6a676e..5c0185ef78 100644
> --- a/modules/video_output/opengl/vout_helper.c
> +++ b/modules/video_output/opengl/vout_helper.c
> @@ -220,6 +220,15 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
>      api->BindBuffer    = GET_PROC_ADDR(glBindBuffer);
>      api->BufferData    = GET_PROC_ADDR(glBufferData);
>      api->DeleteBuffers = GET_PROC_ADDR(glDeleteBuffers);
> +#ifdef VLCGL_HAS_PBO
> +    api->BufferStorage          = GET_PROC_ADDR(glBufferStorage);
> +    api->MapBufferRange         = GET_PROC_ADDR(glMapBufferRange);
> +    api->FlushMappedBufferRange = GET_PROC_ADDR(glFlushMappedBufferRange);
> +    api->UnmapBuffer            = GET_PROC_ADDR(glUnmapBuffer);
> +    api->FenceSync              = GET_PROC_ADDR(glFenceSync);
> +    api->DeleteSync             = GET_PROC_ADDR(glDeleteSync);
> +    api->ClientWaitSync         = GET_PROC_ADDR(glClientWaitSync);
> +#endif
>  #undef GET_PROC_ADDR
>
>      if (!vgl->api.CreateShader || !vgl->api.ShaderSource || !vgl->api.CreateProgram)
> @@ -249,6 +258,19 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
>                           HasExtension(extensions, "GL_APPLE_texture_2D_limited_npot");
>  #endif
>
> +#if defined (VLCGL_HAS_PBO)
> +    const bool supports_pbo = vgl->supports_npot && api->BufferStorage
> +        && api->MapBufferRange && api->FlushMappedBufferRange
> +        && api->UnmapBuffer && api->FenceSync && api->DeleteSync
> +        && api->ClientWaitSync
> +        && HasExtension(extensions, "GL_ARB_pixel_buffer_object")
> +        && HasExtension(extensions, "GL_ARB_buffer_storage");
> +#else
> +    const bool supports_pbo = false;
> +#endif
> +    msg_Dbg(gl, "PBO support (for direct rendering): %s",
> +            supports_pbo ? "On" : "Off");
> +
>      /* Initialize with default chroma */
>      vgl->fmt = *fmt;
>      vgl->fmt.i_chroma = VLC_CODEC_RGB32;
> @@ -263,6 +285,7 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
>  #   endif
>      opengl_tex_converter_t tex_conv;
>      opengl_tex_converter_t rgba_tex_conv = {
> +        .supports_pbo = supports_pbo,
>          .parent = VLC_OBJECT(vgl->gl),
>          .api = &vgl->api,
>          .orientation = fmt->orientation,
> @@ -279,6 +302,7 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
>      for (size_t i = 0; i < ARRAY_SIZE(opengl_tex_converter_init_cbs); ++i)
>      {
>          tex_conv = (opengl_tex_converter_t) {
> +            .supports_pbo = supports_pbo,
>              .parent = VLC_OBJECT(vgl->gl),
>              .api = &vgl->api,
>              .orientation = fmt->orientation,
>



More information about the vlc-devel mailing list