[vlc-devel] [PATCH 1/2] vout/opengl: add direct rendering support (OpenGL 4.4)

Thomas Guillem thomas at gllm.fr
Tue Jan 3 16:45:24 CET 2017


This commit adds support for direct rendering with YUV/RGB software chromas.
This is done using Pixel Buffer Objects (PBOs: buffer objects used for
asynchronous pixel transfer operations) [1][2]. PBOs have been available for a
long time in OpenGL, and since version 3.0 in OpenGL ES.

But there is a problem: VLC software decoders and video filters might need to
read picture buffers while they are being displayed. Therefore, the basic use
case of PBOs can't work (since you need to unmap the buffer before displaying
it).

To solve this issue, we need to use persistent mapped buffers[3]. This can be
done using the glBufferStorage() function with the GL_MAP_PERSISTENT_BIT flag.

Unfortunately, this API has only been available since OpenGL 4.4, and as an
extension since OpenGL ES 3.1 (so there is no Android, macOS or iOS support).

References:
[1]: https://www.khronos.org/opengl/wiki/Pixel_Buffer_Object
[2]: http://www.songho.ca/opengl/gl_pbo.html
[3]: https://www.khronos.org/opengl/wiki/Buffer_Object_Streaming
---
 NEWS                                      |   1 +
 modules/video_output/opengl/converters.c  | 300 +++++++++++++++++++++++++++++-
 modules/video_output/opengl/internal.h    |  22 +++
 modules/video_output/opengl/vout_helper.c |  24 +++
 4 files changed, 340 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index 8aba9cfc4d..350098fcbe 100644
--- a/NEWS
+++ b/NEWS
@@ -150,6 +150,7 @@ Video ouput:
  * EFL Evas video output with Tizen TBM Surface support
  * New OpenGL provider for Windows
  * Drop OpenGL 1.x and OpenGL ES 1 support
+ * Direct rendering with OpenGL (starting OpenGL 4.4)
 
 Text renderer:
  * CTL support through Harfbuzz in the Freetype module
diff --git a/modules/video_output/opengl/converters.c b/modules/video_output/opengl/converters.c
index 8b6b4b40a6..f278bb96ae 100644
--- a/modules/video_output/opengl/converters.c
+++ b/modules/video_output/opengl/converters.c
@@ -22,10 +22,11 @@
 # include "config.h"
 #endif
 
-#include <vlc_memory.h>
-
 #include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
 
+#include <vlc_memory.h>
 #include "internal.h"
 
 #ifndef GL_RED
@@ -35,6 +36,30 @@
 #define GL_R16 0
 #endif
 
+#ifdef VLCGL_HAS_PBO
+# ifndef GL_UNPACK_ROW_LENGTH
+#  error "PBO without GL_UNPACK_ROW_LENGTH"
+# endif
+#endif
+
+#ifndef NDEBUG
+static __thread bool dbg_is_glthread = false;
+#define ASSERT_GLTHREAD() assert(dbg_is_glthread)
+#else
+#define ASSERT_GLTHREAD()
+#endif
+
+#ifdef VLCGL_HAS_PBO
+struct picture_sys_t
+{
+    const opengl_tex_converter_t *tc;
+    GLuint      buffers[PICTURE_PLANE_MAX];
+    size_t      bytes[PICTURE_PLANE_MAX];
+    GLsync      fence;
+    unsigned    index;
+};
+#endif
+
 struct priv
 {
     GLint  tex_internal;
@@ -45,6 +70,12 @@ struct priv
     void * texture_temp_buf;
     size_t texture_temp_buf_size;
 #endif
+#ifdef VLCGL_HAS_PBO
+    struct {
+        picture_t *pics[VLCGL_PICTURE_MAX];
+        unsigned long long list;
+    } ongpu;
+#endif
 };
 
 struct yuv_priv
@@ -53,6 +84,242 @@ struct yuv_priv
     GLfloat local_value[16];
 };
 
+#ifdef VLCGL_HAS_PBO
+static int
+pbo_map(const opengl_tex_converter_t *tc, picture_t *pic)
+{
+    picture_sys_t *picsys = pic->p_sys;
+
+    tc->api->GenBuffers(pic->i_planes, picsys->buffers);
+
+    const GLbitfield access = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT |
+                              GL_MAP_PERSISTENT_BIT;
+    for (int i = 0; i < pic->i_planes; ++i)
+    {
+        tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
+        tc->api->BufferStorage(GL_PIXEL_UNPACK_BUFFER, picsys->bytes[i], NULL,
+                               access);
+
+        pic->p[i].p_pixels =
+            tc->api->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, picsys->bytes[i],
+                                    access);
+
+        if (pic->p[i].p_pixels == NULL)
+        {
+            msg_Err(tc->parent, "could not map PBO buffers");
+            for (i = i - 1; i >= 0; --i)
+            {
+                tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER,
+                                    picsys->buffers[i]);
+                tc->api->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+            }
+            tc->api->DeleteBuffers(pic->i_planes, picsys->buffers);
+            memset(picsys->buffers, 0, PICTURE_PLANE_MAX * sizeof(GLuint));
+            return VLC_EGENERIC;
+        }
+    }
+    return VLC_SUCCESS;
+}
+
+/** Find next (bit) set */
+static int fnsll(unsigned long long x, unsigned i)
+{
+    if (i >= CHAR_BIT * sizeof (x))
+        return 0;
+    return ffsll(x & ~((1ULL << i) - 1));
+}
+
+static void
+pbo_release_gpupics(const opengl_tex_converter_t *tc, bool force)
+{
+    struct priv *priv = tc->priv;
+
+    /* Release all pictures that are not used by the GPU anymore */
+    for (unsigned i = ffsll(priv->ongpu.list); i;
+         i = fnsll(priv->ongpu.list, i))
+    {
+        assert(priv->ongpu.pics[i - 1] != NULL);
+
+        picture_t *pic = priv->ongpu.pics[i - 1];
+        picture_sys_t *picsys = pic->p_sys;
+
+        assert(picsys->fence != NULL);
+        GLenum wait = force ? GL_ALREADY_SIGNALED
+                            : tc->api->ClientWaitSync(picsys->fence, 0, 0);
+
+        if (wait == GL_ALREADY_SIGNALED || wait == GL_CONDITION_SATISFIED)
+        {
+            tc->api->DeleteSync(picsys->fence);
+            picsys->fence = NULL;
+
+            priv->ongpu.list &= ~(1ULL << (i - 1));
+            priv->ongpu.pics[i - 1] = NULL;
+            picture_Release(pic);
+        }
+    }
+}
+
+static int
+pbo_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
+                  unsigned width, unsigned height, picture_t *pic)
+{
+    struct priv *priv = tc->priv;
+    picture_sys_t *picsys = pic->p_sys;
+
+    for (int i = 0; i < pic->i_planes; i++)
+    {
+        tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
+        if (picsys->fence == NULL)
+            tc->api->FlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, 0,
+                                            picsys->bytes[i]);
+        glActiveTexture(GL_TEXTURE0 + i);
+        glClientActiveTexture(GL_TEXTURE0 + i);
+        glBindTexture(tc->tex_target, textures[i]);
+
+        glPixelStorei(GL_UNPACK_ROW_LENGTH,
+                      pic->p[i].i_pitch / pic->p[i].i_pixel_pitch);
+
+        glTexSubImage2D(tc->tex_target, 0, 0, 0,
+                        width * tc->desc->p[i].w.num / tc->desc->p[i].w.den,
+                        height * tc->desc->p[i].h.num / tc->desc->p[i].h.den,
+                        priv->tex_format, priv->tex_type, NULL);
+    }
+
+    bool hold;
+    if (picsys->fence == NULL)
+        hold = true;
+    else
+    {
+        /* The picture is already held */
+        hold = false;
+        tc->api->DeleteSync(picsys->fence);
+    }
+
+    picsys->fence = tc->api->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+    if (pic->p_sys->fence == NULL)
+    {
+        /* Error (corner case): don't hold the picture */
+        hold = false;
+    }
+
+    pbo_release_gpupics(tc, false);
+
+    if (hold)
+    {
+        /* Hold the picture while it's used by the GPU */
+        unsigned index = pic->p_sys->index;
+
+        priv->ongpu.list |= 1ULL << index;
+        assert(priv->ongpu.pics[index] == NULL);
+        priv->ongpu.pics[index] = pic;
+        picture_Hold(pic);
+    }
+
+    /* turn off pbo */
+    tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+    return VLC_SUCCESS;
+}
+
+static void
+picture_destroy_cb(picture_t *pic)
+{
+    picture_sys_t *picsys = pic->p_sys;
+    const opengl_tex_converter_t *tc = picsys->tc;
+
+    ASSERT_GLTHREAD();
+
+    if (picsys->buffers[0] != 0)
+    {
+        for (int i = 0; i < pic->i_planes; ++i)
+        {
+            tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
+            tc->api->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+        }
+        tc->api->DeleteBuffers(pic->i_planes, picsys->buffers);
+    }
+    free(picsys);
+    free(pic);
+}
+
+static picture_pool_t *
+tc_common_get_pool(const opengl_tex_converter_t *tc, const video_format_t *fmt,
+                   unsigned requested_count, const GLuint *textures)
+{
+    struct priv *priv = tc->priv;
+    picture_t *pictures[VLCGL_PICTURE_MAX];
+    unsigned count;
+    (void) textures;
+
+    ASSERT_GLTHREAD();
+    priv->ongpu.list = 0;
+
+    for (count = 0; count < requested_count; count++)
+    {
+        picture_sys_t *picsys = calloc(1, sizeof(*picsys));
+        if (unlikely(picsys == NULL))
+            break;
+        picsys->tc = tc;
+        picsys->index = count;
+        picture_resource_t rsc = {
+            .p_sys = picsys,
+            .pf_destroy = picture_destroy_cb,
+        };
+
+        picture_t *pic = pictures[count] = picture_NewFromResource(fmt, &rsc);
+        if (pic == NULL)
+        {
+            free(picsys);
+            break;
+        }
+        if (picture_Setup(pic, fmt))
+        {
+            picture_Release(pic);
+            break;
+        }
+
+        assert(pic->i_planes > 0
+            && (unsigned) pic->i_planes == tc->desc->plane_count);
+
+        for (int i = 0; i < pic->i_planes; ++i)
+        {
+            const plane_t *p = &pic->p[i];
+            /* Reject overflowing sizes, then round up to 16 bytes */
+            if( p->i_pitch < 0 || p->i_lines <= 0 ||
+                (size_t)p->i_pitch > SIZE_MAX/p->i_lines )
+                { picture_Release(pic); goto error; } /* not in [0,count) */
+            picsys->bytes[i] = ((size_t)p->i_pitch * p->i_lines + 15) / 16 * 16;
+            assert(picsys->bytes[i] == pictures[0]->p_sys->bytes[i]);
+        }
+
+        if (pbo_map(tc, pic) != VLC_SUCCESS)
+        {
+            picture_Release(pic);
+            break;
+        }
+    }
+
+    /* We need a minimum of 2 PBO-backed pictures */
+    if (count <= 1)
+        goto error;
+
+    /* turn off pbo */
+    tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+    /* Wrap the pictures into a pool */
+    picture_pool_t *pool = picture_pool_New(count, pictures);
+    if (!pool)
+        goto error;
+    return pool;
+
+error:
+    for (unsigned i = 0; i < count; i++)
+        picture_Release(pictures[i]);
+
+    return NULL;
+}
+#endif /* VLCGL_HAS_PBO */
+
 static int
 tc_common_gen_textures(const opengl_tex_converter_t *tc,
                        const GLsizei *tex_width, const GLsizei *tex_height,
@@ -153,6 +420,11 @@ tc_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
                  unsigned width, unsigned height,
                  picture_t *pic, const size_t *plane_offset)
 {
+#ifdef VLCGL_HAS_PBO
+    if (pic->p_sys != NULL)
+        return pbo_common_update(tc, textures, width, height, pic);
+#endif
+
     int ret = VLC_SUCCESS;
     for (unsigned i = 0; i < tc->desc->plane_count && ret == VLC_SUCCESS; i++)
     {
@@ -174,12 +446,17 @@ tc_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
 static void
 tc_common_release(const opengl_tex_converter_t *tc)
 {
-    tc->api->DeleteShader(tc->fragment_shader);
+    if (tc->fragment_shader != 0)
+        tc->api->DeleteShader(tc->fragment_shader);
 
 #ifndef GL_UNPACK_ROW_LENGTH
     struct priv *priv = tc->priv;
     free(priv->texture_temp_buf);
 #endif
+#ifdef VLCGL_HAS_PBO
+    pbo_release_gpupics(tc, true);
+#endif
+
     free(tc->priv);
 }
 
@@ -200,11 +477,20 @@ common_init(opengl_tex_converter_t *tc, size_t priv_size, vlc_fourcc_t chroma,
     tc->pf_update       = tc_common_update;
     tc->pf_release      = tc_common_release;
 
+#ifdef VLCGL_HAS_PBO
+    if (tc->supports_pbo)
+        tc->pf_get_pool = tc_common_get_pool;
+#endif
+
     tc->tex_target      = GL_TEXTURE_2D;
     priv->tex_internal  = tex_internal;
     priv->tex_format    = tex_format;
     priv->tex_type      = tex_type;
 
+#ifndef NDEBUG
+    dbg_is_glthread = true;
+#endif
+
     return VLC_SUCCESS;
 }
 
@@ -258,7 +544,7 @@ opengl_tex_converter_rgba_init(const video_format_t *fmt,
     tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
     if (tc->fragment_shader == 0)
     {
-        free(tc->priv);
+        tc_common_release(tc);
         return VLC_EGENERIC;
     }
     tc->api->ShaderSource(tc->fragment_shader, 1, &code, NULL);
@@ -432,7 +718,7 @@ opengl_tex_converter_yuv_init(const video_format_t *fmt,
                  swap_uv ? 'z' : 'y',
                  swap_uv ? 'y' : 'z') < 0)
     {
-        free(tc->priv);
+        tc_common_release(tc);
         return VLC_ENOMEM;
     }
 
@@ -448,7 +734,7 @@ opengl_tex_converter_yuv_init(const video_format_t *fmt,
     tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
     if (tc->fragment_shader == 0)
     {
-        free(tc->priv);
+        tc_common_release(tc);
         free(code);
         return VLC_EGENERIC;
     }
@@ -515,7 +801,7 @@ opengl_tex_converter_xyz12_init(const video_format_t *fmt,
     tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
     if (tc->fragment_shader == 0)
     {
-        free(tc->priv);
+        tc_common_release(tc);
         return VLC_EGENERIC;
     }
     tc->api->ShaderSource(tc->fragment_shader, 1, &code, NULL);
diff --git a/modules/video_output/opengl/internal.h b/modules/video_output/opengl/internal.h
index d925561b81..020f9b5cd3 100644
--- a/modules/video_output/opengl/internal.h
+++ b/modules/video_output/opengl/internal.h
@@ -33,6 +33,9 @@
 #   define GLSL_VERSION "120"
 #   define VLCGL_TEXTURE_COUNT 1
 #   define VLCGL_PICTURE_MAX 128
+#   ifdef GL_VERSION_4_4
+#       define VLCGL_HAS_PBO
+#   endif
 #   define PRECISION ""
 #endif
 
@@ -61,6 +64,15 @@
 #   define PFNGLGENBUFFERSPROC               typeof(glGenBuffers)*
 #   define PFNGLBINDBUFFERPROC               typeof(glBindBuffer)*
 #   define PFNGLBUFFERDATAPROC               typeof(glBufferData)*
+#   ifdef VLCGL_HAS_PBO
+#    define PFNGLBUFFERSTORAGEPROC           typeof(glBufferStorage)*
+#    define PFNGLMAPBUFFERRANGEPROC          typeof(glMapBufferRange)*
+#    define PFNGLFLUSHMAPPEDBUFFERRANGEPROC  typeof(glFlushMappedBufferRange)*
+#    define PFNGLUNMAPBUFFERPROC             typeof(glUnmapBuffer)*
+#    define PFNGLFENCESYNCPROC               typeof(glFenceSync)*
+#    define PFNGLDELETESYNCPROC              typeof(glDeleteSync)*
+#    define PFNGLCLIENTWAITSYNCPROC          typeof(glClientWaitSync)*
+#   endif
 #   define PFNGLDELETEBUFFERSPROC            typeof(glDeleteBuffers)*
 #if defined(__APPLE__)
 #   import <CoreFoundation/CoreFoundation.h>
@@ -104,6 +116,15 @@ typedef struct {
     PFNGLGENBUFFERSPROC    GenBuffers;
     PFNGLBINDBUFFERPROC    BindBuffer;
     PFNGLBUFFERDATAPROC    BufferData;
+#ifdef VLCGL_HAS_PBO
+    PFNGLBUFFERSTORAGEPROC          BufferStorage;
+    PFNGLMAPBUFFERRANGEPROC         MapBufferRange;
+    PFNGLFLUSHMAPPEDBUFFERRANGEPROC FlushMappedBufferRange;
+    PFNGLUNMAPBUFFERPROC            UnmapBuffer;
+    PFNGLFENCESYNCPROC              FenceSync;
+    PFNGLDELETESYNCPROC             DeleteSync;
+    PFNGLCLIENTWAITSYNCPROC         ClientWaitSync;
+#endif
     PFNGLDELETEBUFFERSPROC DeleteBuffers;
 
 #if defined(_WIN32)
@@ -137,6 +158,7 @@ typedef int (*opengl_tex_converter_init_cb)(const video_format_t *fmt,
  */
 struct opengl_tex_converter_t
 {
+    bool supports_pbo;
     /* Pointer to object parent, set by the caller of the init cb */
     vlc_object_t *parent;
     /* Function pointer to shaders commands, set by the caller of the init cb */
diff --git a/modules/video_output/opengl/vout_helper.c b/modules/video_output/opengl/vout_helper.c
index 4d0b6a676e..5c0185ef78 100644
--- a/modules/video_output/opengl/vout_helper.c
+++ b/modules/video_output/opengl/vout_helper.c
@@ -220,6 +220,15 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
     api->BindBuffer    = GET_PROC_ADDR(glBindBuffer);
     api->BufferData    = GET_PROC_ADDR(glBufferData);
     api->DeleteBuffers = GET_PROC_ADDR(glDeleteBuffers);
+#ifdef VLCGL_HAS_PBO
+    api->BufferStorage          = GET_PROC_ADDR(glBufferStorage);
+    api->MapBufferRange         = GET_PROC_ADDR(glMapBufferRange);
+    api->FlushMappedBufferRange = GET_PROC_ADDR(glFlushMappedBufferRange);
+    api->UnmapBuffer            = GET_PROC_ADDR(glUnmapBuffer);
+    api->FenceSync              = GET_PROC_ADDR(glFenceSync);
+    api->DeleteSync             = GET_PROC_ADDR(glDeleteSync);
+    api->ClientWaitSync         = GET_PROC_ADDR(glClientWaitSync);
+#endif
 #undef GET_PROC_ADDR
 
     if (!vgl->api.CreateShader || !vgl->api.ShaderSource || !vgl->api.CreateProgram)
@@ -249,6 +258,19 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
                          HasExtension(extensions, "GL_APPLE_texture_2D_limited_npot");
 #endif
 
+#if defined (VLCGL_HAS_PBO)
+    const bool supports_pbo = vgl->supports_npot && api->BufferStorage
+        && api->MapBufferRange && api->FlushMappedBufferRange
+        && api->UnmapBuffer && api->FenceSync && api->DeleteSync
+        && api->ClientWaitSync
+        && HasExtension(extensions, "GL_ARB_pixel_buffer_object")
+        && HasExtension(extensions, "GL_ARB_buffer_storage");
+#else
+    const bool supports_pbo = false;
+#endif
+    msg_Dbg(gl, "PBO support (for direct rendering): %s",
+            supports_pbo ? "On" : "Off");
+
     /* Initialize with default chroma */
     vgl->fmt = *fmt;
     vgl->fmt.i_chroma = VLC_CODEC_RGB32;
@@ -263,6 +285,7 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
 #   endif
     opengl_tex_converter_t tex_conv;
     opengl_tex_converter_t rgba_tex_conv = {
+        .supports_pbo = supports_pbo,
         .parent = VLC_OBJECT(vgl->gl),
         .api = &vgl->api,
         .orientation = fmt->orientation,
@@ -279,6 +302,7 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
     for (size_t i = 0; i < ARRAY_SIZE(opengl_tex_converter_init_cbs); ++i)
     {
         tex_conv = (opengl_tex_converter_t) {
+            .supports_pbo = supports_pbo,
             .parent = VLC_OBJECT(vgl->gl),
             .api = &vgl->api,
             .orientation = fmt->orientation,
-- 
2.11.0



More information about the vlc-devel mailing list