[vlc-commits] vout/opengl: add direct rendering support (OpenGL 4.4)

Thomas Guillem git at videolan.org
Wed Jan 18 07:54:18 CET 2017


vlc | branch: master | Thomas Guillem <thomas at gllm.fr> | Fri Dec 30 09:57:37 2016 +0100| [565d1771dbbbec7437681ac9b247adf9b4049999] | committer: Thomas Guillem

vout/opengl: add direct rendering support (OpenGL 4.4)

This commit adds support for direct rendering with YUV/RGB software chromas.
This is done using Pixel Buffer Objects (PBOs, buffer objects that are used for
asynchronous pixel transfer operations) [1][2]. PBOs have been available since
OpenGL 2.1 and OpenGL ES 3.0.

But there is an issue: VLC software decoders and video filters might need to
read picture buffers while they're being displayed. Therefore, the basic use
case of PBOs can't work (since you need to unmap the buffer before displaying
it).

To solve this issue, we need to use persistently mapped buffers [3]. This can be
done using the glBufferStorage() function with the GL_MAP_PERSISTENT_BIT flag.

Unfortunately, this new API has only been available since OpenGL 4.4, and as an
extension since OpenGL ES 3.1 (so no Android, macOS or iOS support for now).

References:
[1]: https://www.khronos.org/opengl/wiki/Pixel_Buffer_Object
[2]: http://www.songho.ca/opengl/gl_pbo.html
[3]: https://www.khronos.org/opengl/wiki/Buffer_Object_Streaming

> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=565d1771dbbbec7437681ac9b247adf9b4049999
---

 NEWS                                      |   1 +
 modules/video_output/opengl/converters.c  | 288 +++++++++++++++++++++++++++++-
 modules/video_output/opengl/internal.h    |  21 +++
 modules/video_output/opengl/vout_helper.c |   9 +
 4 files changed, 312 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index 8aba9cf..350098f 100644
--- a/NEWS
+++ b/NEWS
@@ -150,6 +150,7 @@ Video ouput:
  * EFL Evas video output with Tizen TBM Surface support
  * New OpenGL provider for Windows
  * Drop OpenGL 1.x and OpenGL ES 1 support
+ * Direct rendering with OpenGL (starting OpenGL 4.4)
 
 Text renderer:
  * CTL support through Harfbuzz in the Freetype module
diff --git a/modules/video_output/opengl/converters.c b/modules/video_output/opengl/converters.c
index 080064b..bcbc5a1 100644
--- a/modules/video_output/opengl/converters.c
+++ b/modules/video_output/opengl/converters.c
@@ -22,10 +22,11 @@
 # include "config.h"
 #endif
 
-#include <vlc_memory.h>
-
 #include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
 
+#include <vlc_memory.h>
 #include "internal.h"
 
 #ifndef GL_RED
@@ -40,6 +41,17 @@
 #define NEED_GL_EXT_unpack_subimage
 #endif
 
+#ifdef VLCGL_HAS_PBO
+struct picture_sys_t
+{
+    const opengl_tex_converter_t *tc;
+    GLuint      buffers[PICTURE_PLANE_MAX];
+    size_t      bytes[PICTURE_PLANE_MAX];
+    GLsync      fence;
+    unsigned    index;
+};
+#endif
+
 struct priv
 {
     GLint  tex_internal;
@@ -49,6 +61,12 @@ struct priv
     bool   has_unpack_subimage;
     void * texture_temp_buf;
     size_t texture_temp_buf_size;
+#ifdef VLCGL_HAS_PBO
+    struct {
+        picture_t *pics[VLCGL_PICTURE_MAX];
+        unsigned long long list;
+    } ongpu;
+#endif
 };
 
 struct yuv_priv
@@ -57,6 +75,239 @@ struct yuv_priv
     GLfloat local_value[16];
 };
 
+#ifdef VLCGL_HAS_PBO
+static int
+pbo_map(const opengl_tex_converter_t *tc, picture_t *pic)
+{
+    picture_sys_t *picsys = pic->p_sys;
+
+    tc->api->GenBuffers(pic->i_planes, picsys->buffers);
+
+    const GLbitfield access = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT |
+                              GL_MAP_PERSISTENT_BIT;
+    for (int i = 0; i < pic->i_planes; ++i)
+    {
+        tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
+        tc->api->BufferStorage(GL_PIXEL_UNPACK_BUFFER, picsys->bytes[i], NULL,
+                               access);
+
+        pic->p[i].p_pixels =
+            tc->api->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, picsys->bytes[i],
+                                    access);
+
+        if (pic->p[i].p_pixels == NULL)
+        {
+            msg_Err(tc->parent, "could not map PBO buffers");
+            for (i = i - 1; i >= 0; --i)
+            {
+                tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER,
+                                    picsys->buffers[i]);
+                tc->api->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+            }
+            tc->api->DeleteBuffers(pic->i_planes, picsys->buffers);
+            memset(picsys->buffers, 0, PICTURE_PLANE_MAX * sizeof(GLuint));
+            return VLC_EGENERIC;
+        }
+    }
+    return VLC_SUCCESS;
+}
+
+/** Find next (bit) set */
+static int fnsll(unsigned long long x, unsigned i)
+{
+    if (i >= CHAR_BIT * sizeof (x))
+        return 0;
+    return ffsll(x & ~((1ULL << i) - 1));
+}
+
+static void
+pbo_release_gpupics(const opengl_tex_converter_t *tc, bool force)
+{
+    struct priv *priv = tc->priv;
+
+    /* Release all pictures that are not used by the GPU anymore */
+    for (unsigned i = ffsll(priv->ongpu.list); i;
+         i = fnsll(priv->ongpu.list, i))
+    {
+        assert(priv->ongpu.pics[i - 1] != NULL);
+
+        picture_t *pic = priv->ongpu.pics[i - 1];
+        picture_sys_t *picsys = pic->p_sys;
+
+        assert(picsys->fence != NULL);
+        GLenum wait = force ? GL_ALREADY_SIGNALED
+                            : tc->api->ClientWaitSync(picsys->fence, 0, 0);
+
+        if (wait == GL_ALREADY_SIGNALED || wait == GL_CONDITION_SATISFIED)
+        {
+            tc->api->DeleteSync(picsys->fence);
+            picsys->fence = NULL;
+
+            priv->ongpu.list &= ~(1ULL << (i - 1));
+            priv->ongpu.pics[i - 1] = NULL;
+            picture_Release(pic);
+        }
+    }
+}
+
+static int
+pbo_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
+                  unsigned width, unsigned height, picture_t *pic)
+{
+    struct priv *priv = tc->priv;
+    picture_sys_t *picsys = pic->p_sys;
+
+    for (int i = 0; i < pic->i_planes; i++)
+    {
+        tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
+        if (picsys->fence == NULL)
+            tc->api->FlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, 0,
+                                            picsys->bytes[i]);
+        glActiveTexture(GL_TEXTURE0 + i);
+        glClientActiveTexture(GL_TEXTURE0 + i);
+        glBindTexture(tc->tex_target, textures[i]);
+
+        glPixelStorei(GL_UNPACK_ROW_LENGTH,
+                      pic->p[i].i_pitch / pic->p[i].i_pixel_pitch);
+
+        glTexSubImage2D(tc->tex_target, 0, 0, 0,
+                        width * tc->desc->p[i].w.num / tc->desc->p[i].w.den,
+                        height * tc->desc->p[i].h.num / tc->desc->p[i].h.den,
+                        priv->tex_format, priv->tex_type, NULL);
+    }
+
+    bool hold;
+    if (picsys->fence == NULL)
+        hold = true;
+    else
+    {
+        /* The picture is already held */
+        hold = false;
+        tc->api->DeleteSync(picsys->fence);
+    }
+
+    picsys->fence = tc->api->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+    if (pic->p_sys->fence == NULL)
+    {
+        /* Error (corner case): don't hold the picture */
+        hold = false;
+    }
+
+    pbo_release_gpupics(tc, false);
+
+    if (hold)
+    {
+        /* Hold the picture while it's used by the GPU */
+        unsigned index = pic->p_sys->index;
+
+        priv->ongpu.list |= 1ULL << index;
+        assert(priv->ongpu.pics[index] == NULL);
+        priv->ongpu.pics[index] = pic;
+        picture_Hold(pic);
+    }
+
+    /* turn off pbo */
+    tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+    return VLC_SUCCESS;
+}
+
+static void
+picture_destroy_cb(picture_t *pic)
+{
+    picture_sys_t *picsys = pic->p_sys;
+    const opengl_tex_converter_t *tc = picsys->tc;
+
+    if (picsys->buffers[0] != 0)
+    {
+        for (int i = 0; i < pic->i_planes; ++i)
+        {
+            tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
+            tc->api->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+        }
+        tc->api->DeleteBuffers(pic->i_planes, picsys->buffers);
+    }
+    free(picsys);
+    free(pic);
+}
+
+static picture_pool_t *
+tc_common_get_pool(const opengl_tex_converter_t *tc, const video_format_t *fmt,
+                   unsigned requested_count, const GLuint *textures)
+{
+    struct priv *priv = tc->priv;
+    picture_t *pictures[VLCGL_PICTURE_MAX];
+    unsigned count;
+    (void) textures;
+
+    priv->ongpu.list = 0;
+
+    for (count = 0; count < requested_count; count++)
+    {
+        picture_sys_t *picsys = calloc(1, sizeof(*picsys));
+        if (unlikely(picsys == NULL))
+            break;
+        picsys->tc = tc;
+        picsys->index = count;
+        picture_resource_t rsc = {
+            .p_sys = picsys,
+            .pf_destroy = picture_destroy_cb,
+        };
+
+        picture_t *pic = pictures[count] = picture_NewFromResource(fmt, &rsc);
+        if (pic == NULL)
+        {
+            free(picsys);
+            break;
+        }
+        if (picture_Setup(pic, fmt))
+        {
+            picture_Release(pic);
+            break;
+        }
+
+        assert(pic->i_planes > 0
+            && (unsigned) pic->i_planes == tc->desc->plane_count);
+
+        for (int i = 0; i < pic->i_planes; ++i)
+        {
+            const plane_t *p = &pic->p[i];
+
+            if( p->i_pitch < 0 || p->i_lines <= 0 ||
+                (size_t)p->i_pitch > SIZE_MAX/p->i_lines )
+                goto error;
+            picsys->bytes[i] = (p->i_pitch * p->i_lines) + 15 / 16 * 16;
+            assert(picsys->bytes[i] == pictures[0]->p_sys->bytes[i]);
+        }
+
+        if (pbo_map(tc, pic) != VLC_SUCCESS)
+        {
+            picture_Release(pic);
+            break;
+        }
+    }
+
+    /* We need a minimum of 2 PBO buffers */
+    if (count <= 1)
+        goto error;
+
+    /* turn off pbo */
+    tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+    /* Wrap the pictures into a pool */
+    picture_pool_t *pool = picture_pool_New(count, pictures);
+    if (!pool)
+        goto error;
+    return pool;
+
+error:
+    for (unsigned i = 0; i < count; i++)
+        picture_Release(pictures[i]);
+
+    return NULL;
+}
+#endif /* VLCGL_HAS_PBO */
+
 static int
 tc_common_gen_textures(const opengl_tex_converter_t *tc,
                        const GLsizei *tex_width, const GLsizei *tex_height,
@@ -163,6 +414,11 @@ tc_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
                  unsigned width, unsigned height,
                  picture_t *pic, const size_t *plane_offset)
 {
+#ifdef VLCGL_HAS_PBO
+    if (pic->p_sys != NULL)
+        return pbo_common_update(tc, textures, width, height, pic);
+#endif
+
     int ret = VLC_SUCCESS;
     for (unsigned i = 0; i < tc->desc->plane_count && ret == VLC_SUCCESS; i++)
     {
@@ -184,11 +440,16 @@ tc_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
 static void
 tc_common_release(const opengl_tex_converter_t *tc)
 {
-    tc->api->DeleteShader(tc->fragment_shader);
+    if (tc->fragment_shader != 0)
+        tc->api->DeleteShader(tc->fragment_shader);
 
     struct priv *priv = tc->priv;
     free(priv->texture_temp_buf);
 
+#ifdef VLCGL_HAS_PBO
+    pbo_release_gpupics(tc, true);
+#endif
+
     free(tc->priv);
 }
 
@@ -209,6 +470,19 @@ common_init(opengl_tex_converter_t *tc, size_t priv_size, vlc_fourcc_t chroma,
     tc->pf_update       = tc_common_update;
     tc->pf_release      = tc_common_release;
 
+#ifdef VLCGL_HAS_PBO
+    const bool supports_pbo = tc->api->BufferStorage
+        && tc->api->MapBufferRange && tc->api->FlushMappedBufferRange
+        && tc->api->UnmapBuffer && tc->api->FenceSync && tc->api->DeleteSync
+        && tc->api->ClientWaitSync
+        && HasExtension(tc->glexts, "GL_ARB_pixel_buffer_object")
+        && HasExtension(tc->glexts, "GL_ARB_buffer_storage");
+    if (supports_pbo)
+        tc->pf_get_pool = tc_common_get_pool;
+    msg_Dbg(tc->parent, "PBO support for %4.4s (direct rendering): %s",
+            (const char *) &chroma, supports_pbo ? "On" : "Off");
+#endif
+
     tc->tex_target      = GL_TEXTURE_2D;
     priv->tex_internal  = tex_internal;
     priv->tex_format    = tex_format;
@@ -274,7 +548,7 @@ opengl_tex_converter_rgba_init(const video_format_t *fmt,
     tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
     if (tc->fragment_shader == 0)
     {
-        free(tc->priv);
+        tc_common_release(tc);
         return VLC_EGENERIC;
     }
     tc->api->ShaderSource(tc->fragment_shader, 1, &code, NULL);
@@ -448,7 +722,7 @@ opengl_tex_converter_yuv_init(const video_format_t *fmt,
                  swap_uv ? 'z' : 'y',
                  swap_uv ? 'y' : 'z') < 0)
     {
-        free(tc->priv);
+        tc_common_release(tc);
         return VLC_ENOMEM;
     }
 
@@ -464,7 +738,7 @@ opengl_tex_converter_yuv_init(const video_format_t *fmt,
     tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
     if (tc->fragment_shader == 0)
     {
-        free(tc->priv);
+        tc_common_release(tc);
         free(code);
         return VLC_EGENERIC;
     }
@@ -531,7 +805,7 @@ opengl_tex_converter_xyz12_init(const video_format_t *fmt,
     tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
     if (tc->fragment_shader == 0)
     {
-        free(tc->priv);
+        tc_common_release(tc);
         return VLC_EGENERIC;
     }
     tc->api->ShaderSource(tc->fragment_shader, 1, &code, NULL);
diff --git a/modules/video_output/opengl/internal.h b/modules/video_output/opengl/internal.h
index e9bf846..6c66739 100644
--- a/modules/video_output/opengl/internal.h
+++ b/modules/video_output/opengl/internal.h
@@ -33,6 +33,9 @@
 #   define GLSL_VERSION "120"
 #   define VLCGL_TEXTURE_COUNT 1
 #   define VLCGL_PICTURE_MAX 128
+#   ifdef GL_VERSION_4_4
+#       define VLCGL_HAS_PBO
+#   endif
 #   define PRECISION ""
 #endif
 
@@ -61,6 +64,15 @@
 #   define PFNGLGENBUFFERSPROC               typeof(glGenBuffers)*
 #   define PFNGLBINDBUFFERPROC               typeof(glBindBuffer)*
 #   define PFNGLBUFFERDATAPROC               typeof(glBufferData)*
+#   ifdef VLCGL_HAS_PBO
+#    define PFNGLBUFFERSTORAGEPROC           typeof(glBufferStorage)*
+#    define PFNGLMAPBUFFERRANGEPROC          typeof(glMapBufferRange)*
+#    define PFNGLFLUSHMAPPEDBUFFERRANGEPROC  typeof(glFlushMappedBufferRange)*
+#    define PFNGLUNMAPBUFFERPROC             typeof(glUnmapBuffer)*
+#    define PFNGLFENCESYNCPROC               typeof(glFenceSync)*
+#    define PFNGLDELETESYNCPROC              typeof(glDeleteSync)*
+#    define PFNGLCLIENTWAITSYNCPROC          typeof(glClientWaitSync)*
+#   endif
 #   define PFNGLDELETEBUFFERSPROC            typeof(glDeleteBuffers)*
 #if defined(__APPLE__)
 #   import <CoreFoundation/CoreFoundation.h>
@@ -104,6 +116,15 @@ typedef struct {
     PFNGLGENBUFFERSPROC    GenBuffers;
     PFNGLBINDBUFFERPROC    BindBuffer;
     PFNGLBUFFERDATAPROC    BufferData;
+#ifdef VLCGL_HAS_PBO
+    PFNGLBUFFERSTORAGEPROC          BufferStorage;
+    PFNGLMAPBUFFERRANGEPROC         MapBufferRange;
+    PFNGLFLUSHMAPPEDBUFFERRANGEPROC FlushMappedBufferRange;
+    PFNGLUNMAPBUFFERPROC            UnmapBuffer;
+    PFNGLFENCESYNCPROC              FenceSync;
+    PFNGLDELETESYNCPROC             DeleteSync;
+    PFNGLCLIENTWAITSYNCPROC         ClientWaitSync;
+#endif
     PFNGLDELETEBUFFERSPROC DeleteBuffers;
 
 #if defined(_WIN32)
diff --git a/modules/video_output/opengl/vout_helper.c b/modules/video_output/opengl/vout_helper.c
index 84453f2..4a6e201 100644
--- a/modules/video_output/opengl/vout_helper.c
+++ b/modules/video_output/opengl/vout_helper.c
@@ -220,6 +220,15 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
     api->BindBuffer    = GET_PROC_ADDR(glBindBuffer);
     api->BufferData    = GET_PROC_ADDR(glBufferData);
     api->DeleteBuffers = GET_PROC_ADDR(glDeleteBuffers);
+#ifdef VLCGL_HAS_PBO
+    api->BufferStorage          = GET_PROC_ADDR(glBufferStorage);
+    api->MapBufferRange         = GET_PROC_ADDR(glMapBufferRange);
+    api->FlushMappedBufferRange = GET_PROC_ADDR(glFlushMappedBufferRange);
+    api->UnmapBuffer            = GET_PROC_ADDR(glUnmapBuffer);
+    api->FenceSync              = GET_PROC_ADDR(glFenceSync);
+    api->DeleteSync             = GET_PROC_ADDR(glDeleteSync);
+    api->ClientWaitSync         = GET_PROC_ADDR(glClientWaitSync);
+#endif
 #undef GET_PROC_ADDR
 
     if (!vgl->api.CreateShader || !vgl->api.ShaderSource || !vgl->api.CreateProgram)



More information about the vlc-commits mailing list