[vlc-devel] [PATCH 1/2] vout/opengl: add direct rendering support (OpenGL 4.4)
Hugo Beauzée-Luyssen
hugo at beauzee.fr
Tue Jan 3 17:18:57 CET 2017
On 01/03/2017 04:45 PM, Thomas Guillem wrote:
> This commit adds support for direct rendering with YUV/RGB software chromas.
> This is done using Pixel Buffer Objects (PBOs: buffer objects that are used for
> asynchronous pixel transfer operations) [1][2]. PBOs have been available for a
> long time in OpenGL, and since version 3.0 in OpenGL ES.
>
> But there is a problem: VLC software decoders and video filters might need to
> read picture buffers while they're being displayed. Therefore, the basic use
> case of PBOs can't work (since you need to unmap the buffer before displaying
> it).
>
> To solve this issue, we need to use persistently mapped buffers [3]. This can
> be done using the glBufferStorage() function with the GL_MAP_PERSISTENT_BIT flag.
>
> Unfortunately, this new API has only been available since OpenGL 4.4, and as an
> extension since OpenGL ES 3.1 (so there is no Android, macOS or iOS support).
>
> References:
> [1]: https://www.khronos.org/opengl/wiki/Pixel_Buffer_Object
> [2]: http://www.songho.ca/opengl/gl_pbo.html
> [3]: https://www.khronos.org/opengl/wiki/Buffer_Object_Streaming
> ---
> NEWS | 1 +
> modules/video_output/opengl/converters.c | 300 +++++++++++++++++++++++++++++-
> modules/video_output/opengl/internal.h | 22 +++
> modules/video_output/opengl/vout_helper.c | 24 +++
> 4 files changed, 340 insertions(+), 7 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 8aba9cfc4d..350098fcbe 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -150,6 +150,7 @@ Video ouput:
> * EFL Evas video output with Tizen TBM Surface support
> * New OpenGL provider for Windows
> * Drop OpenGL 1.x and OpenGL ES 1 support
> + * Direct rendering with OpenGL (starting OpenGL 4.4)
>
> Text renderer:
> * CTL support through Harfbuzz in the Freetype module
> diff --git a/modules/video_output/opengl/converters.c b/modules/video_output/opengl/converters.c
> index 8b6b4b40a6..f278bb96ae 100644
> --- a/modules/video_output/opengl/converters.c
> +++ b/modules/video_output/opengl/converters.c
> @@ -22,10 +22,11 @@
> # include "config.h"
> #endif
>
> -#include <vlc_memory.h>
> -
> #include <assert.h>
> +#include <limits.h>
> +#include <stdlib.h>
>
> +#include <vlc_memory.h>
> #include "internal.h"
>
> #ifndef GL_RED
> @@ -35,6 +36,30 @@
> #define GL_R16 0
> #endif
>
> +#ifdef VLCGL_HAS_PBO
> +# ifndef GL_UNPACK_ROW_LENGTH
> +# error "PBO without GL_UNPACK_ROW_LENGTH"
Shouldn't this be done in opengl/internal.h to conditionally enable PBO
support instead?
> +# endif
> +#endif
> +
> +#ifndef NDEBUG
> +static __thread bool dbg_is_glthread = false;
I'm not sure `__thread` plays well with all compilers.
> +#define ASSERT_GLTHREAD() assert(dbg_is_glthread)
> +#else
> +#define ASSERT_GLTHREAD()
> +#endif
> +
> +#ifdef VLCGL_HAS_PBO
> +struct picture_sys_t
> +{
> + const opengl_tex_converter_t *tc;
> + GLuint buffers[PICTURE_PLANE_MAX];
> + size_t bytes[PICTURE_PLANE_MAX];
Wouldn't it be clearer to have a single array,
struct { GLuint, size_t }[PICTURE_PLANE_MAX]?
> + GLsync fence;
> + unsigned index;
> +};
> +#endif
> +
> struct priv
> {
> GLint tex_internal;
> @@ -45,6 +70,12 @@ struct priv
> void * texture_temp_buf;
> size_t texture_temp_buf_size;
> #endif
> +#ifdef VLCGL_HAS_PBO
> + struct {
> + picture_t *pics[VLCGL_PICTURE_MAX];
> + unsigned long long list;
> + } ongpu;
> +#endif
> };
>
> struct yuv_priv
> @@ -53,6 +84,242 @@ struct yuv_priv
> GLfloat local_value[16];
> };
>
> +#ifdef VLCGL_HAS_PBO
> +static int
> +pbo_map(const opengl_tex_converter_t *tc, picture_t *pic)
> +{
> + picture_sys_t *picsys = pic->p_sys;
> +
> + tc->api->GenBuffers(pic->i_planes, picsys->buffers);
> +
> + const GLbitfield access = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT |
> + GL_MAP_PERSISTENT_BIT;
> + for (int i = 0; i < pic->i_planes; ++i)
> + {
> + tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
> + tc->api->BufferStorage(GL_PIXEL_UNPACK_BUFFER, picsys->bytes[i], NULL,
> + access);
> +
> + pic->p[i].p_pixels =
> + tc->api->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, picsys->bytes[i],
> + access);
> +
> + if (pic->p[i].p_pixels == NULL)
> + {
> + msg_Err(tc->parent, "could not map PBO buffers");
> + for (i = i - 1; i >= 0; --i)
> + {
> + tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER,
> + picsys->buffers[i]);
> + tc->api->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
> + }
> + tc->api->DeleteBuffers(pic->i_planes, picsys->buffers);
> + memset(picsys->buffers, 0, PICTURE_PLANE_MAX * sizeof(GLuint));
> + return VLC_EGENERIC;
> + }
> + }
> + return VLC_SUCCESS;
> +}
> +
> +/** Find next (bit) set */
> +static int fnsll(unsigned long long x, unsigned i)
This could be inlined
> +{
> + if (i >= CHAR_BIT * sizeof (x))
> + return 0;
> + return ffsll(x & ~((1ULL << i) - 1));
> +}
> +
> +static void
> +pbo_release_gpupics(const opengl_tex_converter_t *tc, bool force)
> +{
> + struct priv *priv = tc->priv;
> +
> + /* Release all pictures that are not used by the GPU anymore */
> + for (unsigned i = ffsll(priv->ongpu.list); i;
> + i = fnsll(priv->ongpu.list, i))
> + {
> + assert(priv->ongpu.pics[i - 1] != NULL);
> +
> + picture_t *pic = priv->ongpu.pics[i - 1];
> + picture_sys_t *picsys = pic->p_sys;
> +
> + assert(picsys->fence != NULL);
> + GLenum wait = force ? GL_ALREADY_SIGNALED
> + : tc->api->ClientWaitSync(picsys->fence, 0, 0);
> +
> + if (wait == GL_ALREADY_SIGNALED || wait == GL_CONDITION_SATISFIED)
> + {
> + tc->api->DeleteSync(picsys->fence);
> + picsys->fence = NULL;
> +
> + priv->ongpu.list &= ~(1ULL << (i - 1));
> + priv->ongpu.pics[i - 1] = NULL;
> + picture_Release(pic);
> + }
> + }
> +}
> +
> +static int
> +pbo_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
> + unsigned width, unsigned height, picture_t *pic)
> +{
> + struct priv *priv = tc->priv;
> + picture_sys_t *picsys = pic->p_sys;
> +
> + for (int i = 0; i < pic->i_planes; i++)
> + {
> + tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
> + if (picsys->fence == NULL)
> + tc->api->FlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, 0,
> + picsys->bytes[i]);
> + glActiveTexture(GL_TEXTURE0 + i);
> + glClientActiveTexture(GL_TEXTURE0 + i);
> + glBindTexture(tc->tex_target, textures[i]);
> +
> + glPixelStorei(GL_UNPACK_ROW_LENGTH,
> + pic->p[i].i_pitch / pic->p[i].i_pixel_pitch);
> +
> + glTexSubImage2D(tc->tex_target, 0, 0, 0,
> + width * tc->desc->p[i].w.num / tc->desc->p[i].w.den,
> + height * tc->desc->p[i].h.num / tc->desc->p[i].h.den,
> + priv->tex_format, priv->tex_type, NULL);
> + }
> +
> + bool hold;
> + if (picsys->fence == NULL)
> + hold = true;
> + else
> + {
> + /* The picture is already held */
> + hold = false;
> + tc->api->DeleteSync(picsys->fence);
> + }
> +
> + picsys->fence = tc->api->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
> + if (pic->p_sys->fence == NULL)
> + {
> + /* Error (corner case): don't hold the picture */
> + hold = false;
> + }
> +
> + pbo_release_gpupics(tc, false);
> +
> + if (hold)
> + {
> + /* Hold the picture while it's used by the GPU */
> + unsigned index = pic->p_sys->index;
> +
> + priv->ongpu.list |= 1ULL << index;
> + assert(priv->ongpu.pics[index] == NULL);
> + priv->ongpu.pics[index] = pic;
> + picture_Hold(pic);
> + }
> +
> + /* turn off pbo */
> + tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
> +
> + return VLC_SUCCESS;
> +}
> +
> +static void
> +picture_destroy_cb(picture_t *pic)
> +{
> + picture_sys_t *picsys = pic->p_sys;
> + const opengl_tex_converter_t *tc = picsys->tc;
> +
> + ASSERT_GLTHREAD();
> +
> + if (picsys->buffers[0] != 0)
> + {
> + for (int i = 0; i < pic->i_planes; ++i)
> + {
> + tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, picsys->buffers[i]);
> + tc->api->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
> + }
> + tc->api->DeleteBuffers(pic->i_planes, picsys->buffers);
> + }
> + free(picsys);
> + free(pic);
> +}
> +
> +static picture_pool_t *
> +tc_common_get_pool(const opengl_tex_converter_t *tc, const video_format_t *fmt,
> + unsigned requested_count, const GLuint *textures)
> +{
> + struct priv *priv = tc->priv;
> + picture_t *pictures[VLCGL_PICTURE_MAX];
> + unsigned count;
> + (void) textures;
> +
> + ASSERT_GLTHREAD();
> + priv->ongpu.list = 0;
> +
> + for (count = 0; count < requested_count; count++)
> + {
> + picture_sys_t *picsys = calloc(1, sizeof(*picsys));
> + if (unlikely(picsys == NULL))
> + break;
> + picsys->tc = tc;
> + picsys->index = count;
> + picture_resource_t rsc = {
> + .p_sys = picsys,
> + .pf_destroy = picture_destroy_cb,
> + };
> +
> + picture_t *pic = pictures[count] = picture_NewFromResource(fmt, &rsc);
> + if (pic == NULL)
> + {
> + free(picsys);
> + break;
> + }
> + if (picture_Setup(pic, fmt))
> + {
> + picture_Release(pic);
> + break;
> + }
> +
> + assert(pic->i_planes > 0
> + && (unsigned) pic->i_planes == tc->desc->plane_count);
> +
> + for (int i = 0; i < pic->i_planes; ++i)
> + {
> + const plane_t *p = &pic->p[i];
> +
> + if( p->i_pitch < 0 || p->i_lines <= 0 ||
> + (size_t)p->i_pitch > SIZE_MAX/p->i_lines )
> + goto error;
> + picsys->bytes[i] = (p->i_pitch * p->i_lines) + 15 / 16 * 16;
> + assert(picsys->bytes[i] == pictures[0]->p_sys->bytes[i]);
> + }
> +
> + if (pbo_map(tc, pic) != VLC_SUCCESS)
> + {
> + picture_Release(pic);
> + break;
> + }
> + }
> +
> + /* We need minumum 2 pbo buffers */
> + if (count <= 1)
> + goto error;
> +
> + /* turn off pbo */
> + tc->api->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
> +
> + /* Wrap the pictures into a pool */
> + picture_pool_t *pool = picture_pool_New(count, pictures);
> + if (!pool)
> + goto error;
> + return pool;
> +
> +error:
> + for (unsigned i = 0; i < count; i++)
> + picture_Release(pictures[i]);
> +
> + return NULL;
> +}
> +#endif /* VLCGL_HAS_PBO */
> +
> static int
> tc_common_gen_textures(const opengl_tex_converter_t *tc,
> const GLsizei *tex_width, const GLsizei *tex_height,
> @@ -153,6 +420,11 @@ tc_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
> unsigned width, unsigned height,
> picture_t *pic, const size_t *plane_offset)
> {
> +#ifdef VLCGL_HAS_PBO
> + if (pic->p_sys != NULL)
> + return pbo_common_update(tc, textures, width, height, pic);
> +#endif
> +
> int ret = VLC_SUCCESS;
> for (unsigned i = 0; i < tc->desc->plane_count && ret == VLC_SUCCESS; i++)
> {
> @@ -174,12 +446,17 @@ tc_common_update(const opengl_tex_converter_t *tc, const GLuint *textures,
> static void
> tc_common_release(const opengl_tex_converter_t *tc)
> {
> - tc->api->DeleteShader(tc->fragment_shader);
> + if (tc->fragment_shader != 0)
> + tc->api->DeleteShader(tc->fragment_shader);
>
> #ifndef GL_UNPACK_ROW_LENGTH
> struct priv *priv = tc->priv;
> free(priv->texture_temp_buf);
> #endif
> +#ifdef VLCGL_HAS_PBO
> + pbo_release_gpupics(tc, true);
> +#endif
> +
> free(tc->priv);
> }
>
> @@ -200,11 +477,20 @@ common_init(opengl_tex_converter_t *tc, size_t priv_size, vlc_fourcc_t chroma,
> tc->pf_update = tc_common_update;
> tc->pf_release = tc_common_release;
>
> +#ifdef VLCGL_HAS_PBO
> + if (tc->supports_pbo)
> + tc->pf_get_pool = tc_common_get_pool;
> +#endif
> +
> tc->tex_target = GL_TEXTURE_2D;
> priv->tex_internal = tex_internal;
> priv->tex_format = tex_format;
> priv->tex_type = tex_type;
>
> +#ifndef NDEBUG
> + dbg_is_glthread = true;
> +#endif
> +
> return VLC_SUCCESS;
> }
>
> @@ -258,7 +544,7 @@ opengl_tex_converter_rgba_init(const video_format_t *fmt,
> tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
> if (tc->fragment_shader == 0)
> {
> - free(tc->priv);
> + tc_common_release(tc);
> return VLC_EGENERIC;
> }
> tc->api->ShaderSource(tc->fragment_shader, 1, &code, NULL);
> @@ -432,7 +718,7 @@ opengl_tex_converter_yuv_init(const video_format_t *fmt,
> swap_uv ? 'z' : 'y',
> swap_uv ? 'y' : 'z') < 0)
> {
> - free(tc->priv);
> + tc_common_release(tc);
> return VLC_ENOMEM;
> }
>
> @@ -448,7 +734,7 @@ opengl_tex_converter_yuv_init(const video_format_t *fmt,
> tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
> if (tc->fragment_shader == 0)
> {
> - free(tc->priv);
> + tc_common_release(tc);
> free(code);
> return VLC_EGENERIC;
> }
> @@ -515,7 +801,7 @@ opengl_tex_converter_xyz12_init(const video_format_t *fmt,
> tc->fragment_shader = tc->api->CreateShader(GL_FRAGMENT_SHADER);
> if (tc->fragment_shader == 0)
> {
> - free(tc->priv);
> + tc_common_release(tc);
> return VLC_EGENERIC;
> }
> tc->api->ShaderSource(tc->fragment_shader, 1, &code, NULL);
> diff --git a/modules/video_output/opengl/internal.h b/modules/video_output/opengl/internal.h
> index d925561b81..020f9b5cd3 100644
> --- a/modules/video_output/opengl/internal.h
> +++ b/modules/video_output/opengl/internal.h
> @@ -33,6 +33,9 @@
> # define GLSL_VERSION "120"
> # define VLCGL_TEXTURE_COUNT 1
> # define VLCGL_PICTURE_MAX 128
> +# ifdef GL_VERSION_4_4
> +# define VLCGL_HAS_PBO
> +# endif
> # define PRECISION ""
> #endif
>
> @@ -61,6 +64,15 @@
> # define PFNGLGENBUFFERSPROC typeof(glGenBuffers)*
> # define PFNGLBINDBUFFERPROC typeof(glBindBuffer)*
> # define PFNGLBUFFERDATAPROC typeof(glBufferData)*
> +# ifdef VLCGL_HAS_PBO
> +# define PFNGLBUFFERSTORAGEPROC typeof(glBufferStorage)*
> +# define PFNGLMAPBUFFERRANGEPROC typeof(glMapBufferRange)*
> +# define PFNGLFLUSHMAPPEDBUFFERRANGEPROC typeof(glFlushMappedBufferRange)*
> +# define PFNGLUNMAPBUFFERPROC typeof(glUnmapBuffer)*
> +# define PFNGLFENCESYNCPROC typeof(glFenceSync)*
> +# define PFNGLDELETESYNCPROC typeof(glDeleteSync)*
> +# define PFNGLCLIENTWAITSYNCPROC typeof(glClientWaitSync)*
> +# endif
> # define PFNGLDELETEBUFFERSPROC typeof(glDeleteBuffers)*
> #if defined(__APPLE__)
> # import <CoreFoundation/CoreFoundation.h>
> @@ -104,6 +116,15 @@ typedef struct {
> PFNGLGENBUFFERSPROC GenBuffers;
> PFNGLBINDBUFFERPROC BindBuffer;
> PFNGLBUFFERDATAPROC BufferData;
> +#ifdef VLCGL_HAS_PBO
> + PFNGLBUFFERSTORAGEPROC BufferStorage;
> + PFNGLMAPBUFFERRANGEPROC MapBufferRange;
> + PFNGLFLUSHMAPPEDBUFFERRANGEPROC FlushMappedBufferRange;
> + PFNGLUNMAPBUFFERPROC UnmapBuffer;
> + PFNGLFENCESYNCPROC FenceSync;
> + PFNGLDELETESYNCPROC DeleteSync;
> + PFNGLCLIENTWAITSYNCPROC ClientWaitSync;
> +#endif
> PFNGLDELETEBUFFERSPROC DeleteBuffers;
>
> #if defined(_WIN32)
> @@ -137,6 +158,7 @@ typedef int (*opengl_tex_converter_init_cb)(const video_format_t *fmt,
> */
> struct opengl_tex_converter_t
> {
> + bool supports_pbo;
> /* Pointer to object parent, set by the caller of the init cb */
> vlc_object_t *parent;
> /* Function pointer to shaders commands, set by the caller of the init cb */
> diff --git a/modules/video_output/opengl/vout_helper.c b/modules/video_output/opengl/vout_helper.c
> index 4d0b6a676e..5c0185ef78 100644
> --- a/modules/video_output/opengl/vout_helper.c
> +++ b/modules/video_output/opengl/vout_helper.c
> @@ -220,6 +220,15 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
> api->BindBuffer = GET_PROC_ADDR(glBindBuffer);
> api->BufferData = GET_PROC_ADDR(glBufferData);
> api->DeleteBuffers = GET_PROC_ADDR(glDeleteBuffers);
> +#ifdef VLCGL_HAS_PBO
> + api->BufferStorage = GET_PROC_ADDR(glBufferStorage);
> + api->MapBufferRange = GET_PROC_ADDR(glMapBufferRange);
> + api->FlushMappedBufferRange = GET_PROC_ADDR(glFlushMappedBufferRange);
> + api->UnmapBuffer = GET_PROC_ADDR(glUnmapBuffer);
> + api->FenceSync = GET_PROC_ADDR(glFenceSync);
> + api->DeleteSync = GET_PROC_ADDR(glDeleteSync);
> + api->ClientWaitSync = GET_PROC_ADDR(glClientWaitSync);
> +#endif
> #undef GET_PROC_ADDR
>
> if (!vgl->api.CreateShader || !vgl->api.ShaderSource || !vgl->api.CreateProgram)
> @@ -249,6 +258,19 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
> HasExtension(extensions, "GL_APPLE_texture_2D_limited_npot");
> #endif
>
> +#if defined (VLCGL_HAS_PBO)
> + const bool supports_pbo = vgl->supports_npot && api->BufferStorage
> + && api->MapBufferRange && api->FlushMappedBufferRange
> + && api->UnmapBuffer && api->FenceSync && api->DeleteSync
> + && api->ClientWaitSync
> + && HasExtension(extensions, "GL_ARB_pixel_buffer_object")
> + && HasExtension(extensions, "GL_ARB_buffer_storage");
> +#else
> + const bool supports_pbo = false;
> +#endif
> + msg_Dbg(gl, "PBO support (for direct rendering): %s",
> + supports_pbo ? "On" : "Off");
> +
> /* Initialize with default chroma */
> vgl->fmt = *fmt;
> vgl->fmt.i_chroma = VLC_CODEC_RGB32;
> @@ -263,6 +285,7 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
> # endif
> opengl_tex_converter_t tex_conv;
> opengl_tex_converter_t rgba_tex_conv = {
> + .supports_pbo = supports_pbo,
> .parent = VLC_OBJECT(vgl->gl),
> .api = &vgl->api,
> .orientation = fmt->orientation,
> @@ -279,6 +302,7 @@ vout_display_opengl_t *vout_display_opengl_New(video_format_t *fmt,
> for (size_t i = 0; i < ARRAY_SIZE(opengl_tex_converter_init_cbs); ++i)
> {
> tex_conv = (opengl_tex_converter_t) {
> + .supports_pbo = supports_pbo,
> .parent = VLC_OBJECT(vgl->gl),
> .api = &vgl->api,
> .orientation = fmt->orientation,
>
More information about the vlc-devel
mailing list