[vlc-devel] [PATCH 6/6] nvdec: add direct rendering in OpenGL

Steve Lhomme robux4 at ycbcr.xyz
Thu Sep 12 14:44:22 CEST 2019


---
 modules/video_output/Makefile.am              |   8 +
 modules/video_output/opengl/converter_nvdec.c | 274 ++++++++++++++++++
 2 files changed, 282 insertions(+)
 create mode 100644 modules/video_output/opengl/converter_nvdec.c

diff --git a/modules/video_output/Makefile.am b/modules/video_output/Makefile.am
index a5b59f911a6..8c75f89574f 100644
--- a/modules/video_output/Makefile.am
+++ b/modules/video_output/Makefile.am
@@ -96,6 +96,10 @@ libglconv_vdpau_plugin_la_SOURCES = video_output/opengl/converter_vdpau.c \
 libglconv_vdpau_plugin_la_CFLAGS = $(AM_CFLAGS) $(VDPAU_CFLAGS)
 libglconv_vdpau_plugin_la_LIBADD = $(LIBDL) libvlc_vdpau.la $(X_LIBS) $(X_PRE_LIBS) -lX11
 
+# NVDEC OpenGL converter plugin (added to vout_LTLIBRARIES further down, inside
+# the HAVE_GL / HAVE_NVDEC conditionals).
+# Only $(LIBDL) is listed in LIBADD: converter_nvdec.c gets its CUDA function
+# table at run time through cuda_load_functions().
+libglconv_nvdec_plugin_la_SOURCES = video_output/opengl/converter_nvdec.c \
+	video_output/opengl/converter.h video_chroma/nvdec_fmt.h
+libglconv_nvdec_plugin_la_LIBADD = $(LIBDL)
+
 if HAVE_GL
 vout_LTLIBRARIES += libgl_plugin.la
 if HAVE_EGL
@@ -107,6 +111,10 @@ endif # HAVE_EGL
 if HAVE_VDPAU
 vout_LTLIBRARIES += libglconv_vdpau_plugin.la
 endif
+
+# Build the NVDEC converter only when the HAVE_NVDEC conditional is set
+# (this block is itself nested inside "if HAVE_GL").
+if HAVE_NVDEC
+vout_LTLIBRARIES += libglconv_nvdec_plugin.la
+endif
 endif # HAVE_GL
 
 ### Vulkan ###
diff --git a/modules/video_output/opengl/converter_nvdec.c b/modules/video_output/opengl/converter_nvdec.c
new file mode 100644
index 00000000000..c35db91ded6
--- /dev/null
+++ b/modules/video_output/opengl/converter_nvdec.c
@@ -0,0 +1,274 @@
+/*****************************************************************************
+ * converter_nvdec.c: OpenGL NVDEC opaque converter
+ *****************************************************************************
+ * Copyright (C) 2019 VLC authors, VideoLAN and VideoLabs
+ *
+ * Authors: Steve Lhomme <robux4 at videolabs.io>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+
+#include <vlc_common.h>
+#include <vlc_vout_window.h>
+#include <vlc_codec.h>
+
+#include "../../video_chroma/nvdec_fmt.h"
+#include "internal.h"
+
+#include <ffnvcodec/dynlink_loader.h>
+
+#include <GL/gl.h>
+#include <GL/glext.h>
+
+/* Private state of the NVDEC OpenGL converter (stored in tc->priv). */
+typedef struct converter_sys_t
+{
+    /* decoder device reference taken in Open() and dropped in Close() */
+    vlc_decoder_device *device;
+
+    /* CUDA context created by Open() when the two devices it compares differ;
+     * NULL otherwise, in which case the callbacks use the decoder context */
+    CUcontext cuConverterCtx;
+
+    /* one entry per texture plane: Y, UV for NV12/P010 */
+    CUgraphicsResource cu_res[PICTURE_PLANE_MAX];   /* registered GL textures */
+    CUarray mappedArray[PICTURE_PLANE_MAX];         /* copy destinations */
+} converter_sys_t;
+
+/*
+ * Translate the result of a CUDA driver call into a VLC status code.
+ *
+ * On failure the CUDA error name and description are looked up through the
+ * function table stored in device->opaque and logged on the device object.
+ *
+ * device:   decoder device whose opaque pointer is a decoder_device_nvdec_t
+ * result:   value returned by the CUDA driver call
+ * psz_func: name of the CUDA function, only used in the log message
+ *
+ * Returns VLC_SUCCESS when result is CUDA_SUCCESS, VLC_EGENERIC otherwise.
+ */
+static inline int CudaCall(vlc_decoder_device *device, CUresult result, const char *psz_func)
+{
+    if (unlikely(result != CUDA_SUCCESS)) {
+        /* The lookups may fail and leave the strings untouched or NULL:
+         * start from NULL and substitute printable text so that msg_Err()
+         * never receives an invalid pointer for a %s conversion. */
+        const char *psz_err = NULL, *psz_err_str = NULL;
+        decoder_device_nvdec_t *p_sys = device->opaque;
+        p_sys->cudaFunctions->cuGetErrorName(result, &psz_err);
+        p_sys->cudaFunctions->cuGetErrorString(result, &psz_err_str);
+        if (psz_err == NULL)
+            psz_err = "unknown error";
+        if (psz_err_str == NULL)
+            psz_err_str = "unknown error";
+        msg_Err(device, "%s failed: %s (%s)", psz_func, psz_err_str, psz_err);
+        return VLC_EGENERIC;
+    }
+    return VLC_SUCCESS;
+}
+
+/* Call a function of the CUDA function table and convert its result with
+ * CudaCall(). A variable named `device` (vlc_decoder_device *) must be in
+ * scope at the call site. */
+#define CALL_CUDA(func, ...) CudaCall(device, ((decoder_device_nvdec_t *)device->opaque)->cudaFunctions->func(__VA_ARGS__), #func)
+
+/*
+ * Allocate the storage of each GL texture and register it with CUDA.
+ *
+ * For every plane the texture is given its storage with TexImage2D(), then
+ * registered as a CUDA graphics resource (p_sys->cu_res[i]) and mapped once
+ * to retrieve the CUDA array backing it (p_sys->mappedArray[i]).
+ *
+ * tc:          converter; tc->priv is the converter_sys_t set up by Open()
+ * textures:    tc->tex_count GL texture names
+ * tex_width:   width of each texture
+ * tex_height:  height of each texture
+ *
+ * Returns VLC_SUCCESS, or the status of the first GL/CUDA step that failed.
+ * The CUDA context pushed on entry is popped on every path after a
+ * successful push.
+ */
+static int tc_nvdec_gl_allocate_texture(const opengl_tex_converter_t *tc, GLuint *textures,
+                                const GLsizei *tex_width, const GLsizei *tex_height)
+{
+    vlc_decoder_device *device = tc->dec_device;
+    decoder_device_nvdec_t *devsys = device->opaque;
+    converter_sys_t *p_sys = tc->priv;
+
+    int result;
+    result = CALL_CUDA(cuCtxPushCurrent, p_sys->cuConverterCtx ? p_sys->cuConverterCtx : devsys->cuCtx);
+    if (result != VLC_SUCCESS)
+        return result;
+
+    for (unsigned i = 0; i < tc->tex_count; i++)
+    {
+        tc->vt->BindTexture(tc->tex_target, textures[i]);
+        tc->vt->TexImage2D(tc->tex_target, 0, tc->texs[i].internal,
+                           tex_width[i], tex_height[i], 0, tc->texs[i].format,
+                           tc->texs[i].type, NULL);
+        if (tc->vt->GetError() != GL_NO_ERROR)
+        {
+            msg_Err(tc->gl, "could not alloc textures");
+            result = VLC_EGENERIC;
+        }
+        else
+            result = CALL_CUDA(cuGraphicsGLRegisterImage, &p_sys->cu_res[i], textures[i], tc->tex_target, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD);
+
+        if (result == VLC_SUCCESS)
+            result = CALL_CUDA(cuGraphicsMapResources, 1, &p_sys->cu_res[i], 0);
+        if (result == VLC_SUCCESS)
+        {
+            result = CALL_CUDA(cuGraphicsSubResourceGetMappedArray, &p_sys->mappedArray[i], p_sys->cu_res[i], 0, 0);
+            /* the resource was mapped: unmap it even if the lookup failed,
+             * and report the first of the two errors */
+            int unmapped = CALL_CUDA(cuGraphicsUnmapResources, 1, &p_sys->cu_res[i], 0);
+            if (result == VLC_SUCCESS)
+                result = unmapped;
+        }
+
+        /* never leave the texture bound, whatever the outcome */
+        tc->vt->BindTexture(tc->tex_target, 0);
+        if (result != VLC_SUCCESS)
+            break;
+    }
+
+    /* NOTE(review): resources registered before a failure stay registered;
+     * nothing in this file unregisters them. */
+    CALL_CUDA(cuCtxPopCurrent, NULL);
+    return result;
+}
+
+/*
+ * Upload a decoded picture into the textures' CUDA arrays.
+ *
+ * Each plane is copied with cuMemcpy2DAsync() from the picture's device
+ * buffer into p_sys->mappedArray[plane]. Planes are read one below the other
+ * from the same source buffer: the starting source row advances by the
+ * height of each copied plane.
+ *
+ * Returns VLC_SUCCESS, or the status of the context push / first failed copy.
+ */
+static int
+tc_nvdec_gl_update(opengl_tex_converter_t const *tc, GLuint textures[],
+                   GLsizei const tex_widths[], GLsizei const tex_heights[],
+                   picture_t *pic, size_t const plane_offsets[])
+{
+    VLC_UNUSED(plane_offsets);
+
+    converter_sys_t *conv = tc->priv;
+    /* CALL_CUDA expects this exact variable name */
+    vlc_decoder_device *device = tc->dec_device;
+    decoder_device_nvdec_t *nvdev = device->opaque;
+    pic_context_nvdec_t *src = container_of(pic->context, pic_context_nvdec_t, ctx);
+
+    /* use the converter's own context when Open() created one */
+    CUcontext cuda_ctx = conv->cuConverterCtx;
+    if (!cuda_ctx)
+        cuda_ctx = nvdev->cuCtx;
+
+    int status = CALL_CUDA(cuCtxPushCurrent, cuda_ctx);
+    if (status != VLC_SUCCESS)
+        return status;
+
+    size_t first_row = 0;
+    unsigned plane = 0;
+    while (plane < tc->tex_count)
+    {
+        /* every plane uses the width of plane 0, doubled for chromas other
+         * than VLC_CODEC_NVDEC_OPAQUE (presumably 2 bytes per sample) */
+        size_t row_bytes = tex_widths[0];
+        if (tc->fmt.i_chroma != VLC_CODEC_NVDEC_OPAQUE)
+            row_bytes *= 2;
+
+        CUDA_MEMCPY2D copy = {
+            .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
+            .srcDevice      = src->devidePtr,
+            .srcPitch       = src->bufferPitch,
+            .srcY           = first_row,
+            .dstMemoryType  = CU_MEMORYTYPE_ARRAY,
+            .dstArray       = conv->mappedArray[plane],
+            .WidthInBytes   = row_bytes,
+            .Height         = tex_heights[plane],
+        };
+
+        status = CALL_CUDA(cuMemcpy2DAsync, &copy, 0);
+        if (status != VLC_SUCCESS)
+            break;
+
+        first_row += copy.Height;
+        plane++;
+    }
+
+    CALL_CUDA(cuCtxPopCurrent, NULL);
+    return status;
+}
+
+/*
+ * Converter close callback.
+ *
+ * Destroys the private CUDA context if Open() created one, then drops the
+ * decoder device reference taken by Open(). The context is destroyed first
+ * because CALL_CUDA needs the device's function table.
+ *
+ * NOTE(review): the graphics resources in p_sys->cu_res are not unregistered
+ * anywhere in this file; confirm whether that is needed when the shared
+ * decoder context is used.
+ */
+static void
+Close(vlc_object_t *obj)
+{
+    opengl_tex_converter_t *tc = (void *)obj;
+    converter_sys_t *p_sys = tc->priv;
+    vlc_decoder_device *device = p_sys->device;
+
+    if (p_sys->cuConverterCtx != NULL)
+    {
+        CALL_CUDA(cuCtxDestroy, p_sys->cuConverterCtx);
+        p_sys->cuConverterCtx = NULL;
+    }
+
+    vlc_decoder_device_Release(device);
+}
+
+/*
+ * Converter open callback.
+ *
+ * Accepts only an NVDEC decoder device with an NVDEC opaque chroma. Holds the
+ * decoder device, compares the CUDA device reported by cuGLGetDevices() with
+ * the device of the decoder context, creates a private CUDA context when
+ * they differ, then sets up the fragment shader for the matching semi-planar
+ * chroma (NV12/P010/P016) and installs the converter callbacks.
+ *
+ * Returns VLC_SUCCESS on success. On any failure everything acquired here
+ * (device reference, private context) is released, tc->priv is left
+ * untouched and an error code is returned.
+ */
+static int
+Open(vlc_object_t *obj)
+{
+    opengl_tex_converter_t *tc = (void *) obj;
+    if (tc->dec_device == NULL
+     || tc->dec_device->type != VLC_DECODER_DEVICE_NVDEC
+     || !is_nvdec_opaque(tc->fmt.i_chroma))
+        return VLC_EGENERIC;
+
+    converter_sys_t *p_sys = vlc_obj_malloc(VLC_OBJECT(tc), sizeof(*p_sys));
+    if (unlikely(p_sys == NULL))
+        return VLC_ENOMEM;
+    for (size_t i=0; i < ARRAY_SIZE(p_sys->cu_res); i++)
+        p_sys->cu_res[i] = NULL;
+    p_sys->cuConverterCtx = NULL;
+    p_sys->device = vlc_decoder_device_Hold(tc->dec_device);
+
+    vlc_decoder_device *device = tc->dec_device;
+    decoder_device_nvdec_t *devsys = device->opaque;
+    int result;
+    CUdevice cuDecDevice = 0;
+    CUdevice cuConverterDevice = 0;
+    unsigned int device_count;
+    result = CALL_CUDA(cuGLGetDevices, &device_count, &cuDecDevice, 1, CU_GL_DEVICE_LIST_ALL);
+    if (result != VLC_SUCCESS)
+        goto error;
+
+    /* query the device of the decoder context; the context is only popped
+     * if it was actually pushed */
+    result = CALL_CUDA(cuCtxPushCurrent, devsys->cuCtx);
+    if (result != VLC_SUCCESS)
+        goto error;
+    result = CALL_CUDA(cuCtxGetDevice, &cuConverterDevice);
+    CALL_CUDA(cuCtxPopCurrent, NULL);
+    if (result != VLC_SUCCESS)
+        goto error;
+
+    /* NOTE(review): despite the names, cuDecDevice comes from
+     * cuGLGetDevices() and cuConverterDevice is the decoder context's
+     * device; the private context is created on the latter. Confirm this
+     * is the intended device. */
+    if (cuConverterDevice != cuDecDevice)
+    {
+        if (CALL_CUDA(cuCtxCreate, &p_sys->cuConverterCtx, 0, cuConverterDevice) != VLC_SUCCESS)
+        {
+            /* not fatal: with a NULL cuConverterCtx the allocate/update
+             * callbacks fall back to the decoder context */
+            p_sys->cuConverterCtx = NULL;
+        }
+    }
+
+    vlc_fourcc_t render_chroma;
+    switch (tc->fmt.i_chroma)
+    {
+        case VLC_CODEC_NVDEC_OPAQUE_10B: render_chroma = VLC_CODEC_P010; break;
+        case VLC_CODEC_NVDEC_OPAQUE_16B: render_chroma = VLC_CODEC_P016; break;
+        case VLC_CODEC_NVDEC_OPAQUE:
+        default:                         render_chroma = VLC_CODEC_NV12; break;
+    }
+
+    tc->fshader = opengl_fragment_shader_init(tc, GL_TEXTURE_2D, render_chroma, tc->fmt.space);
+    if (!tc->fshader)
+    {
+        result = VLC_EGENERIC;
+        goto error;
+    }
+
+    tc->pf_allocate_textures = tc_nvdec_gl_allocate_texture;
+    tc->pf_update = tc_nvdec_gl_update;
+    tc->priv = p_sys;
+
+    return VLC_SUCCESS;
+
+error:
+    /* Cleanup is done here rather than through Close(): tc->priv is not
+     * set yet, so Close() could not find p_sys. */
+    if (p_sys->cuConverterCtx != NULL)
+        CALL_CUDA(cuCtxDestroy, p_sys->cuConverterCtx);
+    vlc_decoder_device_Release(p_sys->device);
+    return result;
+}
+
+/*
+ * Decoder device close callback: destroys the CUDA context when one is set,
+ * then frees the CUDA function table.
+ */
+static void
+DecoderContextClose(vlc_decoder_device *device)
+{
+    decoder_device_nvdec_t *devsys = device->opaque;
+    CUcontext ctx = devsys->cuCtx;
+
+    if (ctx != NULL)
+        CALL_CUDA(cuCtxDestroy, ctx);
+
+    cuda_free_functions(&devsys->cudaFunctions);
+}
+
+/* Operations installed on the decoder device by DecoderContextOpen(). */
+static const struct vlc_decoder_device_operations dev_ops =
+{
+    .close = &DecoderContextClose,
+};
+
+/*
+ * Decoder device open callback.
+ *
+ * Allocates the decoder_device_nvdec_t (tied to the device object), loads
+ * the CUDA function table, initializes the driver with cuInit() and creates
+ * a CUDA context with cuCtxCreate(&cuCtx, 0, 0).
+ *
+ * window: unused
+ *
+ * Returns VLC_SUCCESS with device->ops/type set, VLC_ENOMEM on allocation
+ * failure, or an error code when loading or initializing CUDA fails.
+ */
+static int
+DecoderContextOpen(vlc_decoder_device *device, vout_window_t *window)
+{
+    VLC_UNUSED(window);
+
+    decoder_device_nvdec_t *p_sys = vlc_obj_malloc(VLC_OBJECT(device), sizeof(*p_sys));
+    if (unlikely(p_sys == NULL))
+        return VLC_ENOMEM;
+    device->opaque = p_sys;
+    p_sys->cudaFunctions = NULL;
+    /* DecoderContextClose() tests cuCtx, and it is called below before any
+     * context exists: the field must not be left uninitialized. */
+    p_sys->cuCtx = NULL;
+
+    int result = cuda_load_functions(&p_sys->cudaFunctions, device);
+    if (result != VLC_SUCCESS) {
+        return VLC_EGENERIC;
+    }
+
+    result = CALL_CUDA(cuInit, 0);
+    if (result != VLC_SUCCESS)
+    {
+        DecoderContextClose(device);
+        return result;
+    }
+
+    result = CALL_CUDA(cuCtxCreate, &p_sys->cuCtx, 0, 0);
+    if (result != VLC_SUCCESS)
+    {
+        /* make sure a failed creation cannot leave a stale handle behind */
+        p_sys->cuCtx = NULL;
+        DecoderContextClose(device);
+        return result;
+    }
+
+    device->ops = &dev_ops;
+    device->type = VLC_DECODER_DEVICE_NVDEC;
+    return VLC_SUCCESS;
+}
+
+/* Module descriptor: the main module is the OpenGL converter, the submodule
+ * provides the decoder device. */
+vlc_module_begin ()
+    set_description("NVDEC OpenGL surface converter")
+    /* "glconv" capability, registered with the value 2 */
+    set_capability("glconv", 2)
+    set_callbacks(Open, Close)
+    set_category(CAT_VIDEO)
+    set_subcategory(SUBCAT_VIDEO_VOUT)
+    add_shortcut("nvdec")
+    add_submodule()
+        /* decoder device opener, registered with the value 3
+         * (presumably its priority -- confirm against the macro) */
+        set_callback_dec_device(DecoderContextOpen, 3)
+vlc_module_end ()
-- 
2.17.1



More information about the vlc-devel mailing list