[vlc-devel] [PATCH 5/6] nvdec: add a GPU to CPU converter

Steve Lhomme robux4 at ycbcr.xyz
Tue Sep 17 14:02:21 CEST 2019


The decoder device info is temporarily added to each picture until we have
proper push.
---
 modules/hw/nvdec/Makefile.am |   7 ++
 modules/hw/nvdec/chroma.c    | 132 +++++++++++++++++++++++++++++++++++
 modules/hw/nvdec/nvdec.c     |   3 +
 modules/hw/nvdec/nvdec_fmt.h |   3 +
 4 files changed, 145 insertions(+)
 create mode 100644 modules/hw/nvdec/chroma.c

diff --git a/modules/hw/nvdec/Makefile.am b/modules/hw/nvdec/Makefile.am
index 621f8766698..17bddf8f187 100644
--- a/modules/hw/nvdec/Makefile.am
+++ b/modules/hw/nvdec/Makefile.am
@@ -10,3 +10,10 @@ libnvdec_plugin_la_LIBADD = $(LIBDL)
 if HAVE_NVDEC
 codec_LTLIBRARIES += libnvdec_plugin.la
 endif
+
+libnvdec_chroma_plugin_la_SOURCES = hw/nvdec/chroma.c hw/nvdec/nvdec_fmt.h
+libnvdec_chroma_plugin_la_LDFLAGS = $(AM_LDFLAGS) -rpath '$(nvdecdir)'
+if HAVE_NVDEC
+nvdec_LTLIBRARIES = libnvdec_chroma_plugin.la
+endif
+
diff --git a/modules/hw/nvdec/chroma.c b/modules/hw/nvdec/chroma.c
new file mode 100644
index 00000000000..b963d6602bf
--- /dev/null
+++ b/modules/hw/nvdec/chroma.c
@@ -0,0 +1,132 @@
+/*****************************************************************************
+ * chroma.c: NVDEC/CUDA chroma conversion filter
+ *****************************************************************************
+ * Copyright (C) 2019 VLC authors and VideoLAN
+ *
+ * Authors: Steve Lhomme <robux4 at videolabs.io>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_filter.h>
+#include <vlc_codec.h>
+
+#include "nvdec_fmt.h"
+
+static int OpenCUDAToCPU( vlc_object_t * );
+
+vlc_module_begin() /* plugin descriptor for the NVDEC/CUDA -> CPU chroma converter */
+    set_shortname(N_("CUDA converter"))
+    set_description(N_("CUDA/NVDEC Chroma Converter filter"))
+    set_category(CAT_VIDEO)
+    set_subcategory(SUBCAT_VIDEO_VFILTER)
+    set_capability("video converter", 10)
+    set_callbacks(OpenCUDAToCPU, NULL) /* OpenCUDAToCPU opens the filter; NULL: presumably no close callback - confirm */
+vlc_module_end()
+
+typedef struct
+{
+    vlc_decoder_device *device; /* NOTE(review): nothing in this file reads or writes this struct - confirm it is needed */
+} nvdec_filter_sys_t;
+
+/* Map a CUDA driver result to VLC_SUCCESS/VLC_EGENERIC; a failure is logged with the CUDA error text and name. */
+static inline int CudaCall(filter_t *p_filter, decoder_device_nvdec_t *devsys, CUresult result, const char *psz_func)
+{
+    if (result == CUDA_SUCCESS)
+        return VLC_SUCCESS;
+    const char *psz_name = NULL, *psz_text = NULL; /* the lookups may leave these NULL for an unknown code */
+    devsys->cudaFunctions->cuGetErrorName(result, &psz_name);
+    devsys->cudaFunctions->cuGetErrorString(result, &psz_text);
+    msg_Err(p_filter, "%s failed: %s (%s)", psz_func, psz_text ? psz_text : "unknown error", psz_name ? psz_name : "unknown");
+    return VLC_EGENERIC;
+}
+/* Calls devsys->cudaFunctions->func(...) and checks the result with CudaCall(); needs p_filter and devsys in scope. */
+#define CALL_CUDA(func, ...) CudaCall(p_filter, devsys, devsys->cudaFunctions->func(__VA_ARGS__), #func)
+
+
+/* Copy the NVDEC picture behind src->context into a new CPU picture; always releases src, returns NULL on error. */
+static picture_t * FilterCUDAToCPU( filter_t *p_filter, picture_t *src )
+{
+    picture_t *dst = filter_NewPicture( p_filter );
+    if (unlikely(dst == NULL))
+    {
+        picture_Release(src);
+        return NULL;
+    }
+
+    pic_context_nvdec_t *srcpic = container_of(src->context, pic_context_nvdec_t, ctx);
+    decoder_device_nvdec_t *devsys = &srcpic->nvdecDevice;
+
+    int result = CALL_CUDA(cuCtxPushCurrent, devsys->cuCtx);
+    if (result != VLC_SUCCESS)
+    {
+        picture_Release(dst);
+        picture_Release(src);
+        return NULL;
+    }
+
+    size_t srcY = 0; /* first row of the current plane inside the device buffer */
+    for (int i_plane = 0; i_plane < dst->i_planes; i_plane++) {
+        plane_t plane = dst->p[i_plane];
+        CUDA_MEMCPY2D cu_cpy = {
+            .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
+            .srcDevice      = srcpic->devidePtr,
+            .srcY           = srcY,
+            .srcPitch       = srcpic->bufferPitch,
+            .dstMemoryType  = CU_MEMORYTYPE_HOST,
+            .dstHost        = plane.p_pixels,
+            .dstPitch       = plane.i_pitch,
+            .WidthInBytes   = __MIN(srcpic->bufferPitch, (unsigned)plane.i_pitch), /* clamp to this plane's own pitch */
+            .Height         = __MIN(srcpic->bufferHeight, (unsigned)plane.i_visible_lines),
+        };
+        result = CALL_CUDA(cuMemcpy2DAsync, &cu_cpy, 0); /* NOTE(review): async on stream 0, no sync before return - confirm */
+        if (result != VLC_SUCCESS)
+        {
+            picture_Release(dst);
+            dst = NULL;
+            goto done;
+        }
+        srcY += srcpic->bufferHeight; /* the next plane is read bufferHeight rows further down */
+    }
+    picture_CopyProperties(dst, src); /* carry the date and picture flags over to the CPU picture */
+done:
+    CALL_CUDA(cuCtxPopCurrent, NULL);
+    picture_Release(src);
+    return dst;
+}
+
+/* Open callback: accept only NVDEC opaque -> NV12 / P010 / P016 conversions, then install FilterCUDAToCPU. */
+static int OpenCUDAToCPU( vlc_object_t *p_this )
+{
+    filter_t *p_filter = (filter_t *)p_this;
+    int b_supported = 0; /* set as soon as one input/output chroma pair matches */
+    if ( p_filter->fmt_in.video.i_chroma == VLC_CODEC_NVDEC_OPAQUE )
+        b_supported |= p_filter->fmt_out.video.i_chroma == VLC_CODEC_NV12;
+    if ( p_filter->fmt_in.video.i_chroma == VLC_CODEC_NVDEC_OPAQUE_10B )
+        b_supported |= p_filter->fmt_out.video.i_chroma == VLC_CODEC_P010;
+    if ( p_filter->fmt_in.video.i_chroma == VLC_CODEC_NVDEC_OPAQUE_16B )
+        b_supported |= p_filter->fmt_out.video.i_chroma == VLC_CODEC_P016;
+    if ( !b_supported )
+        return VLC_EGENERIC;
+
+    p_filter->pf_video_filter = FilterCUDAToCPU;
+    return VLC_SUCCESS;
+}
diff --git a/modules/hw/nvdec/nvdec.c b/modules/hw/nvdec/nvdec.c
index a55da06faa7..73e36806097 100644
--- a/modules/hw/nvdec/nvdec.c
+++ b/modules/hw/nvdec/nvdec.c
@@ -337,6 +337,7 @@ static struct picture_context_t *NVDecCtxClone(struct picture_context_t *srcctx)
     clonectx->devidePtr = srcpic->devidePtr;
     clonectx->bufferPitch = srcpic->bufferPitch;
     clonectx->bufferHeight = srcpic->bufferHeight;
+    clonectx->nvdecDevice = srcpic->nvdecDevice;
     return &clonectx->ctx;
 }
 
@@ -386,6 +387,8 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
         picctx->devidePtr = p_sys->outputDevicePtr[pool_idx];
         picctx->bufferPitch = p_sys->outputPitch;
         picctx->bufferHeight = p_sys->decoderHeight;
+        picctx->nvdecDevice.cuCtx = p_sys->cuCtx;
+        picctx->nvdecDevice.cudaFunctions = p_sys->cudaFunctions;
 
         size_t srcY = 0;
         size_t dstY = 0;
diff --git a/modules/hw/nvdec/nvdec_fmt.h b/modules/hw/nvdec/nvdec_fmt.h
index 85728176048..9ac0bb947ac 100644
--- a/modules/hw/nvdec/nvdec_fmt.h
+++ b/modules/hw/nvdec/nvdec_fmt.h
@@ -46,6 +46,9 @@ typedef struct
     CUdeviceptr  devidePtr;
     unsigned int bufferPitch;
     unsigned int bufferHeight;
+
+    // temporary until the filters get it from their environment/video context
+    decoder_device_nvdec_t nvdecDevice;
 } pic_context_nvdec_t;
 
 #endif /* include-guard */
-- 
2.17.1



More information about the vlc-devel mailing list