[vlc-commits] nvdec: add a GPU to CPU converter
Steve Lhomme
git at videolan.org
Wed Sep 18 09:05:27 CEST 2019
vlc | branch: master | Steve Lhomme <robux4 at ycbcr.xyz> | Mon Sep 16 12:13:23 2019 +0200| [9939eab7ce17656ae0f906e49b865b3771664bf2] | committer: Steve Lhomme
nvdec: add a GPU to CPU converter
The decoder device info is temporarily added to each picture until we have
proper push.
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=9939eab7ce17656ae0f906e49b865b3771664bf2
---
modules/hw/nvdec/Makefile.am | 7 +++
modules/hw/nvdec/chroma.c | 132 +++++++++++++++++++++++++++++++++++++++++++
modules/hw/nvdec/nvdec.c | 3 +
modules/hw/nvdec/nvdec_fmt.h | 3 +
4 files changed, 145 insertions(+)
diff --git a/modules/hw/nvdec/Makefile.am b/modules/hw/nvdec/Makefile.am
index 621f876669..17bddf8f18 100644
--- a/modules/hw/nvdec/Makefile.am
+++ b/modules/hw/nvdec/Makefile.am
@@ -10,3 +10,10 @@ libnvdec_plugin_la_LIBADD = $(LIBDL)
if HAVE_NVDEC
codec_LTLIBRARIES += libnvdec_plugin.la
endif
+
+libnvdec_chroma_plugin_la_SOURCES = hw/nvdec/chroma.c hw/nvdec/nvdec_fmt.h
+libnvdec_chroma_plugin_la_LDFLAGS = $(AM_LDFLAGS) -rpath '$(nvdecdir)'
+if HAVE_NVDEC
+nvdec_LTLIBRARIES = libnvdec_chroma_plugin.la
+endif
+
diff --git a/modules/hw/nvdec/chroma.c b/modules/hw/nvdec/chroma.c
new file mode 100644
index 0000000000..b963d6602b
--- /dev/null
+++ b/modules/hw/nvdec/chroma.c
@@ -0,0 +1,132 @@
+/*****************************************************************************
+ * chroma.c: NVDEC/CUDA chroma conversion filter
+ *****************************************************************************
+ * Copyright (C) 2019 VLC authors and VideoLAN
+ *
+ * Authors: Steve Lhomme <robux4 at videolabs.io>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_filter.h>
+#include <vlc_codec.h>
+
+#include "nvdec_fmt.h"
+
+static int OpenCUDAToCPU( vlc_object_t * );
+
+vlc_module_begin() /* plugin descriptor for the CUDA/NVDEC chroma converter */
+ set_shortname(N_("CUDA converter"))
+ set_description(N_("CUDA/NVDEC Chroma Converter filter"))
+ set_category(CAT_VIDEO)
+ set_subcategory(SUBCAT_VIDEO_VFILTER)
+ set_capability("video converter", 10) /* registered under the "video converter" capability with score 10 */
+ set_callbacks(OpenCUDAToCPU, NULL) /* OpenCUDAToCPU is the open callback; no close callback is registered */
+vlc_module_end()
+
+typedef struct /* presumably the filter's private state; NOTE(review): not referenced by any code visible in this file */
+{
+ vlc_decoder_device *device; /* decoder device handle; never read or written in this file's visible code */
+} nvdec_filter_sys_t;
+
+/**
+ * Turns the status of a CUDA driver call into a VLC error code, logging
+ * the failure through the filter when the call did not succeed.
+ *
+ * \param p_filter filter used as the logging object
+ * \param devsys   NVDEC device data providing the CUDA function table
+ * \param result   status returned by the CUDA call
+ * \param psz_func name of the CUDA function that was called (for the log)
+ * \return VLC_SUCCESS when result is CUDA_SUCCESS, VLC_EGENERIC otherwise
+ */
+static inline int CudaCall(filter_t *p_filter, decoder_device_nvdec_t *devsys, CUresult result, const char *psz_func)
+{
+    if (unlikely(result != CUDA_SUCCESS)) {
+        /* The lookups below can fail for an unrecognized error code and
+         * leave the string pointer NULL, so never hand the raw pointers
+         * to a %s conversion. */
+        const char *psz_err = NULL, *psz_err_str = NULL;
+        devsys->cudaFunctions->cuGetErrorName(result, &psz_err);
+        devsys->cudaFunctions->cuGetErrorString(result, &psz_err_str);
+        msg_Err(p_filter, "%s failed: %s (%s)", psz_func,
+                psz_err_str != NULL ? psz_err_str : "unknown error",
+                psz_err != NULL ? psz_err : "unknown");
+        return VLC_EGENERIC;
+    }
+    return VLC_SUCCESS;
+}
+
+/* Calls devsys->cudaFunctions->func(...) and routes its status through
+ * CudaCall(). The call site must have `p_filter` and `devsys` in scope. */
+#define CALL_CUDA(func, ...) CudaCall(p_filter, devsys, devsys->cudaFunctions->func(__VA_ARGS__), #func)
+
+
+/**
+ * Copies an NVDEC picture held in GPU memory into a newly allocated CPU
+ * picture obtained from the filter.
+ *
+ * The CUDA context and function table are read from the NVDEC picture
+ * context attached to the source picture. The source buffer is read as
+ * consecutive planes, each starting bufferHeight rows after the previous.
+ *
+ * \param p_filter the converter filter, used to allocate the output picture
+ *                 and for logging
+ * \param src      source picture carrying a pic_context_nvdec_t context;
+ *                 it is always released before returning
+ * \return the CPU picture carrying the properties of src, or NULL if the
+ *         output picture could not be allocated or a CUDA call failed
+ */
+static picture_t * FilterCUDAToCPU( filter_t *p_filter, picture_t *src )
+{
+    picture_t *dst = filter_NewPicture( p_filter );
+    if (unlikely(dst == NULL))
+    {
+        picture_Release(src);
+        return NULL;
+    }
+
+    pic_context_nvdec_t *srcpic = container_of(src->context, pic_context_nvdec_t, ctx);
+    decoder_device_nvdec_t *devsys = &srcpic->nvdecDevice;
+
+    int result;
+    result = CALL_CUDA(cuCtxPushCurrent, devsys->cuCtx);
+    if (result != VLC_SUCCESS)
+    {
+        picture_Release(dst);
+        picture_Release(src);
+        return NULL;
+    }
+
+    size_t srcY = 0;
+    for (int i_plane = 0; i_plane < dst->i_planes; i_plane++) {
+        plane_t plane = dst->p[i_plane];
+        CUDA_MEMCPY2D cu_cpy = {
+            .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
+            .srcDevice      = srcpic->devidePtr,
+            .srcY           = srcY,
+            .srcPitch       = srcpic->bufferPitch,
+            .dstMemoryType  = CU_MEMORYTYPE_HOST,
+            .dstHost        = plane.p_pixels,
+            .dstPitch       = plane.i_pitch,
+            /* clamp against the pitch of the plane being written so a row
+             * copy can never exceed the destination row */
+            .WidthInBytes   = __MIN(srcpic->bufferPitch, (unsigned)plane.i_pitch),
+            .Height         = __MIN(srcpic->bufferHeight, (unsigned)plane.i_visible_lines),
+        };
+        /* NOTE(review): the copy is queued on stream 0 and nothing here
+         * synchronizes explicitly before dst is returned; confirm this is
+         * safe for the destination memory in use. */
+        result = CALL_CUDA(cuMemcpy2DAsync, &cu_cpy, 0);
+        if (result != VLC_SUCCESS)
+        {
+            picture_Release(dst);
+            dst = NULL;
+            goto done;
+        }
+        /* the next plane starts bufferHeight rows further in the source */
+        srcY += srcpic->bufferHeight;
+    }
+
+    /* carry the date and other picture properties over to the output */
+    picture_CopyProperties(dst, src);
+
+done:
+    /* best effort: a failure to pop the context is only logged */
+    CALL_CUDA(cuCtxPopCurrent, NULL);
+    picture_Release(src);
+    return dst;
+}
+
+/**
+ * Open callback of the converter: accepts the filter only for the
+ * NVDEC opaque -> CPU chroma pairs handled by FilterCUDAToCPU.
+ *
+ * \param p_this the filter object being opened
+ * \return VLC_SUCCESS if the input/output chroma pair is supported and the
+ *         filter callback was installed, VLC_EGENERIC otherwise
+ */
+static int OpenCUDAToCPU( vlc_object_t *p_this )
+{
+    filter_t *p_filter = (filter_t *)p_this;
+    const vlc_fourcc_t chroma_in  = p_filter->fmt_in.video.i_chroma;
+    const vlc_fourcc_t chroma_out = p_filter->fmt_out.video.i_chroma;
+
+    /* CPU chroma that the given opaque input chroma converts to */
+    vlc_fourcc_t wanted_out;
+    if (chroma_in == VLC_CODEC_NVDEC_OPAQUE)
+        wanted_out = VLC_CODEC_NV12;
+    else if (chroma_in == VLC_CODEC_NVDEC_OPAQUE_10B)
+        wanted_out = VLC_CODEC_P010;
+    else if (chroma_in == VLC_CODEC_NVDEC_OPAQUE_16B)
+        wanted_out = VLC_CODEC_P016;
+    else
+        return VLC_EGENERIC;
+
+    if (chroma_out != wanted_out)
+        return VLC_EGENERIC;
+
+    p_filter->pf_video_filter = FilterCUDAToCPU;
+    return VLC_SUCCESS;
+}
diff --git a/modules/hw/nvdec/nvdec.c b/modules/hw/nvdec/nvdec.c
index f6ac3ccbc4..14651e3a5e 100644
--- a/modules/hw/nvdec/nvdec.c
+++ b/modules/hw/nvdec/nvdec.c
@@ -342,6 +342,7 @@ static struct picture_context_t *NVDecCtxClone(struct picture_context_t *srcctx)
clonectx->devidePtr = srcpic->devidePtr;
clonectx->bufferPitch = srcpic->bufferPitch;
clonectx->bufferHeight = srcpic->bufferHeight;
+ clonectx->nvdecDevice = srcpic->nvdecDevice;
return &clonectx->ctx;
}
@@ -391,6 +392,8 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
picctx->devidePtr = p_sys->outputDevicePtr[pool_idx];
picctx->bufferPitch = p_sys->outputPitch;
picctx->bufferHeight = p_sys->decoderHeight;
+ picctx->nvdecDevice.cuCtx = p_sys->cuCtx;
+ picctx->nvdecDevice.cudaFunctions = p_sys->cudaFunctions;
size_t srcY = 0;
size_t dstY = 0;
diff --git a/modules/hw/nvdec/nvdec_fmt.h b/modules/hw/nvdec/nvdec_fmt.h
index 8572817604..9ac0bb947a 100644
--- a/modules/hw/nvdec/nvdec_fmt.h
+++ b/modules/hw/nvdec/nvdec_fmt.h
@@ -46,6 +46,9 @@ typedef struct
CUdeviceptr devidePtr;
unsigned int bufferPitch;
unsigned int bufferHeight;
+
+ // temporary until the filters get it from their environment/video context
+ decoder_device_nvdec_t nvdecDevice;
} pic_context_nvdec_t;
#endif /* include-guard */
More information about the vlc-commits
mailing list