[vlc-devel] [PATCH 4/6] nvdec: output a custom NVDEC opaque format
Steve Lhomme
robux4 at ycbcr.xyz
Thu Sep 12 14:44:20 CEST 2019
---
include/vlc_codec.h | 2 +
modules/codec/Makefile.am | 3 +-
modules/codec/nvdec.c | 255 +++++++++++++++++++++++++++----
modules/video_chroma/nvdec_fmt.h | 50 ++++++
4 files changed, 277 insertions(+), 33 deletions(-)
create mode 100644 modules/video_chroma/nvdec_fmt.h
diff --git a/include/vlc_codec.h b/include/vlc_codec.h
index 8ecdcadb396..55e24cb9648 100644
--- a/include/vlc_codec.h
+++ b/include/vlc_codec.h
@@ -491,6 +491,7 @@ enum vlc_decoder_device_type
VLC_DECODER_DEVICE_DXVA2,
VLC_DECODER_DEVICE_D3D11VA,
VLC_DECODER_DEVICE_AWINDOW,
+ VLC_DECODER_DEVICE_NVDEC,
VLC_DECODER_DEVICE_MMAL,
};
@@ -525,6 +526,7 @@ typedef struct vlc_decoder_device
* DXVA2: IDirect3DDevice9*
* D3D11VA: ID3D11DeviceContext*
* AWindow: android AWindowHandler*
+ * NVDEC: decoder_device_nvdec_t*
* MMAL: MMAL_PORT_T*
*/
void *opaque;
diff --git a/modules/codec/Makefile.am b/modules/codec/Makefile.am
index a6a04b0d400..df45ba76bfb 100644
--- a/modules/codec/Makefile.am
+++ b/modules/codec/Makefile.am
@@ -442,7 +442,8 @@ libnvdec_plugin_la_SOURCES = \
codec/nvdec.c codec/hxxx_helper.c codec/hxxx_helper.h \
packetizer/hxxx_nal.h packetizer/hxxx_nal.c \
packetizer/h264_nal.c packetizer/h264_nal.h \
- packetizer/hevc_nal.c packetizer/hevc_nal.h
+ packetizer/hevc_nal.c packetizer/hevc_nal.h \
+ video_chroma/nvdec_fmt.h
if HAVE_NVDEC
codec_LTLIBRARIES += libnvdec_plugin.la
endif
diff --git a/modules/codec/nvdec.c b/modules/codec/nvdec.c
index ee65f6f1286..3fbcd3ddbdb 100644
--- a/modules/codec/nvdec.c
+++ b/modules/codec/nvdec.c
@@ -28,15 +28,19 @@
#include <vlc_plugin.h>
#include <vlc_codec.h>
#include <vlc_messages.h>
+#include <vlc_picture_pool.h>
#define FFNV_LOG_FUNC(logctx, msg, ...) msg_Err((vlc_object_t*)logctx, msg, __VA_ARGS__)
#define FFNV_DEBUG_LOG_FUNC(logctx, msg, ...) msg_Dbg((vlc_object_t*)logctx, msg, __VA_ARGS__)
#include <ffnvcodec/dynlink_loader.h>
#include "hxxx_helper.h"
+#include "../video_chroma/nvdec_fmt.h"
#define MAX_HXXX_SURFACES (16 + 1)
#define NVDEC_DISPLAY_SURFACES 1
+#define MAX_POOL_SIZE 4 // number of in-flight buffers, if more are needed the decoder waits
+
#define OUTPUT_WIDTH_ALIGN 256
@@ -61,6 +65,10 @@ typedef struct nvdec_ctx {
size_t decoderHeight;
unsigned int outputPitch;
+ picture_pool_t *out_pool;
+
+ CUdeviceptr outputDevicePtr[MAX_POOL_SIZE];
+
} nvdec_ctx_t;
static int OpenDecoder(vlc_object_t *p_this);
@@ -111,9 +119,12 @@ static inline int CudaCall(decoder_t *p_dec, CUresult result, const char *psz_fu
static cudaVideoSurfaceFormat MapSurfaceFmt(int i_vlc_fourcc)
{
switch (i_vlc_fourcc) {
+ case VLC_CODEC_NVDEC_OPAQUE_10B:
+ case VLC_CODEC_NVDEC_OPAQUE_16B:
case VLC_CODEC_P010:
case VLC_CODEC_P016:
return cudaVideoSurfaceFormat_P016;
+ case VLC_CODEC_NVDEC_OPAQUE:
case VLC_CODEC_NV12:
return cudaVideoSurfaceFormat_NV12;
// case VLC_CODEC_I444:
@@ -147,7 +158,10 @@ static int CUtoFMT(video_format_t *fmt, const CUVIDEOFORMAT *p_format)
// bit depth and chroma
unsigned int i_bpp = p_format->bit_depth_luma_minus8 + 8;
vlc_fourcc_t i_chroma;
- i_chroma = MapSurfaceChroma(p_format->chroma_format, i_bpp);
+ if (is_nvdec_opaque(fmt->i_chroma))
+ i_chroma = fmt->i_chroma;
+ else
+ i_chroma = MapSurfaceChroma(p_format->chroma_format, i_bpp);
if (i_chroma == 0)
return VLC_EGENERIC;
@@ -171,12 +185,40 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
nvdec_ctx_t *p_sys = p_dec->p_sys;
int ret;
+ if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
+ {
+ for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
+ {
+ CALL_CUDA(cuMemFree, p_sys->outputDevicePtr[i]);
+ p_sys->outputDevicePtr[i] = 0;
+ }
+
+ if (p_sys->out_pool)
+ {
+ picture_pool_Release(p_sys->out_pool);
+ p_sys->out_pool = NULL;
+ }
+ }
+
// update vlc's output format using NVDEC parser's output
ret = CUtoFMT(&p_dec->fmt_out.video, p_format);
if (ret != VLC_SUCCESS)
+ {
+ msg_Dbg(p_dec, "unsupported Chroma %d + BitDepth %d", p_format->chroma_format, p_format->bit_depth_luma_minus8 + 8);
goto error;
+ }
p_dec->fmt_out.i_codec = p_dec->fmt_out.video.i_chroma;
+ ret = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
+ if (ret != VLC_SUCCESS)
+ goto error;
+
+ if (p_sys->cudecoder)
+ {
+ CALL_CUVID(cuvidDestroyDecoder, p_sys->cudecoder);
+ p_sys->cudecoder = NULL;
+ }
+
CUVIDDECODECREATEINFO dparams = {
.ulWidth = p_dec->fmt_out.video.i_width,
.ulHeight = p_dec->fmt_out.video.i_height,
@@ -191,23 +233,71 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
.DeinterlaceMode = p_sys->deintMode
};
ret = CALL_CUVID(cuvidCreateDecoder, &p_sys->cudecoder, &dparams);
-
- ret = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
if (ret != VLC_SUCCESS)
goto error;
+ if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
+ {
+ unsigned int ByteWidth = p_dec->fmt_out.video.i_width;
+ if ( p_dec->fmt_out.video.i_chroma != VLC_CODEC_NVDEC_OPAQUE)
+ // 10 bits or more use double width in bytes
+ ByteWidth *= 2;
+ unsigned int Height = p_dec->fmt_out.video.i_height;
+ Height += (Height + 1) / 2;
+
+ picture_t *pics[ARRAY_SIZE(p_sys->outputDevicePtr)];
+ for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
+ {
+ size_t pitch;
+ ret = CALL_CUDA(cuMemAllocPitch, &p_sys->outputDevicePtr[i], &pitch, ByteWidth, Height, 16);
+ if (ret != VLC_SUCCESS || p_sys->outputDevicePtr[i] == 0)
+ goto clean_pics;
+ p_sys->outputPitch = pitch;
+ picture_resource_t res = {
+ .p_sys = (void*)(uintptr_t)i,
+ };
+ pics[i] = picture_NewFromResource( &p_dec->fmt_out.video, &res );
+ if (unlikely(pics[i] == NULL))
+ {
+ msg_Dbg(p_dec, "failed to get a picture for the buffer");
+ ret = VLC_ENOMEM;
+ goto clean_pics;
+ }
+ continue;
+clean_pics:
+ if (p_sys->outputDevicePtr[i])
+ {
+ CALL_CUDA(cuMemFree, p_sys->outputDevicePtr[i]);
+ p_sys->outputDevicePtr[i] = 0;
+ }
+ if (i > 0)
+ {
+ while (i--)
+ {
+ picture_Release(pics[i]);
+ CALL_CUDA(cuMemFree, p_sys->outputDevicePtr[i]);
+ p_sys->outputDevicePtr[i] = 0;
+ }
+ }
+ break;
+ }
+ if (ret != VLC_SUCCESS)
+ goto error;
+
+ p_sys->out_pool = picture_pool_New( ARRAY_SIZE(p_sys->outputDevicePtr), pics );
+ }
+ else
+ {
CUdeviceptr frameDevicePtr = 0;
CUVIDPROCPARAMS params = {
.progressive_frame = 1,
.top_field_first = 1,
- };
+ };
ret = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, 0, &frameDevicePtr, &p_sys->outputPitch, &params );
if (ret != VLC_SUCCESS)
goto error;
CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
-
- if (p_sys->cudecoder)
- CALL_CUVID(cuvidDestroyDecoder, p_sys->cudecoder);
+ }
p_sys->decoderHeight = p_format->coded_height;
@@ -215,7 +305,8 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
// ensure the output surfaces have the same pitch so copies can work properly
p_dec->fmt_out.video.i_width = p_sys->outputPitch;
- if ( p_dec->fmt_out.video.i_chroma != VLC_CODEC_NV12 )
+ if ( p_dec->fmt_out.video.i_chroma != VLC_CODEC_NV12 &&
+ p_dec->fmt_out.video.i_chroma != VLC_CODEC_NVDEC_OPAQUE )
// 10 bits of more use double width in bytes
p_dec->fmt_out.video.i_width >>= 1;
@@ -242,30 +333,114 @@ static int CUDAAPI HandlePictureDecode(void *p_opaque, CUVIDPICPARAMS *p_picpara
return (ret == VLC_SUCCESS);
}
-static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_dispinfo)
+static void NVDecCtxDestroy(struct picture_context_t *picctx)
{
- decoder_t *p_dec = (decoder_t *) p_opaque;
- nvdec_ctx_t *p_sys = p_dec->p_sys;
+ pic_context_nvdec_t *srcpic = container_of(picctx, pic_context_nvdec_t, ctx);
+ free(srcpic);
+}
- picture_t * p_pic = decoder_NewPicture(p_dec);
- if (unlikely(p_pic == NULL))
- return 0;
+static struct picture_context_t *NVDecCtxClone(struct picture_context_t *srcctx)
+{
+ pic_context_nvdec_t *clonectx = malloc(sizeof(*clonectx));
+ if (unlikely(clonectx == NULL))
+ return NULL;
+ pic_context_nvdec_t *srcpic = container_of(srcctx, pic_context_nvdec_t, ctx);
- int result = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
- if (unlikely(result != VLC_SUCCESS))
- {
- picture_Release(p_pic);
- return 0;
- }
+ clonectx->ctx.destroy = NVDecCtxDestroy;
+ clonectx->ctx.copy = NVDecCtxClone;
+ clonectx->devidePtr = srcpic->devidePtr;
+ clonectx->bufferPitch = srcpic->bufferPitch;
+ return &clonectx->ctx;
+}
+
+static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_dispinfo)
+{
+ decoder_t *p_dec = (decoder_t *) p_opaque;
+ nvdec_ctx_t *p_sys = p_dec->p_sys;
+ picture_t *p_pic = NULL;
CUdeviceptr frameDevicePtr = 0;
- unsigned int i_pitch;
CUVIDPROCPARAMS params = {
.progressive_frame = p_sys->deintMode == cudaVideoDeinterlaceMode_Weave ? 1 : p_dispinfo->progressive_frame,
.top_field_first = p_dispinfo->top_field_first,
.second_field = p_dispinfo->repeat_first_field + 1,
.unpaired_field = p_dispinfo->repeat_first_field < 0,
};
+ int result;
+
+ if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
+ {
+ p_pic = picture_pool_Wait(p_sys->out_pool);
+ if (unlikely(p_pic == NULL))
+ return 0;
+
+ result = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
+ if (unlikely(result != VLC_SUCCESS))
+ {
+ picture_Release(p_pic);
+ return 0;
+ }
+
+ unsigned int i_pitch;
+
+ // Map decoded frame to a device pointer
+ result = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, p_dispinfo->picture_index,
+ &frameDevicePtr, &i_pitch, &params );
+ if (result != VLC_SUCCESS)
+ goto error;
+
+ // put a new context in the output picture
+ pic_context_nvdec_t *picctx = malloc(sizeof(*picctx));
+ if (unlikely(picctx == NULL))
+ goto error;
+ picctx->ctx.destroy = NVDecCtxDestroy;
+ picctx->ctx.copy = NVDecCtxClone;
+ uintptr_t pool_idx = (uintptr_t)p_pic->p_sys;
+ picctx->devidePtr = p_sys->outputDevicePtr[pool_idx];
+ picctx->bufferPitch = p_sys->outputPitch;
+
+ size_t srcY = 0;
+ size_t dstY = 0;
+ for (int i_plane = 0; i_plane < 2; i_plane++) {
+ CUDA_MEMCPY2D cu_cpy = {
+ .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+ .srcDevice = frameDevicePtr,
+ .srcY = srcY,
+ .srcPitch = i_pitch,
+ .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+ .dstDevice = picctx->devidePtr,
+ .dstPitch = picctx->bufferPitch,
+ .dstY = dstY,
+ .WidthInBytes = i_pitch,
+ .Height = __MIN(p_sys->decoderHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
+ };
+ if (i_plane == 1)
+ cu_cpy.Height >>= 1;
+ result = CALL_CUDA(cuMemcpy2DAsync, &cu_cpy, 0);
+ if (unlikely(result != VLC_SUCCESS))
+ {
+ free(picctx);
+ goto error;
+ }
+ srcY += p_sys->decoderHeight;
+ dstY += p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height;
+ }
+ p_pic->context = &picctx->ctx;
+ }
+ else
+ {
+ p_pic = decoder_NewPicture(p_dec);
+ if (unlikely(p_pic == NULL))
+ return 0;
+
+ result = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
+ if (unlikely(result != VLC_SUCCESS))
+ {
+ picture_Release(p_pic);
+ return 0;
+ }
+
+ unsigned int i_pitch;
// Map decoded frame to a device pointer
result = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, p_dispinfo->picture_index,
@@ -293,10 +468,11 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
goto error;
srcY += p_sys->decoderHeight;
}
+ }
// Release surface on GPU
result = CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
- if (result != VLC_SUCCESS)
+ if (unlikely(result != VLC_SUCCESS))
goto error;
CALL_CUDA(cuCtxPopCurrent, NULL);
@@ -319,8 +495,11 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
return 1;
error:
+ if (frameDevicePtr)
+ CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
CALL_CUDA(cuCtxPopCurrent, NULL);
- picture_Release(p_pic);
+ if (p_pic)
+ picture_Release(p_pic);
return 0;
}
@@ -626,15 +805,12 @@ static int OpenDecoder(vlc_object_t *p_this)
p_dec->fmt_out.video.i_visible_height = i_vh;
}
- if(!p_dec->fmt_in.video.i_sar_num || !p_dec->fmt_in.video.i_sar_den)
+ int i_sar_num, i_sar_den;
+ if (VLC_SUCCESS ==
+ hxxx_helper_get_current_sar(&p_sys->hh, &i_sar_num, &i_sar_den))
{
- int i_sar_num, i_sar_den;
- if (VLC_SUCCESS ==
- hxxx_helper_get_current_sar(&p_sys->hh, &i_sar_num, &i_sar_den))
- {
- p_dec->fmt_out.video.i_sar_num = i_sar_num;
- p_dec->fmt_out.video.i_sar_den = i_sar_den;
- }
+ p_dec->fmt_out.video.i_sar_num = i_sar_num;
+ p_dec->fmt_out.video.i_sar_den = i_sar_den;
}
}
else if (p_dec->fmt_in.i_codec == VLC_CODEC_VP9)
@@ -690,8 +866,18 @@ static int OpenDecoder(vlc_object_t *p_this)
goto error;
}
- vlc_fourcc_t output_chromas[2];
+ vlc_fourcc_t output_chromas[3];
size_t chroma_idx = 0;
+ if (cudaChroma == cudaVideoChromaFormat_420)
+ {
+ if (i_depth_luma >= 16)
+ output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_16B;
+ else if (i_depth_luma >= 10)
+ output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_10B;
+ else
+ output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE;
+ }
+
output_chromas[chroma_idx++] = MapSurfaceChroma(cudaChroma, i_depth_luma);
output_chromas[chroma_idx++] = 0;
@@ -735,6 +921,11 @@ static void CloseDecoder(vlc_object_t *p_this)
nvdec_ctx_t *p_sys = p_dec->p_sys;
CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
CALL_CUDA(cuCtxPopCurrent, NULL);
+
+ for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
+ CALL_CUDA(cuMemFree, p_sys->outputDevicePtr[i]);
+ if (p_sys->out_pool)
+ picture_pool_Release(p_sys->out_pool);
if (p_sys->cudecoder)
CALL_CUVID(cuvidDestroyDecoder, p_sys->cudecoder);
if (p_sys->cuparser)
diff --git a/modules/video_chroma/nvdec_fmt.h b/modules/video_chroma/nvdec_fmt.h
new file mode 100644
index 00000000000..5ba0d8d71f7
--- /dev/null
+++ b/modules/video_chroma/nvdec_fmt.h
@@ -0,0 +1,50 @@
+/*****************************************************************************
+ * nvdec_fmt.h : NVDEC common code
+ *****************************************************************************
+ * Copyright © 2019 VLC authors, VideoLAN and VideoLabs
+ *
+ * Authors: Steve Lhomme <robux4 at videolabs.io>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_VIDEOCHROMA_NVDEC_FMT_H_
+#define VLC_VIDEOCHROMA_NVDEC_FMT_H_
+
+#include <ffnvcodec/dynlink_loader.h>
+
+typedef struct {
+
+ CudaFunctions *cudaFunctions;
+ CUcontext cuCtx;
+
+} decoder_device_nvdec_t;
+
+static inline bool is_nvdec_opaque(vlc_fourcc_t fourcc)
+{
+ return fourcc == VLC_CODEC_NVDEC_OPAQUE ||
+ fourcc == VLC_CODEC_NVDEC_OPAQUE_10B ||
+ fourcc == VLC_CODEC_NVDEC_OPAQUE_16B;
+}
+
+/* for VLC_CODEC_NVDEC_OPAQUE / VLC_CODEC_NVDEC_OPAQUE_10B / VLC_CODEC_NVDEC_OPAQUE_16B */
+typedef struct
+{
+ picture_context_t ctx;
+ CUdeviceptr devidePtr;
+ unsigned int bufferPitch;
+} pic_context_nvdec_t;
+
+#endif /* include-guard */
--
2.17.1
More information about the vlc-devel
mailing list