[vlc-devel] [PATCH] nvdec: removed CPU chroma fallback
quentin.chateau at deepskycorp.com
quentin.chateau at deepskycorp.com
Tue Mar 3 11:46:20 CET 2020
From: Quentin Chateau <quentin.chateau at deepskycorp.com>
A GPU to CPU video filter is available
---
modules/hw/nvdec/nvdec.c | 379 +++++++++++++----------------------
modules/hw/nvdec/nvdec_fmt.h | 9 -
modules/hw/nvdec/nvdec_gl.c | 3 -
3 files changed, 144 insertions(+), 247 deletions(-)
diff --git a/modules/hw/nvdec/nvdec.c b/modules/hw/nvdec/nvdec.c
index e5cda56ef3..72875c5ac7 100644
--- a/modules/hw/nvdec/nvdec.c
+++ b/modules/hw/nvdec/nvdec.c
@@ -113,14 +113,14 @@ static vlc_fourcc_t MapSurfaceChroma(cudaVideoChromaFormat chroma, unsigned bitD
switch (chroma) {
case cudaVideoChromaFormat_420:
if (bitDepth <= 8)
- return VLC_CODEC_NV12;
+ return VLC_CODEC_NVDEC_OPAQUE;
if (bitDepth <= 10)
- return VLC_CODEC_P010;
- return VLC_CODEC_P016;
+ return VLC_CODEC_NVDEC_OPAQUE_10B;
+ return VLC_CODEC_NVDEC_OPAQUE_16B;
case cudaVideoChromaFormat_444:
if (bitDepth <= 8)
- return VLC_CODEC_I444;
- return VLC_CODEC_I444_16L;
+ return VLC_CODEC_NVDEC_OPAQUE_444;
+ return VLC_CODEC_NVDEC_OPAQUE_444_16B;
default:
return 0;
}
@@ -131,19 +131,15 @@ static cudaVideoSurfaceFormat MapSurfaceFmt(int i_vlc_fourcc)
switch (i_vlc_fourcc) {
case VLC_CODEC_NVDEC_OPAQUE_10B:
case VLC_CODEC_NVDEC_OPAQUE_16B:
- case VLC_CODEC_P010:
- case VLC_CODEC_P016:
return cudaVideoSurfaceFormat_P016;
case VLC_CODEC_NVDEC_OPAQUE:
- case VLC_CODEC_NV12:
return cudaVideoSurfaceFormat_NV12;
case VLC_CODEC_NVDEC_OPAQUE_444:
- case VLC_CODEC_I444:
return cudaVideoSurfaceFormat_YUV444;
case VLC_CODEC_NVDEC_OPAQUE_444_16B:
- case VLC_CODEC_I444_16L:
- return cudaVideoSurfaceFormat_YUV444_16Bit;
- default: vlc_assert_unreachable();
+ return cudaVideoSurfaceFormat_YUV444_16Bit;
+ default:
+ vlc_assert_unreachable();
}
}
@@ -151,15 +147,6 @@ static int CUtoFMT(video_format_t *fmt, const CUVIDEOFORMAT *p_format)
{
// bit depth and chroma
unsigned int i_bpp = p_format->bit_depth_luma_minus8 + 8;
- vlc_fourcc_t i_chroma;
- if (is_nvdec_opaque(fmt->i_chroma))
- i_chroma = fmt->i_chroma;
- else
- i_chroma = MapSurfaceChroma(p_format->chroma_format, i_bpp);
- if (i_chroma == 0)
- return VLC_EGENERIC;
-
- fmt->i_chroma = i_chroma;
// use the real padded size when we know it
fmt->i_width = p_format->coded_width;
fmt->i_height = p_format->coded_height;
fmt->i_x_offset = p_format->display_area.left;
@@ -179,19 +166,16 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
nvdec_ctx_t *p_sys = p_dec->p_sys;
int ret;
- if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
+ for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
{
- for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
- {
- CALL_CUDA_DEC(cuMemFree, p_sys->outputDevicePtr[i]);
- p_sys->outputDevicePtr[i] = 0;
- }
+ CALL_CUDA_DEC(cuMemFree, p_sys->outputDevicePtr[i]);
+ p_sys->outputDevicePtr[i] = 0;
+ }
- if (p_sys->out_pool)
- {
- picture_pool_Release(p_sys->out_pool);
- p_sys->out_pool = NULL;
- }
+ if (p_sys->out_pool)
+ {
+ picture_pool_Release(p_sys->out_pool);
+ p_sys->out_pool = NULL;
}
// update vlc's output format using NVDEC parser's output
@@ -231,75 +215,71 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
goto error;
// ensure the output surfaces have the same pitch so copies can work properly
- if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
+ // get the real decoder pitch
+ CUdeviceptr frameDevicePtr = 0;
+ CUVIDPROCPARAMS params = {
+ .progressive_frame = 1,
+ .top_field_first = 1,
+ };
+ ret = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, 0, &frameDevicePtr, &p_sys->outputPitch, &params );
+ if (ret != VLC_SUCCESS)
+ goto error;
+ CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
+
+ unsigned int ByteWidth = p_sys->outputPitch;
+ unsigned int Height = p_dec->fmt_out.video.i_height;
+ switch (dparams.OutputFormat)
{
- // get the real decoder pitch
- CUdeviceptr frameDevicePtr = 0;
- CUVIDPROCPARAMS params = {
- .progressive_frame = 1,
- .top_field_first = 1,
- };
- ret = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, 0, &frameDevicePtr, &p_sys->outputPitch, &params );
- if (ret != VLC_SUCCESS)
- goto error;
- CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
+ case cudaVideoSurfaceFormat_YUV444:
+ case cudaVideoSurfaceFormat_YUV444_16Bit:
+ Height += 2 * Height; // 3 planes
+ break;
+ case cudaVideoSurfaceFormat_NV12:
+ case cudaVideoSurfaceFormat_P016:
+ Height += Height / 2; // U and V at quarter resolution
+ break;
+ default:
+ vlc_assert_unreachable();
+ }
- unsigned int ByteWidth = p_sys->outputPitch;
- unsigned int Height = p_dec->fmt_out.video.i_height;
- switch (dparams.OutputFormat)
+ picture_t *pics[ARRAY_SIZE(p_sys->outputDevicePtr)];
+ for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
+ {
+ ret = CALL_CUDA_DEC(cuMemAlloc, &p_sys->outputDevicePtr[i], ByteWidth * Height);
+ if (ret != VLC_SUCCESS || p_sys->outputDevicePtr[i] == 0)
+ goto clean_pics;
+ picture_resource_t res = {
+ .p_sys = (void*)(uintptr_t)i,
+ };
+ pics[i] = picture_NewFromResource( &p_dec->fmt_out.video, &res );
+ if (unlikely(pics[i] == NULL))
{
- case cudaVideoSurfaceFormat_YUV444:
- case cudaVideoSurfaceFormat_YUV444_16Bit:
- Height += 2 * Height; // 3 planes
- break;
- case cudaVideoSurfaceFormat_NV12:
- case cudaVideoSurfaceFormat_P016:
- Height += Height / 2; // U and V at quarter resolution
- break;
- default:
- vlc_assert_unreachable();
+ msg_Dbg(p_dec, "failed to get a picture for the buffer");
+ ret = VLC_ENOMEM;
+ goto clean_pics;
}
-
- picture_t *pics[ARRAY_SIZE(p_sys->outputDevicePtr)];
- for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
- {
- ret = CALL_CUDA_DEC(cuMemAlloc, &p_sys->outputDevicePtr[i], ByteWidth * Height);
- if (ret != VLC_SUCCESS || p_sys->outputDevicePtr[i] == 0)
- goto clean_pics;
- picture_resource_t res = {
- .p_sys = (void*)(uintptr_t)i,
- };
- pics[i] = picture_NewFromResource( &p_dec->fmt_out.video, &res );
- if (unlikely(pics[i] == NULL))
- {
- msg_Dbg(p_dec, "failed to get a picture for the buffer");
- ret = VLC_ENOMEM;
- goto clean_pics;
- }
- continue;
+ continue;
clean_pics:
- if (p_sys->outputDevicePtr[i])
+ if (p_sys->outputDevicePtr[i])
+ {
+ CALL_CUDA_DEC(cuMemFree, p_sys->outputDevicePtr[i]);
+ p_sys->outputDevicePtr[i] = 0;
+ }
+ if (i > 0)
+ {
+ while (i--)
{
+ picture_Release(pics[i]);
CALL_CUDA_DEC(cuMemFree, p_sys->outputDevicePtr[i]);
p_sys->outputDevicePtr[i] = 0;
}
- if (i > 0)
- {
- while (i--)
- {
- picture_Release(pics[i]);
- CALL_CUDA_DEC(cuMemFree, p_sys->outputDevicePtr[i]);
- p_sys->outputDevicePtr[i] = 0;
- }
- }
- break;
}
- if (ret != VLC_SUCCESS)
- goto error;
-
- p_sys->out_pool = picture_pool_New( ARRAY_SIZE(p_sys->outputDevicePtr), pics );
+ break;
}
+ if (ret != VLC_SUCCESS)
+ goto error;
+ p_sys->out_pool = picture_pool_New( ARRAY_SIZE(p_sys->outputDevicePtr), pics );
p_sys->decoderHeight = p_format->coded_height;
CALL_CUDA_DEC(cuCtxPopCurrent, NULL);
@@ -360,140 +340,96 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
};
int result;
- if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
- {
- p_pic = picture_pool_Wait(p_sys->out_pool);
- if (unlikely(p_pic == NULL))
- return 0;
+ p_pic = picture_pool_Wait(p_sys->out_pool);
+ if (unlikely(p_pic == NULL))
+ return 0;
- result = CALL_CUDA_DEC(cuCtxPushCurrent, p_sys->cuCtx);
- if (unlikely(result != VLC_SUCCESS))
- {
- picture_Release(p_pic);
- return 0;
- }
+ result = CALL_CUDA_DEC(cuCtxPushCurrent, p_sys->cuCtx);
+ if (unlikely(result != VLC_SUCCESS))
+ {
+ picture_Release(p_pic);
+ return 0;
+ }
- unsigned int i_pitch;
+ unsigned int i_pitch;
- // Map decoded frame to a device pointer
- result = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, p_dispinfo->picture_index,
- &frameDevicePtr, &i_pitch, &params );
- if (result != VLC_SUCCESS)
- goto error;
+ // Map decoded frame to a device pointer
+ result = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, p_dispinfo->picture_index,
+ &frameDevicePtr, &i_pitch, &params );
+ if (result != VLC_SUCCESS)
+ goto error;
- // put a new context in the output picture
- pic_context_nvdec_t *picctx = malloc(sizeof(*picctx));
- if (unlikely(picctx == NULL))
- goto error;
- picctx->ctx = (picture_context_t) {
- NVDecCtxDestroy, NVDecCtxClone,
- p_sys->vctx_out,
- };
- uintptr_t pool_idx = (uintptr_t)p_pic->p_sys;
- picctx->devicePtr = p_sys->outputDevicePtr[pool_idx];
- picctx->bufferPitch = p_sys->outputPitch;
- picctx->bufferHeight = p_sys->decoderHeight;
-
- size_t srcY = 0;
- size_t dstY = 0;
- if (p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444 || p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444_16B)
- {
- for (int i_plane = 0; i_plane < 3; i_plane++) {
- CUDA_MEMCPY2D cu_cpy = {
- .srcMemoryType = CU_MEMORYTYPE_DEVICE,
- .srcDevice = frameDevicePtr,
- .srcY = srcY,
- .srcPitch = i_pitch,
- .dstMemoryType = CU_MEMORYTYPE_DEVICE,
- .dstDevice = picctx->devicePtr,
- .dstPitch = picctx->bufferPitch,
- .dstY = dstY,
- .WidthInBytes = i_pitch,
- .Height = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
- };
- result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
- if (unlikely(result != VLC_SUCCESS))
- {
- free(picctx);
- goto error;
- }
- srcY += picctx->bufferHeight;
- dstY += p_sys->decoderHeight;
- }
- }
- else
- {
- for (int i_plane = 0; i_plane < 2; i_plane++) {
- CUDA_MEMCPY2D cu_cpy = {
- .srcMemoryType = CU_MEMORYTYPE_DEVICE,
- .srcDevice = frameDevicePtr,
- .srcY = srcY,
- .srcPitch = i_pitch,
- .dstMemoryType = CU_MEMORYTYPE_DEVICE,
- .dstDevice = picctx->devicePtr,
- .dstPitch = picctx->bufferPitch,
- .dstY = dstY,
- .WidthInBytes = i_pitch,
- .Height = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
- };
- if (i_plane == 1)
- cu_cpy.Height >>= 1;
- result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
- if (unlikely(result != VLC_SUCCESS))
- {
- free(picctx);
- goto error;
- }
- srcY += picctx->bufferHeight;
- dstY += p_sys->decoderHeight;
+ // put a new context in the output picture
+ pic_context_nvdec_t *picctx = malloc(sizeof(*picctx));
+ if (unlikely(picctx == NULL))
+ goto error;
+ picctx->ctx = (picture_context_t) {
+ NVDecCtxDestroy, NVDecCtxClone,
+ p_sys->vctx_out,
+ };
+ uintptr_t pool_idx = (uintptr_t)p_pic->p_sys;
+ picctx->devicePtr = p_sys->outputDevicePtr[pool_idx];
+ picctx->bufferPitch = p_sys->outputPitch;
+ picctx->bufferHeight = p_sys->decoderHeight;
+
+ size_t srcY = 0;
+ size_t dstY = 0;
+ if (p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444 || p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444_16B)
+ {
+ for (int i_plane = 0; i_plane < 3; i_plane++) {
+ CUDA_MEMCPY2D cu_cpy = {
+ .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+ .srcDevice = frameDevicePtr,
+ .srcY = srcY,
+ .srcPitch = i_pitch,
+ .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+ .dstDevice = picctx->devicePtr,
+ .dstPitch = picctx->bufferPitch,
+ .dstY = dstY,
+ .WidthInBytes = i_pitch,
+ .Height = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
+ };
+ result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
+ if (unlikely(result != VLC_SUCCESS))
+ {
+ free(picctx);
+ goto error;
}
+ srcY += picctx->bufferHeight;
+ dstY += p_sys->decoderHeight;
}
- p_pic->context = &picctx->ctx;
- vlc_video_context_Hold(picctx->ctx.vctx);
}
else
{
- p_pic = decoder_NewPicture(p_dec);
- if (unlikely(p_pic == NULL))
- return 0;
-
- result = CALL_CUDA_DEC(cuCtxPushCurrent, p_sys->cuCtx);
- if (unlikely(result != VLC_SUCCESS))
- {
- picture_Release(p_pic);
- return 0;
- }
-
- unsigned int i_pitch;
-
- // Map decoded frame to a device pointer
- result = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, p_dispinfo->picture_index,
- &frameDevicePtr, &i_pitch, &params );
- if (result != VLC_SUCCESS)
- goto error;
-
- // Copy decoded frame into a new VLC picture
- size_t srcY = 0;
- for (int i_plane = 0; i_plane < p_pic->i_planes; i_plane++) {
- plane_t plane = p_pic->p[i_plane];
+ for (int i_plane = 0; i_plane < 2; i_plane++) {
CUDA_MEMCPY2D cu_cpy = {
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
.srcDevice = frameDevicePtr,
.srcY = srcY,
.srcPitch = i_pitch,
- .dstMemoryType = CU_MEMORYTYPE_HOST,
- .dstHost = plane.p_pixels,
- .dstPitch = plane.i_pitch,
+ .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+ .dstDevice = picctx->devicePtr,
+ .dstPitch = picctx->bufferPitch,
+ .dstY = dstY,
.WidthInBytes = i_pitch,
- .Height = plane.i_visible_lines,
+ .Height = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
};
- result = CALL_CUDA_DEC(cuMemcpy2D, &cu_cpy);
- if (result != VLC_SUCCESS)
+ if (i_plane == 1)
+ cu_cpy.Height >>= 1;
+ result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
+ if (unlikely(result != VLC_SUCCESS))
+ {
+ free(picctx);
goto error;
- srcY += p_sys->decoderHeight;
+ }
+ srcY += picctx->bufferHeight;
+ dstY += p_sys->decoderHeight;
}
}
+ p_pic->context = &picctx->ctx;
+ vlc_video_context_Hold(picctx->ctx.vctx);
+
// Release surface on GPU
result = CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
if (unlikely(result != VLC_SUCCESS))
@@ -926,41 +862,14 @@ static int OpenDecoder(vlc_object_t *p_this)
goto error;
}
- vlc_fourcc_t output_chromas[3];
- size_t chroma_idx = 0;
- if (cudaChroma == cudaVideoChromaFormat_420)
- {
- if (i_depth_luma >= 16)
- output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_16B;
- else if (i_depth_luma > 8)
- output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_10B;
- else
- output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE;
- }
- else if (cudaChroma == cudaVideoChromaFormat_444)
- {
- if (i_depth_luma > 8)
- output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_444_16B;
- else
- output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_444;
- }
-
- output_chromas[chroma_idx++] = MapSurfaceChroma(cudaChroma, i_depth_luma);
- output_chromas[chroma_idx++] = 0;
-
- for (chroma_idx = 0; output_chromas[chroma_idx] != 0; chroma_idx++)
+ p_dec->fmt_out.i_codec = p_dec->fmt_out.video.i_chroma =
+ MapSurfaceChroma(cudaChroma, i_depth_luma);
+ result = decoder_UpdateVideoOutput(p_dec, p_sys->vctx_out);
+ if (result != VLC_SUCCESS)
{
- p_dec->fmt_out.i_codec = p_dec->fmt_out.video.i_chroma = output_chromas[chroma_idx];
- result = decoder_UpdateVideoOutput(p_dec, p_sys->vctx_out);
- if (result == VLC_SUCCESS)
- {
- msg_Dbg(p_dec, "using chroma %4.4s", (char*)&p_dec->fmt_out.video.i_chroma);
- break;
- }
msg_Warn(p_dec, "Failed to use output chroma %4.4s", (char*)&p_dec->fmt_out.video.i_chroma);
- }
- if (result != VLC_SUCCESS)
goto error;
+ }
int deinterlace_mode = var_InheritInteger(p_dec, "nvdec-deint");
if (deinterlace_mode <= 0)
diff --git a/modules/hw/nvdec/nvdec_fmt.h b/modules/hw/nvdec/nvdec_fmt.h
index 25784cca6d..d84672d8fb 100644
--- a/modules/hw/nvdec/nvdec_fmt.h
+++ b/modules/hw/nvdec/nvdec_fmt.h
@@ -51,15 +51,6 @@ static inline int CudaCheckErr(vlc_object_t *obj, CudaFunctions *cudaFunctions,
return VLC_SUCCESS;
}
-static inline bool is_nvdec_opaque(vlc_fourcc_t fourcc)
-{
- return fourcc == VLC_CODEC_NVDEC_OPAQUE ||
- fourcc == VLC_CODEC_NVDEC_OPAQUE_10B ||
- fourcc == VLC_CODEC_NVDEC_OPAQUE_16B ||
- fourcc == VLC_CODEC_NVDEC_OPAQUE_444 ||
- fourcc == VLC_CODEC_NVDEC_OPAQUE_444_16B;
-}
-
/* for VLC_CODEC_NVDEC_OPAQUE / VLC_CODEC_NVDEC_OPAQUE_16B */
typedef struct
{
diff --git a/modules/hw/nvdec/nvdec_gl.c b/modules/hw/nvdec/nvdec_gl.c
index 137e730028..62a503a8e8 100644
--- a/modules/hw/nvdec/nvdec_gl.c
+++ b/modules/hw/nvdec/nvdec_gl.c
@@ -156,9 +156,6 @@ static void Close(vlc_object_t *obj)
static int Open(vlc_object_t *obj)
{
struct vlc_gl_interop *interop = (void *) obj;
- if (!is_nvdec_opaque(interop->fmt.i_chroma))
- return VLC_EGENERIC;
-
vlc_decoder_device *device = vlc_video_context_HoldDevice(interop->vctx);
if (device == NULL || device->type != VLC_DECODER_DEVICE_NVDEC)
return VLC_EGENERIC;
--
2.17.1
More information about the vlc-devel
mailing list