[vlc-devel] [PATCH 2/3] nvdec: simplify GPU-GPU copy
quentin.chateau at deepskycorp.com
quentin.chateau at deepskycorp.com
Tue Mar 24 17:38:39 CET 2020
From: Quentin Chateau <quentin.chateau at deepskycorp.com>
Store the actual bufferHeight in the picture context
and perform the GPU-GPU copy as a single call
to cuMemcpy2DAsync since neither buffer uses planes.
---
modules/hw/nvdec/nvdec.c | 50 +++++-----------------------------------
1 file changed, 6 insertions(+), 44 deletions(-)
diff --git a/modules/hw/nvdec/nvdec.c b/modules/hw/nvdec/nvdec.c
index cff5a263d0..34f20c0a35 100644
--- a/modules/hw/nvdec/nvdec.c
+++ b/modules/hw/nvdec/nvdec.c
@@ -94,10 +94,10 @@ typedef struct nvdec_ctx {
cudaVideoDeinterlaceMode deintMode;
// NVDEC doesn't stop even if HandleVideoSequence fails
bool b_nvparser_success;
- size_t decoderHeight;
CUdeviceptr outputDevicePtr[MAX_POOL_SIZE];
unsigned int outputPitch;
+ unsigned int outputHeight;
picture_pool_t *out_pool;
vlc_video_context *vctx_out;
@@ -258,6 +258,7 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
default:
vlc_assert_unreachable();
}
+ p_sys->outputHeight = Height;
picture_t *pics[ARRAY_SIZE(p_sys->outputDevicePtr)];
for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
@@ -299,8 +300,6 @@ clean_pics:
p_sys->out_pool = picture_pool_New( ARRAY_SIZE(p_sys->outputDevicePtr), pics );
}
- p_sys->decoderHeight = p_format->coded_height;
-
CALL_CUDA_DEC(cuCtxPopCurrent, NULL);
ret = decoder_UpdateVideoOutput(p_dec, p_sys->vctx_out);
@@ -394,62 +393,25 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
uintptr_t pool_idx = (uintptr_t)p_pic->p_sys;
picctx->devicePtr = p_sys->outputDevicePtr[pool_idx];
picctx->bufferPitch = p_sys->outputPitch;
- picctx->bufferHeight = p_sys->decoderHeight;
+ picctx->bufferHeight = p_sys->outputHeight;
- size_t srcY = 0;
- size_t dstY = 0;
- if (p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444 || p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444_16B)
- {
- for (int i_plane = 0; i_plane < 3; i_plane++) {
- CUDA_MEMCPY2D cu_cpy = {
- .srcMemoryType = CU_MEMORYTYPE_DEVICE,
- .srcDevice = frameDevicePtr,
- .srcY = srcY,
- .srcPitch = i_pitch,
- .dstMemoryType = CU_MEMORYTYPE_DEVICE,
- .dstDevice = picctx->devicePtr,
- .dstPitch = picctx->bufferPitch,
- .dstY = dstY,
- .WidthInBytes = i_pitch,
- .Height = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
- };
- result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
- if (unlikely(result != VLC_SUCCESS))
- {
- free(picctx);
- goto error;
- }
- srcY += picctx->bufferHeight;
- dstY += p_sys->decoderHeight;
- }
- }
- else
- {
- for (int i_plane = 0; i_plane < 2; i_plane++) {
CUDA_MEMCPY2D cu_cpy = {
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
.srcDevice = frameDevicePtr,
- .srcY = srcY,
.srcPitch = i_pitch,
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
.dstDevice = picctx->devicePtr,
.dstPitch = picctx->bufferPitch,
- .dstY = dstY,
- .WidthInBytes = i_pitch,
- .Height = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
+ .WidthInBytes = picctx->bufferPitch,
+ .Height = picctx->bufferHeight,
};
- if (i_plane == 1)
- cu_cpy.Height >>= 1;
result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
if (unlikely(result != VLC_SUCCESS))
{
free(picctx);
goto error;
}
- srcY += picctx->bufferHeight;
- dstY += p_sys->decoderHeight;
- }
- }
+
p_pic->context = &picctx->ctx;
vlc_video_context_Hold(picctx->ctx.vctx);
}
--
2.17.1
More information about the vlc-devel
mailing list