[vlc-devel] [PATCH 2/3] nvdec: simplify GPU-GPU copy

quentin.chateau at deepskycorp.com quentin.chateau at deepskycorp.com
Tue Mar 24 17:38:39 CET 2020


From: Quentin Chateau <quentin.chateau at deepskycorp.com>

Store the actual bufferHeight in the picture context
and perform the GPU-GPU copy as a single call
to cuMemcpy2DAsync since neither buffer uses planes.
---
 modules/hw/nvdec/nvdec.c | 50 +++++-----------------------------------
 1 file changed, 6 insertions(+), 44 deletions(-)

diff --git a/modules/hw/nvdec/nvdec.c b/modules/hw/nvdec/nvdec.c
index cff5a263d0..34f20c0a35 100644
--- a/modules/hw/nvdec/nvdec.c
+++ b/modules/hw/nvdec/nvdec.c
@@ -94,10 +94,10 @@ typedef struct nvdec_ctx {
     cudaVideoDeinterlaceMode    deintMode;
     // NVDEC doesn't stop even if HandleVideoSequence fails
     bool                        b_nvparser_success;
-    size_t                      decoderHeight;
 
     CUdeviceptr                 outputDevicePtr[MAX_POOL_SIZE];
     unsigned int                outputPitch;
+    unsigned int                outputHeight;
     picture_pool_t              *out_pool;
 
     vlc_video_context           *vctx_out;
@@ -258,6 +258,7 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
             default:
                 vlc_assert_unreachable();
         }
+        p_sys->outputHeight = Height;
 
         picture_t *pics[ARRAY_SIZE(p_sys->outputDevicePtr)];
         for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
@@ -299,8 +300,6 @@ clean_pics:
         p_sys->out_pool = picture_pool_New( ARRAY_SIZE(p_sys->outputDevicePtr), pics );
     }
 
-    p_sys->decoderHeight = p_format->coded_height;
-
     CALL_CUDA_DEC(cuCtxPopCurrent, NULL);
 
     ret = decoder_UpdateVideoOutput(p_dec, p_sys->vctx_out);
@@ -394,62 +393,25 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
         uintptr_t pool_idx = (uintptr_t)p_pic->p_sys;
         picctx->devicePtr = p_sys->outputDevicePtr[pool_idx];
         picctx->bufferPitch = p_sys->outputPitch;
-        picctx->bufferHeight = p_sys->decoderHeight;
+        picctx->bufferHeight = p_sys->outputHeight;
 
-        size_t srcY = 0;
-        size_t dstY = 0;
-        if (p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444 || p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444_16B)
-        {
-            for (int i_plane = 0; i_plane < 3; i_plane++) {
-                CUDA_MEMCPY2D cu_cpy = {
-                    .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
-                    .srcDevice      = frameDevicePtr,
-                    .srcY           = srcY,
-                    .srcPitch       = i_pitch,
-                    .dstMemoryType  = CU_MEMORYTYPE_DEVICE,
-                    .dstDevice      = picctx->devicePtr,
-                    .dstPitch       = picctx->bufferPitch,
-                    .dstY           = dstY,
-                    .WidthInBytes   = i_pitch,
-                    .Height         = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
-                };
-                result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
-                if (unlikely(result != VLC_SUCCESS))
-                {
-                    free(picctx);
-                    goto error;
-                }
-                srcY += picctx->bufferHeight;
-                dstY += p_sys->decoderHeight;
-            }
-        }
-        else
-        {
-            for (int i_plane = 0; i_plane < 2; i_plane++) {
                 CUDA_MEMCPY2D cu_cpy = {
                     .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
                     .srcDevice      = frameDevicePtr,
-                    .srcY           = srcY,
                     .srcPitch       = i_pitch,
                     .dstMemoryType  = CU_MEMORYTYPE_DEVICE,
                     .dstDevice      = picctx->devicePtr,
                     .dstPitch       = picctx->bufferPitch,
-                    .dstY           = dstY,
-                    .WidthInBytes   = i_pitch,
-                    .Height         = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
+                    .WidthInBytes   = picctx->bufferPitch,
+                    .Height         = picctx->bufferHeight,
                 };
-                if (i_plane == 1)
-                    cu_cpy.Height >>= 1;
                 result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
                 if (unlikely(result != VLC_SUCCESS))
                 {
                     free(picctx);
                     goto error;
                 }
-                srcY += picctx->bufferHeight;
-                dstY += p_sys->decoderHeight;
-            }
-        }
+
         p_pic->context = &picctx->ctx;
         vlc_video_context_Hold(picctx->ctx.vctx);
     }
-- 
2.17.1



More information about the vlc-devel mailing list