[vlc-devel] [PATCH 4/6] nvdec: output a custom NVDEC opaque format

Steve Lhomme robux4 at ycbcr.xyz
Thu Sep 12 14:44:20 CEST 2019


---
 include/vlc_codec.h              |   2 +
 modules/codec/Makefile.am        |   3 +-
 modules/codec/nvdec.c            | 255 +++++++++++++++++++++++++++----
 modules/video_chroma/nvdec_fmt.h |  50 ++++++
 4 files changed, 277 insertions(+), 33 deletions(-)
 create mode 100644 modules/video_chroma/nvdec_fmt.h

diff --git a/include/vlc_codec.h b/include/vlc_codec.h
index 8ecdcadb396..55e24cb9648 100644
--- a/include/vlc_codec.h
+++ b/include/vlc_codec.h
@@ -491,6 +491,7 @@ enum vlc_decoder_device_type
     VLC_DECODER_DEVICE_DXVA2,
     VLC_DECODER_DEVICE_D3D11VA,
     VLC_DECODER_DEVICE_AWINDOW,
+    VLC_DECODER_DEVICE_NVDEC,
     VLC_DECODER_DEVICE_MMAL,
 };
 
@@ -525,6 +526,7 @@ typedef struct vlc_decoder_device
      * DXVA2: IDirect3DDevice9*
      * D3D11VA: ID3D11DeviceContext*
      * AWindow: android AWindowHandler*
+     * NVDEC: decoder_device_nvdec_t*
      * MMAL: MMAL_PORT_T*
      */
     void *opaque;
diff --git a/modules/codec/Makefile.am b/modules/codec/Makefile.am
index a6a04b0d400..df45ba76bfb 100644
--- a/modules/codec/Makefile.am
+++ b/modules/codec/Makefile.am
@@ -442,7 +442,8 @@ libnvdec_plugin_la_SOURCES = \
 	codec/nvdec.c codec/hxxx_helper.c codec/hxxx_helper.h \
 	packetizer/hxxx_nal.h packetizer/hxxx_nal.c \
 	packetizer/h264_nal.c packetizer/h264_nal.h \
-	packetizer/hevc_nal.c packetizer/hevc_nal.h
+	packetizer/hevc_nal.c packetizer/hevc_nal.h \
+	video_chroma/nvdec_fmt.h
 if HAVE_NVDEC
 codec_LTLIBRARIES += libnvdec_plugin.la
 endif
diff --git a/modules/codec/nvdec.c b/modules/codec/nvdec.c
index ee65f6f1286..3fbcd3ddbdb 100644
--- a/modules/codec/nvdec.c
+++ b/modules/codec/nvdec.c
@@ -28,15 +28,19 @@
 #include <vlc_plugin.h>
 #include <vlc_codec.h>
 #include <vlc_messages.h>
+#include <vlc_picture_pool.h>
 
 #define FFNV_LOG_FUNC(logctx, msg, ...)        msg_Err((vlc_object_t*)logctx, msg, __VA_ARGS__)
 #define FFNV_DEBUG_LOG_FUNC(logctx, msg, ...)  msg_Dbg((vlc_object_t*)logctx, msg, __VA_ARGS__)
 
 #include <ffnvcodec/dynlink_loader.h>
 #include "hxxx_helper.h"
+#include "../video_chroma/nvdec_fmt.h"
 
 #define MAX_HXXX_SURFACES (16 + 1)
 #define NVDEC_DISPLAY_SURFACES 1
+#define MAX_POOL_SIZE     4 // number of in-flight buffers, if more are needed the decoder waits
+
 
 #define OUTPUT_WIDTH_ALIGN   256
 
@@ -61,6 +65,10 @@ typedef struct nvdec_ctx {
     size_t                      decoderHeight;
     unsigned int                outputPitch;
 
+    picture_pool_t              *out_pool;
+
+    CUdeviceptr                 outputDevicePtr[MAX_POOL_SIZE];
+
 } nvdec_ctx_t;
 
 static int OpenDecoder(vlc_object_t *p_this);
@@ -111,9 +119,12 @@ static inline int CudaCall(decoder_t *p_dec, CUresult result, const char *psz_fu
 static cudaVideoSurfaceFormat MapSurfaceFmt(int i_vlc_fourcc)
 {
     switch (i_vlc_fourcc) {
+        case VLC_CODEC_NVDEC_OPAQUE_10B:
+        case VLC_CODEC_NVDEC_OPAQUE_16B:
         case VLC_CODEC_P010:
         case VLC_CODEC_P016:
             return cudaVideoSurfaceFormat_P016;
+        case VLC_CODEC_NVDEC_OPAQUE:
         case VLC_CODEC_NV12:
             return cudaVideoSurfaceFormat_NV12;
         // case VLC_CODEC_I444:
@@ -147,7 +158,10 @@ static int CUtoFMT(video_format_t *fmt, const CUVIDEOFORMAT *p_format)
     // bit depth and chroma
     unsigned int i_bpp = p_format->bit_depth_luma_minus8 + 8;
     vlc_fourcc_t i_chroma;
-    i_chroma = MapSurfaceChroma(p_format->chroma_format, i_bpp);
+    if (is_nvdec_opaque(fmt->i_chroma))
+        i_chroma = fmt->i_chroma;
+    else
+        i_chroma = MapSurfaceChroma(p_format->chroma_format, i_bpp);
     if (i_chroma == 0)
         return VLC_EGENERIC;
 
@@ -171,12 +185,40 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
     nvdec_ctx_t *p_sys = p_dec->p_sys;
     int ret;
 
+    if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
+    {
+        for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
+        {
+            CALL_CUDA(cuMemFree, p_sys->outputDevicePtr[i]);
+            p_sys->outputDevicePtr[i] = 0;
+        }
+
+        if (p_sys->out_pool)
+        {
+            picture_pool_Release(p_sys->out_pool);
+            p_sys->out_pool = NULL;
+        }
+    }
+
     // update vlc's output format using NVDEC parser's output
     ret = CUtoFMT(&p_dec->fmt_out.video, p_format);
     if (ret != VLC_SUCCESS)
+    {
+        msg_Dbg(p_dec, "unsupported Chroma %d + BitDepth %d", p_format->chroma_format, p_format->bit_depth_luma_minus8 + 8);
         goto error;
+    }
     p_dec->fmt_out.i_codec = p_dec->fmt_out.video.i_chroma;
 
+    ret = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
+    if (ret != VLC_SUCCESS)
+        goto error;
+
+    if (p_sys->cudecoder)
+    {
+        CALL_CUVID(cuvidDestroyDecoder, p_sys->cudecoder);
+        p_sys->cudecoder = NULL;
+    }
+
     CUVIDDECODECREATEINFO dparams = {
         .ulWidth             = p_dec->fmt_out.video.i_width,
         .ulHeight            = p_dec->fmt_out.video.i_height,
@@ -191,23 +233,71 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
         .DeinterlaceMode     = p_sys->deintMode
     };
     ret = CALL_CUVID(cuvidCreateDecoder, &p_sys->cudecoder, &dparams);
-
-    ret = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
     if (ret != VLC_SUCCESS)
         goto error;
 
+    if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
+    {
+        unsigned int ByteWidth = p_dec->fmt_out.video.i_width;
+        if ( p_dec->fmt_out.video.i_chroma != VLC_CODEC_NVDEC_OPAQUE)
+            // 10 bits or more use double width in bytes
+            ByteWidth *= 2;
+        unsigned int Height = p_dec->fmt_out.video.i_height;
+        Height += (Height + 1) / 2;
+
+        picture_t *pics[ARRAY_SIZE(p_sys->outputDevicePtr)];
+        for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
+        {
+            size_t pitch;
+            ret = CALL_CUDA(cuMemAllocPitch, &p_sys->outputDevicePtr[i], &pitch, ByteWidth, Height, 16);
+            if (ret != VLC_SUCCESS || p_sys->outputDevicePtr[i] == 0)
+                goto clean_pics;
+            p_sys->outputPitch = pitch;
+            picture_resource_t res = {
+                .p_sys = (void*)(uintptr_t)i,
+            };
+            pics[i] = picture_NewFromResource( &p_dec->fmt_out.video, &res );
+            if (unlikely(pics[i] == NULL))
+            {
+                msg_Dbg(p_dec, "failed to get a picture for the buffer");
+                ret = VLC_ENOMEM;
+                goto clean_pics;
+            }
+            continue;
+clean_pics:
+            if (p_sys->outputDevicePtr[i])
+            {
+                CALL_CUDA(cuMemFree, p_sys->outputDevicePtr[i]);
+                p_sys->outputDevicePtr[i] = 0;
+            }
+            if (i > 0)
+            {
+                while (i--)
+                {
+                    picture_Release(pics[i]);
+                    CALL_CUDA(cuMemFree, p_sys->outputDevicePtr[i]);
+                    p_sys->outputDevicePtr[i] = 0;
+                }
+            }
+            break;
+        }
+        if (ret != VLC_SUCCESS)
+            goto error;
+
+        p_sys->out_pool = picture_pool_New( ARRAY_SIZE(p_sys->outputDevicePtr), pics );
+    }
+    else
+    {
     CUdeviceptr frameDevicePtr = 0;
     CUVIDPROCPARAMS params = {
         .progressive_frame = 1,
         .top_field_first = 1,
-     };
+    };
     ret = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, 0, &frameDevicePtr, &p_sys->outputPitch, &params );
     if (ret != VLC_SUCCESS)
         goto error;
     CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
-
-    if (p_sys->cudecoder)
-        CALL_CUVID(cuvidDestroyDecoder, p_sys->cudecoder);
+    }
 
     p_sys->decoderHeight = p_format->coded_height;
 
@@ -215,7 +305,8 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
 
     // ensure the output surfaces have the same pitch so copies can work properly
     p_dec->fmt_out.video.i_width = p_sys->outputPitch;
-    if ( p_dec->fmt_out.video.i_chroma != VLC_CODEC_NV12 )
+    if ( p_dec->fmt_out.video.i_chroma != VLC_CODEC_NV12 &&
+         p_dec->fmt_out.video.i_chroma != VLC_CODEC_NVDEC_OPAQUE )
         // 10 bits of more use double width in bytes
         p_dec->fmt_out.video.i_width >>= 1;
 
@@ -242,30 +333,114 @@ static int CUDAAPI HandlePictureDecode(void *p_opaque, CUVIDPICPARAMS *p_picpara
     return (ret == VLC_SUCCESS);
 }
 
-static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_dispinfo)
+static void NVDecCtxDestroy(struct picture_context_t *picctx)
 {
-    decoder_t *p_dec = (decoder_t *) p_opaque;
-    nvdec_ctx_t *p_sys = p_dec->p_sys;
+    pic_context_nvdec_t *srcpic = container_of(picctx, pic_context_nvdec_t, ctx);
+    free(srcpic);
+}
 
-    picture_t * p_pic = decoder_NewPicture(p_dec);
-    if (unlikely(p_pic == NULL))
-        return 0;
+static struct picture_context_t *NVDecCtxClone(struct picture_context_t *srcctx)
+{
+    pic_context_nvdec_t *clonectx = malloc(sizeof(*clonectx));
+    if (unlikely(clonectx == NULL))
+        return NULL;
+    pic_context_nvdec_t *srcpic = container_of(srcctx, pic_context_nvdec_t, ctx);
 
-    int result = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
-    if (unlikely(result != VLC_SUCCESS))
-    {
-        picture_Release(p_pic);
-        return 0;
-    }
+    clonectx->ctx.destroy = NVDecCtxDestroy;
+    clonectx->ctx.copy = NVDecCtxClone;
+    clonectx->devidePtr = srcpic->devidePtr;
+    clonectx->bufferPitch = srcpic->bufferPitch;
+    return &clonectx->ctx;
+}
 
+
+static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_dispinfo)
+{
+    decoder_t *p_dec = (decoder_t *) p_opaque;
+    nvdec_ctx_t *p_sys = p_dec->p_sys;
+    picture_t *p_pic = NULL;
     CUdeviceptr frameDevicePtr = 0;
-    unsigned int i_pitch;
     CUVIDPROCPARAMS params = {
         .progressive_frame = p_sys->deintMode == cudaVideoDeinterlaceMode_Weave ? 1 : p_dispinfo->progressive_frame,
         .top_field_first = p_dispinfo->top_field_first,
         .second_field = p_dispinfo->repeat_first_field + 1,
         .unpaired_field = p_dispinfo->repeat_first_field < 0,
     };
+    int result;
+
+    if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
+    {
+        p_pic = picture_pool_Wait(p_sys->out_pool);
+        if (unlikely(p_pic == NULL))
+            return 0;
+
+        result = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
+        if (unlikely(result != VLC_SUCCESS))
+        {
+            picture_Release(p_pic);
+            return 0;
+        }
+
+        unsigned int i_pitch;
+
+        // Map decoded frame to a device pointer
+        result = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, p_dispinfo->picture_index,
+                            &frameDevicePtr, &i_pitch, &params );
+        if (result != VLC_SUCCESS)
+            goto error;
+
+        // put a new context in the output picture
+        pic_context_nvdec_t *picctx = malloc(sizeof(*picctx));
+        if (unlikely(picctx == NULL))
+            goto error;
+        picctx->ctx.destroy = NVDecCtxDestroy;
+        picctx->ctx.copy = NVDecCtxClone;
+        uintptr_t pool_idx = (uintptr_t)p_pic->p_sys;
+        picctx->devidePtr = p_sys->outputDevicePtr[pool_idx];
+        picctx->bufferPitch = p_sys->outputPitch;
+
+        size_t srcY = 0;
+        size_t dstY = 0;
+        for (int i_plane = 0; i_plane < 2; i_plane++) {
+            CUDA_MEMCPY2D cu_cpy = {
+                .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
+                .srcDevice      = frameDevicePtr,
+                .srcY           = srcY,
+                .srcPitch       = i_pitch,
+                .dstMemoryType  = CU_MEMORYTYPE_DEVICE,
+                .dstDevice      = picctx->devidePtr,
+                .dstPitch       = picctx->bufferPitch,
+                .dstY           = dstY,
+                .WidthInBytes   = i_pitch,
+                .Height         = __MIN(p_sys->decoderHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
+            };
+            if (i_plane == 1)
+                cu_cpy.Height >>= 1;
+            result = CALL_CUDA(cuMemcpy2DAsync, &cu_cpy, 0);
+            if (unlikely(result != VLC_SUCCESS))
+            {
+                free(picctx);
+                goto error;
+            }
+            srcY += p_sys->decoderHeight;
+            dstY += p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height;
+        }
+        p_pic->context = &picctx->ctx;
+    }
+    else
+    {
+        p_pic = decoder_NewPicture(p_dec);
+        if (unlikely(p_pic == NULL))
+            return 0;
+
+        result = CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
+        if (unlikely(result != VLC_SUCCESS))
+        {
+            picture_Release(p_pic);
+            return 0;
+        }
+
+        unsigned int i_pitch;
 
     // Map decoded frame to a device pointer
     result = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, p_dispinfo->picture_index,
@@ -293,10 +468,11 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
             goto error;
          srcY += p_sys->decoderHeight;
     }
+    }
 
     // Release surface on GPU
     result = CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
-    if (result != VLC_SUCCESS)
+    if (unlikely(result != VLC_SUCCESS))
         goto error;
 
     CALL_CUDA(cuCtxPopCurrent, NULL);
@@ -319,8 +495,11 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
     return 1;
 
 error:
+    if (frameDevicePtr)
+        CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
     CALL_CUDA(cuCtxPopCurrent, NULL);
-    picture_Release(p_pic);
+    if (p_pic)
+        picture_Release(p_pic);
     return 0;
 }
 
@@ -626,15 +805,12 @@ static int OpenDecoder(vlc_object_t *p_this)
             p_dec->fmt_out.video.i_visible_height = i_vh;
         }
 
-        if(!p_dec->fmt_in.video.i_sar_num || !p_dec->fmt_in.video.i_sar_den)
+        int i_sar_num, i_sar_den;
+        if (VLC_SUCCESS ==
+            hxxx_helper_get_current_sar(&p_sys->hh, &i_sar_num, &i_sar_den))
         {
-            int i_sar_num, i_sar_den;
-            if (VLC_SUCCESS ==
-                hxxx_helper_get_current_sar(&p_sys->hh, &i_sar_num, &i_sar_den))
-            {
-                p_dec->fmt_out.video.i_sar_num = i_sar_num;
-                p_dec->fmt_out.video.i_sar_den = i_sar_den;
-            }
+            p_dec->fmt_out.video.i_sar_num = i_sar_num;
+            p_dec->fmt_out.video.i_sar_den = i_sar_den;
         }
     }
     else if (p_dec->fmt_in.i_codec == VLC_CODEC_VP9)
@@ -690,8 +866,18 @@ static int OpenDecoder(vlc_object_t *p_this)
         goto error;
     }
 
-    vlc_fourcc_t output_chromas[2];
+    vlc_fourcc_t output_chromas[3];
     size_t chroma_idx = 0;
+    if (cudaChroma == cudaVideoChromaFormat_420)
+    {
+        if (i_depth_luma >= 16)
+            output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_16B;
+        else if (i_depth_luma >= 10)
+            output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_10B;
+        else
+            output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE;
+    }
+
     output_chromas[chroma_idx++] = MapSurfaceChroma(cudaChroma, i_depth_luma);
     output_chromas[chroma_idx++] = 0;
 
@@ -735,6 +921,11 @@ static void CloseDecoder(vlc_object_t *p_this)
     nvdec_ctx_t *p_sys = p_dec->p_sys;
     CALL_CUDA(cuCtxPushCurrent, p_sys->cuCtx);
     CALL_CUDA(cuCtxPopCurrent, NULL);
+
+    for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
+        CALL_CUDA(cuMemFree, p_sys->outputDevicePtr[i]);
+    if (p_sys->out_pool)
+        picture_pool_Release(p_sys->out_pool);
     if (p_sys->cudecoder)
         CALL_CUVID(cuvidDestroyDecoder, p_sys->cudecoder);
     if (p_sys->cuparser)
diff --git a/modules/video_chroma/nvdec_fmt.h b/modules/video_chroma/nvdec_fmt.h
new file mode 100644
index 00000000000..5ba0d8d71f7
--- /dev/null
+++ b/modules/video_chroma/nvdec_fmt.h
@@ -0,0 +1,50 @@
+/*****************************************************************************
+ * nvdec_fmt.h : NVDEC common code
+ *****************************************************************************
+ * Copyright © 2019 VLC authors, VideoLAN and VideoLabs
+ *
+ * Authors: Steve Lhomme <robux4 at videolabs.io>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_VIDEOCHROMA_NVDEC_FMT_H_
+#define VLC_VIDEOCHROMA_NVDEC_FMT_H_
+
+#include <ffnvcodec/dynlink_loader.h>
+
+typedef struct { /* type pointed to by vlc_decoder_device.opaque for VLC_DECODER_DEVICE_NVDEC */
+
+    CudaFunctions  *cudaFunctions; /* presumably the CUDA entry points loaded via ffnvcodec's dynlink loader -- TODO confirm */
+    CUcontext      cuCtx; /* CUDA context handle; NOTE(review): owner/lifetime not visible here -- confirm */
+
+} decoder_device_nvdec_t;
+
+static inline bool is_nvdec_opaque(vlc_fourcc_t fourcc) /* true iff fourcc is one of the three NVDEC opaque chromas */
+{
+    return fourcc == VLC_CODEC_NVDEC_OPAQUE || /* base opaque chroma */
+           fourcc == VLC_CODEC_NVDEC_OPAQUE_10B || /* 10-bit opaque chroma */
+           fourcc == VLC_CODEC_NVDEC_OPAQUE_16B; /* 16-bit opaque chroma */
+}
+
+/* picture context for VLC_CODEC_NVDEC_OPAQUE / VLC_CODEC_NVDEC_OPAQUE_10B / VLC_CODEC_NVDEC_OPAQUE_16B */
+typedef struct
+{
+    picture_context_t ctx; /* generic picture context; the producer fills in its destroy and copy callbacks */
+    CUdeviceptr  devidePtr; /* CUDA device pointer to the frame buffer (spelling "devide" is presumably meant as "device") */
+    unsigned int bufferPitch; /* pitch of the buffer at devidePtr; presumably in bytes, as returned by the CUDA pitch allocator -- confirm */
+} pic_context_nvdec_t;
+
+#endif /* include-guard */
-- 
2.17.1



More information about the vlc-devel mailing list