[vlc-devel] [PATCH 1/3] nvdec: always use opaque chroma as output
Steve Lhomme
robux4 at ycbcr.xyz
Mon Mar 16 09:41:08 CET 2020
On 2020-03-16 9:33, Thomas Guillem wrote:
>
>
> On Mon, Mar 16, 2020, at 09:31, Steve Lhomme wrote:
>> I don't think it's a good idea.
>>
>> It forces a converter filter in the vout thread like this:
>> "A filter to adapt decoder NVD8 to display NV12 is needed"
>>
>> Converter filters consume resources in the vout thread that is time
>> critical to get the proper display timing. It sometimes causes frames to
>> be late or even skipped because the conversion is too long.
>>
>> When the conversion is done by the decoder the vout thread has more
>> liberty to drop frames earlier and less often.
>
> But this is what is done on all other HW decoders, non ?
No, for the others the display system can usually handle it without
conversion. CUDA is not handled by any display systems. Even the OpenGL
interop module does a copy into another buffer so it can be used to display.
>>
>> I understand the opaque chroma is currently used first and always work
>> (with the converter filter) but IMO it should probably be the other way
>> around. Outputting CUDA surfaces will only make sense when/if we have
>> CUDA filters (other than the GPU to CPU converter).
>>
>> We could also move the converter filter outside of the vout thread into
>> the decoder (when it pushes pictures for example) to avoid this but
>> that's a different subject. I don't think we'll do that in 4.0.
>>
>> On 2020-03-13 17:19, quentin.chateau at deepskycorp.com wrote:
>>> From: Quentin Chateau <quentin.chateau at deepskycorp.com>
>>>
>>> MapSurfaceChroma now maps cudaChroma to opaque chromas
>>> ---
>>> modules/hw/nvdec/nvdec.c | 95 +++---------------------------------
>>> modules/hw/nvdec/nvdec_fmt.h | 9 ----
>>> modules/hw/nvdec/nvdec_gl.c | 3 --
>>> 3 files changed, 6 insertions(+), 101 deletions(-)
>>>
>>> diff --git a/modules/hw/nvdec/nvdec.c b/modules/hw/nvdec/nvdec.c
>>> index 162330788f..4fc70a5f5c 100644
>>> --- a/modules/hw/nvdec/nvdec.c
>>> +++ b/modules/hw/nvdec/nvdec.c
>>> @@ -113,14 +113,14 @@ static vlc_fourcc_t MapSurfaceChroma(cudaVideoChromaFormat chroma, unsigned bitD
>>> switch (chroma) {
>>> case cudaVideoChromaFormat_420:
>>> if (bitDepth <= 8)
>>> - return VLC_CODEC_NV12;
>>> + return VLC_CODEC_NVDEC_OPAQUE;
>>> if (bitDepth <= 10)
>>> - return VLC_CODEC_P010;
>>> - return VLC_CODEC_P016;
>>> + return VLC_CODEC_NVDEC_OPAQUE_10B;
>>> + return VLC_CODEC_NVDEC_OPAQUE_16B;
>>> case cudaVideoChromaFormat_444:
>>> if (bitDepth <= 8)
>>> - return VLC_CODEC_I444;
>>> - return VLC_CODEC_I444_16L;
>>> + return VLC_CODEC_NVDEC_OPAQUE_444;
>>> + return VLC_CODEC_NVDEC_OPAQUE_444_16B;
>>> default:
>>> return 0;
>>> }
>>> @@ -131,17 +131,12 @@ static cudaVideoSurfaceFormat MapSurfaceFmt(int i_vlc_fourcc)
>>> switch (i_vlc_fourcc) {
>>> case VLC_CODEC_NVDEC_OPAQUE_10B:
>>> case VLC_CODEC_NVDEC_OPAQUE_16B:
>>> - case VLC_CODEC_P010:
>>> - case VLC_CODEC_P016:
>>> return cudaVideoSurfaceFormat_P016;
>>> case VLC_CODEC_NVDEC_OPAQUE:
>>> - case VLC_CODEC_NV12:
>>> return cudaVideoSurfaceFormat_NV12;
>>> case VLC_CODEC_NVDEC_OPAQUE_444:
>>> - case VLC_CODEC_I444:
>>> return cudaVideoSurfaceFormat_YUV444;
>>> case VLC_CODEC_NVDEC_OPAQUE_444_16B:
>>> - case VLC_CODEC_I444_16L:
>>> return cudaVideoSurfaceFormat_YUV444_16Bit;
>>> default: vlc_assert_unreachable();
>>> }
>>> @@ -151,15 +146,6 @@ static int CUtoFMT(video_format_t *fmt, const CUVIDEOFORMAT *p_format)
>>> {
>>> // bit depth and chroma
>>> unsigned int i_bpp = p_format->bit_depth_luma_minus8 + 8;
>>> - vlc_fourcc_t i_chroma;
>>> - if (is_nvdec_opaque(fmt->i_chroma))
>>> - i_chroma = fmt->i_chroma;
>>> - else
>>> - i_chroma = MapSurfaceChroma(p_format->chroma_format, i_bpp);
>>> - if (i_chroma == 0)
>>> - return VLC_EGENERIC;
>>> -
>>> - fmt->i_chroma = i_chroma;
>>> // use the real padded size when we know it fmt->i_width = p_format->coded_width;
>>> fmt->i_height = p_format->coded_height;
>>> fmt->i_x_offset = p_format->display_area.left;
>>> @@ -179,8 +165,6 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
>>> nvdec_ctx_t *p_sys = p_dec->p_sys;
>>> int ret;
>>>
>>> - if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
>>> - {
>>> for (size_t i=0; i < ARRAY_SIZE(p_sys->outputDevicePtr); i++)
>>> {
>>> CALL_CUDA_DEC(cuMemFree, p_sys->outputDevicePtr[i]);
>>> @@ -192,7 +176,6 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
>>> picture_pool_Release(p_sys->out_pool);
>>> p_sys->out_pool = NULL;
>>> }
>>> - }
>>>
>>> // update vlc's output format using NVDEC parser's output
>>> ret = CUtoFMT(&p_dec->fmt_out.video, p_format);
>>> @@ -231,8 +214,6 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
>>> goto error;
>>>
>>> // ensure the output surfaces have the same pitch so copies can work properly
>>> - if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
>>> - {
>>> // get the real decoder pitch
>>> CUdeviceptr frameDevicePtr = 0;
>>> CUVIDPROCPARAMS params = {
>>> @@ -298,8 +279,6 @@ clean_pics:
>>> goto error;
>>>
>>> p_sys->out_pool = picture_pool_New( ARRAY_SIZE(p_sys->outputDevicePtr), pics );
>>> - }
>>> -
>>> p_sys->decoderHeight = p_format->coded_height;
>>>
>>> CALL_CUDA_DEC(cuCtxPopCurrent, NULL);
>>> @@ -360,8 +339,6 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
>>> };
>>> int result;
>>>
>>> - if ( is_nvdec_opaque(p_dec->fmt_out.video.i_chroma) )
>>> - {
>>> p_pic = picture_pool_Wait(p_sys->out_pool);
>>> if (unlikely(p_pic == NULL))
>>> return 0;
>>> @@ -450,49 +427,6 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
>>> }
>>> p_pic->context = &picctx->ctx;
>>> vlc_video_context_Hold(picctx->ctx.vctx);
>>> - }
>>> - else
>>> - {
>>> - p_pic = decoder_NewPicture(p_dec);
>>> - if (unlikely(p_pic == NULL))
>>> - return 0;
>>> -
>>> - result = CALL_CUDA_DEC(cuCtxPushCurrent, p_sys->cuCtx);
>>> - if (unlikely(result != VLC_SUCCESS))
>>> - {
>>> - picture_Release(p_pic);
>>> - return 0;
>>> - }
>>> -
>>> - unsigned int i_pitch;
>>> -
>>> - // Map decoded frame to a device pointer
>>> - result = CALL_CUVID( cuvidMapVideoFrame, p_sys->cudecoder, p_dispinfo->picture_index,
>>> - &frameDevicePtr, &i_pitch, ¶ms );
>>> - if (result != VLC_SUCCESS)
>>> - goto error;
>>> -
>>> - // Copy decoded frame into a new VLC picture
>>> - size_t srcY = 0;
>>> - for (int i_plane = 0; i_plane < p_pic->i_planes; i_plane++) {
>>> - plane_t plane = p_pic->p[i_plane];
>>> - CUDA_MEMCPY2D cu_cpy = {
>>> - .srcMemoryType = CU_MEMORYTYPE_DEVICE,
>>> - .srcDevice = frameDevicePtr,
>>> - .srcY = srcY,
>>> - .srcPitch = i_pitch,
>>> - .dstMemoryType = CU_MEMORYTYPE_HOST,
>>> - .dstHost = plane.p_pixels,
>>> - .dstPitch = plane.i_pitch,
>>> - .WidthInBytes = i_pitch,
>>> - .Height = plane.i_visible_lines,
>>> - };
>>> - result = CALL_CUDA_DEC(cuMemcpy2D, &cu_cpy);
>>> - if (result != VLC_SUCCESS)
>>> - goto error;
>>> - srcY += p_sys->decoderHeight;
>>> - }
>>> - }
>>>
>>> // Release surface on GPU
>>> result = CALL_CUVID(cuvidUnmapVideoFrame, p_sys->cudecoder, frameDevicePtr);
>>> @@ -932,25 +866,8 @@ static int OpenDecoder(vlc_object_t *p_this)
>>> goto error;
>>> }
>>>
>>> - vlc_fourcc_t output_chromas[3];
>>> + vlc_fourcc_t output_chromas[2];
>>> size_t chroma_idx = 0;
>>> - if (cudaChroma == cudaVideoChromaFormat_420)
>>> - {
>>> - if (i_depth_luma >= 16)
>>> - output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_16B;
>>> - else if (i_depth_luma > 8)
>>> - output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_10B;
>>> - else
>>> - output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE;
>>> - }
>>> - else if (cudaChroma == cudaVideoChromaFormat_444)
>>> - {
>>> - if (i_depth_luma > 8)
>>> - output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_444_16B;
>>> - else
>>> - output_chromas[chroma_idx++] = VLC_CODEC_NVDEC_OPAQUE_444;
>>> - }
>>> -
>>> output_chromas[chroma_idx++] = MapSurfaceChroma(cudaChroma, i_depth_luma);
>>> output_chromas[chroma_idx++] = 0;
>>>
>>> diff --git a/modules/hw/nvdec/nvdec_fmt.h b/modules/hw/nvdec/nvdec_fmt.h
>>> index 25784cca6d..d84672d8fb 100644
>>> --- a/modules/hw/nvdec/nvdec_fmt.h
>>> +++ b/modules/hw/nvdec/nvdec_fmt.h
>>> @@ -51,15 +51,6 @@ static inline int CudaCheckErr(vlc_object_t *obj, CudaFunctions *cudaFunctions,
>>> return VLC_SUCCESS;
>>> }
>>>
>>> -static inline bool is_nvdec_opaque(vlc_fourcc_t fourcc)
>>> -{
>>> - return fourcc == VLC_CODEC_NVDEC_OPAQUE ||
>>> - fourcc == VLC_CODEC_NVDEC_OPAQUE_10B ||
>>> - fourcc == VLC_CODEC_NVDEC_OPAQUE_16B ||
>>> - fourcc == VLC_CODEC_NVDEC_OPAQUE_444 ||
>>> - fourcc == VLC_CODEC_NVDEC_OPAQUE_444_16B;
>>> -}
>>> -
>>> /* for VLC_CODEC_NVDEC_OPAQUE / VLC_CODEC_NVDEC_OPAQUE_16B */
>>> typedef struct
>>> {
>>> diff --git a/modules/hw/nvdec/nvdec_gl.c b/modules/hw/nvdec/nvdec_gl.c
>>> index 137e730028..62a503a8e8 100644
>>> --- a/modules/hw/nvdec/nvdec_gl.c
>>> +++ b/modules/hw/nvdec/nvdec_gl.c
>>> @@ -156,9 +156,6 @@ static void Close(vlc_object_t *obj)
>>> static int Open(vlc_object_t *obj)
>>> {
>>> struct vlc_gl_interop *interop = (void *) obj;
>>> - if (!is_nvdec_opaque(interop->fmt.i_chroma))
>>> - return VLC_EGENERIC;
>>> -
>>> vlc_decoder_device *device = vlc_video_context_HoldDevice(interop->vctx);
>>> if (device == NULL || device->type != VLC_DECODER_DEVICE_NVDEC)
>>> return VLC_EGENERIC;
>>> --
>>> 2.17.1
>>>
>>> _______________________________________________
>>> vlc-devel mailing list
>>> To unsubscribe or modify your subscription options:
>>> https://mailman.videolan.org/listinfo/vlc-devel
>>>
>> _______________________________________________
>> vlc-devel mailing list
>> To unsubscribe or modify your subscription options:
>> https://mailman.videolan.org/listinfo/vlc-devel
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel
>
More information about the vlc-devel
mailing list