[vlc-devel] [PATCH v2 5/5] dav1d: add DXVA 4:2:0 decoding support

Steve Lhomme robux4 at ycbcr.xyz
Fri Sep 11 11:46:09 CEST 2020


Tested on NVIDIA 3090 GPU and Intel Iris Xe Graphics on 8-bit sources.

DXVA decoding is only enabled if the decoder device is set to D3D11VA or
DXVA2. If no hardware decoder is found, we fall back to software decoding.
The profile needs to be known on open to use hardware decoding, as it requires
using a single frame thread; falling back to software decoding after the open
would therefore have an impact on performance.

It uses the "nvdec_pool" helper for hardware buffer pools, taken directly from
the nvdec folder.

Some code could be shared (in a library) with the other DXVA modules.
---
 modules/codec/Makefile.am |   11 +
 modules/codec/dav1d.c     | 1250 +++++++++++++++++++++++++++++++++++++
 2 files changed, 1261 insertions(+)

diff --git a/modules/codec/Makefile.am b/modules/codec/Makefile.am
index 6995119e07b..af507a9a8d1 100644
--- a/modules/codec/Makefile.am
+++ b/modules/codec/Makefile.am
@@ -566,6 +566,17 @@ libdav1d_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) $(DAV1D_CFLAGS)
 libdav1d_plugin_la_CFLAGS = $(AM_CFLAGS)
 libdav1d_plugin_la_LDFLAGS = $(AM_LDFLAGS) -rpath '$(codecdir)'
 libdav1d_plugin_la_LIBADD = $(DAV1D_LIBS)
+if HAVE_WIN32 #d3d11va
+libdav1d_plugin_la_SOURCES += video_chroma/d3d11_fmt.c video_chroma/d3d11_fmt.h \
+	video_chroma/dxgi_fmt.c video_chroma/dxgi_fmt.h libd3d11_common.la \
+	hw/nvdec/hw_pool.c hw/nvdec/hw_pool.h
+libdav1d_plugin_la_LIBADD += $(LIBCOM)
+if HAVE_WINSTORE
+libdav1d_plugin_la_LIBADD += -ld3d11
+else
+libdav1d_plugin_la_SOURCES += video_chroma/d3d9_fmt.c video_chroma/d3d9_fmt.h
+endif
+endif
 EXTRA_LTLIBRARIES += libdav1d_plugin.la
 codec_LTLIBRARIES += $(LTLIBdav1d)
 
diff --git a/modules/codec/dav1d.c b/modules/codec/dav1d.c
index e8e030cd7ca..069cf186c06 100644
--- a/modules/codec/dav1d.c
+++ b/modules/codec/dav1d.c
@@ -41,6 +41,19 @@
 #include "../packetizer/av1_obu.h"
 #include "cc.h"
 
+
+#ifdef _WIN32
+#include "../hw/nvdec/hw_pool.h"
+#define COBJMACROS
+#include <initguid.h> /* must be last included to not redefine existing GUIDs */
+#include "../video_chroma/d3d11_fmt.h"
+#include "../video_chroma/d3d9_fmt.h"
+#include <dxva.h>
+#include <dxva2api.h>
+
+#define DECODER_SLICES  (10+1) // we only use one thread in DXVA mode
+#endif
+
 /****************************************************************************
  * Local prototypes
  ****************************************************************************/
@@ -83,6 +96,35 @@ typedef struct
     // hardware decoding
     vlc_video_context  *vctx_out;
     vlc_decoder_device *dec_dev;
+
+#ifdef _WIN32
+    union {
+        struct {
+            D3D11_VIDEO_DECODER_DESC decoderDesc;
+
+            d3d11_device_t     *d3d_dev;
+
+            ID3D11VideoDevice  *vdevice;
+
+            ID3D11VideoContext *video_context;
+            ID3D11VideoDecoder *d3ddec;
+
+            ID3D11Asynchronous      *waitCopies;
+        } d3d11;
+        struct {
+            const GUID              *selected_decoder;
+            D3DFORMAT               render;
+            HINSTANCE               dxva2_dll;
+            IDirect3DDeviceManager9 *devmng;
+            HANDLE                   device;
+            IDirectXVideoDecoderService  *d3ddec;
+            IDirectXVideoDecoder     *decoder;
+            DXVA2_ConfigPictureDecode cfg;
+        } d3d9;
+    };
+    nvdec_pool_t         *out_pool;
+    nvdec_pool_owner_t   owner;
+#endif
 } decoder_sys_t;
 
 struct user_data_s
@@ -148,6 +190,978 @@ static void UpdateDecoderOutput(decoder_t *dec, const Dav1dSequenceHeader *seq_h
     }
 }
 
+#ifdef _WIN32
+struct dav1d_d3d11_ctx /* per-picture context handed out by the D3D11 pool */
+{
+    struct d3d11_pic_context     ctx; /* D3D11 picture context; ctx.s is the picture_context_t seen by pictures */
+    ID3D11VideoDecoderOutputView *surface; /* decoder output view, copied from the pool entry in PoolD3D11AttachPicture() */
+    nvdec_pool_t                 *pool; /* pool this picture came from, referenced with nvdec_pool_AddRef() */
+};
+
+typedef struct /* one entry of the D3D11 output pool: a single slice of the decoder texture array */
+{
+    ID3D11Texture2D              *texture; /* texture array shared by all entries; each entry holds its own reference */
+    unsigned                     slice_index; /* array slice of `texture` used by this entry */
+    ID3D11VideoDecoderOutputView *view; /* decoder output view created for that slice */
+    ID3D11ShaderResourceView     *renderSrc[D3D11_MAX_SHADER_VIEW]; /* shader views for display, NULL when not allocated */
+} pool_picture_sys_d3d11;
+
+#define NVDEC_PICPOOLCTX_FROM_PICCTX(pic_ctx)  \
+    container_of(pic_ctx, struct dav1d_d3d11_ctx, ctx.s) /* picture_context_t* -> enclosing struct dav1d_d3d11_ctx* */
+
+/* Drops every COM reference held by a D3D11 pool entry and frees the entry.
+ * Each reference is checked before being released so that a partially
+ * initialised (zero-filled) entry, or a NULL entry, can be passed safely. */
+static void ReleaseD3D11Sys(pool_picture_sys_d3d11 *dsys)
+{
+    if (dsys == NULL)
+        return;
+
+    for (int j=0; j<D3D11_MAX_SHADER_VIEW; j++)
+    {
+        if (dsys->renderSrc[j])
+            ID3D11ShaderResourceView_Release(dsys->renderSrc[j]);
+    }
+    if (dsys->view)
+        ID3D11VideoDecoderOutputView_Release(dsys->view);
+    if (dsys->texture)
+        ID3D11Texture2D_Release(dsys->texture);
+    free(dsys);
+}
+
+/* picture_context_t destroy callback: gives back the pool reference held by
+ * this context, then lets d3d11_pic_context_destroy() dispose of the context. */
+static void d3d11_picture_CtxDestroy(picture_context_t *picctx)
+{
+    struct dav1d_d3d11_ctx *self =
+        container_of(picctx, struct dav1d_d3d11_ctx, ctx.s);
+
+    nvdec_pool_Release(self->pool);
+    d3d11_pic_context_destroy(picctx);
+}
+
+/* picture_context_t copy callback: returns a heap copy of the source context
+ * that holds its own references on the pool, the picture resources and the
+ * video context, or NULL when the allocation fails. */
+static picture_context_t *d3d11_picture_CtxClone(picture_context_t *srcctx)
+{
+    struct dav1d_d3d11_ctx *copy = malloc(sizeof(*copy));
+    if (unlikely(copy == NULL))
+        return NULL;
+
+    const struct dav1d_d3d11_ctx *original =
+        container_of(srcctx, struct dav1d_d3d11_ctx, ctx.s);
+    *copy = *original;
+
+    /* the copy shares everything with the original: take one reference each */
+    nvdec_pool_AddRef(copy->pool);
+    AcquireD3D11PictureSys(&copy->ctx.picsys);
+    vlc_video_context_Hold(copy->ctx.s.vctx);
+
+    return &copy->ctx.s;
+}
+
+/* Pool owner callback: releases the pics_count entries stored in buffers[].
+ * The owner is not needed to do so. */
+static void PoolD3D11Release(nvdec_pool_owner_t *owner, void *buffers[], size_t pics_count)
+{
+    VLC_UNUSED(owner);
+
+    void **entry = buffers;
+    for (size_t remaining = pics_count; remaining > 0; remaining--)
+        ReleaseD3D11Sys(*entry++);
+}
+
+/* Pool owner callback: builds the picture context for one pool entry.
+ *
+ * owner   owner embedded in the decoder_sys_t that created the pool
+ * pool    pool the entry belongs to; the new context takes a reference on it
+ * surface the pool_picture_sys_d3d11 entry to expose through the context
+ *
+ * Returns the new picture_context_t, or NULL when the allocation fails. */
+static picture_context_t * PoolD3D11AttachPicture(nvdec_pool_owner_t *owner, nvdec_pool_t *pool, void *surface)
+{
+    decoder_sys_t *p_sys = container_of(owner, decoder_sys_t, owner);
+
+    struct dav1d_d3d11_ctx *picctx = calloc(1, sizeof(*picctx));
+    if (unlikely(!picctx))
+        return NULL;
+
+    pool_picture_sys_d3d11 *pool_picsys = surface;
+    picture_sys_d3d11_t * picsys = &picctx->ctx.picsys;
+
+    picctx->surface = pool_picsys->view;
+
+    /* every plane of the picture refers to the same texture array */
+    for (int i=0; i<D3D11_MAX_SHADER_VIEW; i++)
+    {
+        picsys->texture[i]  = pool_picsys->texture;
+        picsys->renderSrc[i] = pool_picsys->renderSrc[i];
+    }
+    picsys->slice_index = pool_picsys->slice_index;
+
+    picctx->ctx.s = (picture_context_t) {
+        d3d11_picture_CtxDestroy,
+        d3d11_picture_CtxClone,
+        p_sys->vctx_out,
+    };
+    vlc_video_context_Hold(picctx->ctx.s.vctx);
+    AcquireD3D11PictureSys(picsys);
+
+    picctx->pool = pool;
+    nvdec_pool_AddRef(pool);
+    return &picctx->ctx.s;
+}
+
+/* Looks up, in the list returned by GetRenderFormatList(), the first format
+ * whose texture format is `dxgi` and whose chroma is a D3D11 opaque one.
+ * Returns NULL when the list has no such entry. */
+static const d3d_format_t *D3D11_FindDXGIFormat(DXGI_FORMAT dxgi)
+{
+    const d3d_format_t *candidate = GetRenderFormatList();
+
+    /* the list ends with an entry that has no name */
+    while (candidate->name != NULL)
+    {
+        if (candidate->formatTexture == dxgi && is_d3d11_opaque(candidate->fourcc))
+            return candidate;
+        candidate++;
+    }
+    return NULL;
+}
+
+/* Releases the D3D11 decoder, the copy-wait query and the output pool when
+ * they exist, and clears the cached decoder description so that the next
+ * D3D11UpdateDecoder() call does not consider the configuration current. */
+static void ReleaseD3D11Decoder(decoder_sys_t *sys)
+{
+    ID3D11VideoDecoder *decoder = sys->d3d11.d3ddec;
+    if (decoder != NULL)
+    {
+        ID3D11VideoDecoder_Release( decoder );
+        sys->d3d11.d3ddec = NULL;
+    }
+
+    sys->d3d11.decoderDesc = (D3D11_VIDEO_DECODER_DESC) { 0 };
+
+    ID3D11Asynchronous *query = sys->d3d11.waitCopies;
+    if (query != NULL)
+    {
+        ID3D11Asynchronous_Release(query);
+        sys->d3d11.waitCopies = NULL;
+    }
+
+    nvdec_pool_t *pool = sys->out_pool;
+    if (pool != NULL)
+    {
+        nvdec_pool_Release(pool);
+        sys->out_pool = NULL;
+    }
+}
+
+
+/**
+ * Makes the D3D11 decoder, its output surfaces and the picture pool match
+ * the given AV1 sequence header.
+ *
+ * When the requested configuration equals the one already in use, nothing is
+ * recreated. Otherwise the previous decoder objects are released and a new
+ * decoder, event query, texture array, video context and pool are created.
+ * If any step fails, every object created by the failed attempt is released.
+ *
+ * \param dec      decoder whose private state and output format are updated
+ * \param seq_hdr  sequence header giving layout, bit depth, profile and size
+ * \return the opaque output chroma on success, 0 when hardware decoding
+ *         could not be set up for this sequence
+ */
+static vlc_fourcc_t D3D11UpdateDecoder(decoder_t *dec, const Dav1dSequenceHeader *seq_hdr)
+{
+    decoder_sys_t *sys = dec->p_sys;
+    const d3d_format_t *render_fmt = NULL;
+
+    switch (seq_hdr->layout)
+    {
+        case DAV1D_PIXEL_LAYOUT_I420:
+            if (seq_hdr->hbd == 1)
+            {
+//                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_P016);
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_P010);
+                break;
+            }
+            if (seq_hdr->hbd == 0)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_NV12);
+                break;
+            }
+            break;
+#if 0 // test with hardware that supports it
+        case DAV1D_PIXEL_LAYOUT_I422:
+            if (seq_hdr->hbd == 1)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_Y216);
+                break;
+            }
+            if (seq_hdr->hbd == 0)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_YUY2);
+                break;
+            }
+            break;
+        case DAV1D_PIXEL_LAYOUT_I444:
+            if (seq_hdr->hbd == 1)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_Y416);
+                break;
+            }
+            if (seq_hdr->hbd == 0)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_AYUV);
+                break;
+            }
+            break;
+#endif
+        default:
+            break;
+    }
+    if (render_fmt == NULL)
+    {
+        ReleaseD3D11Decoder(sys);
+        return 0;
+    }
+
+    D3D11_VIDEO_DECODER_DESC decoderDesc = {
+        .OutputFormat = render_fmt->formatTexture,
+        .SampleWidth = seq_hdr->max_width,
+        .SampleHeight = seq_hdr->max_height,
+    };
+    switch(seq_hdr->profile) {
+        case 0:
+            decoderDesc.Guid = DXVA_ModeAV1_VLD_Profile0;
+            break;
+        case 1:
+            decoderDesc.Guid = DXVA_ModeAV1_VLD_Profile1;
+            break;
+        case 2:
+            decoderDesc.Guid = DXVA_ModeAV1_VLD_Profile2;
+            break;
+        default:
+            msg_Dbg(dec, "unsupported profile %d with D3D11", seq_hdr->profile);
+            ReleaseD3D11Decoder(sys);
+            return 0;
+    }
+
+    // nothing to do when the decoder already matches this description
+    if (memcmp(&sys->d3d11.decoderDesc, &decoderDesc, sizeof(decoderDesc)) == 0)
+        return render_fmt->fourcc;
+
+    // release the old decoder if there was one
+    ReleaseD3D11Decoder(sys);
+
+    HRESULT hr;
+    UINT cfg_count = 0;
+    hr = ID3D11VideoDevice_GetVideoDecoderConfigCount( sys->d3d11.vdevice, &decoderDesc, &cfg_count );
+    if (FAILED(hr))
+    {
+        msg_Err( dec, "Decoder not supported. (hr=0x%lX)", hr );
+        return 0;
+    }
+
+    // keep the best raw-bitstream configuration, preferring unencrypted ones
+    int cfg_score = 0;
+    D3D11_VIDEO_DECODER_CONFIG best_cfg;
+    for (UINT idx=0; idx < cfg_count; idx++)
+    {
+        D3D11_VIDEO_DECODER_CONFIG cfg;
+        hr = ID3D11VideoDevice_GetVideoDecoderConfig( sys->d3d11.vdevice, &decoderDesc, idx, &cfg );
+        if (FAILED(hr)) {
+            msg_Err(dec, "GetVideoDecoderConfig failed. (hr=0x%lX)", hr);
+            continue; // cfg was not filled, do not evaluate it
+        }
+
+        if (cfg.ConfigBitstreamRaw != 1)
+            continue;
+        int score = 1;
+        if (IsEqualGUID(&cfg.guidConfigBitstreamEncryption, &DXVA2_NoEncrypt))
+            score += 16;
+
+        if (cfg_score < score) {
+            best_cfg = cfg;
+            cfg_score = score;
+        }
+    }
+    if (unlikely(cfg_score == 0))
+    {
+        msg_Err( dec, "Found no suitable decoder configuration." );
+        return 0;
+    }
+
+    ID3D11Texture2D *p_texture = NULL;
+    pool_picture_sys_d3d11 *init_picsys[DECODER_SLICES];
+    size_t n_init = 0; // number of fully initialised entries in init_picsys
+
+    // create a new decoder
+    hr = ID3D11VideoDevice_CreateVideoDecoder( sys->d3d11.vdevice, &decoderDesc, &best_cfg, &sys->d3d11.d3ddec );
+    if (FAILED(hr))
+    {
+        msg_Dbg(dec, "Failed to open D3D11 decoder");
+        sys->d3d11.d3ddec = NULL;
+        return 0;
+    }
+
+    // event query used by D3D11Decoder() to wait for the bitstream upload
+    D3D11_QUERY_DESC query = { 0 };
+    query.Query = D3D11_QUERY_EVENT;
+    hr = ID3D11Device_CreateQuery(sys->d3d11.d3d_dev->d3ddevice, &query, (ID3D11Query**)&sys->d3d11.waitCopies);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to create the wait query. (hr=0x%lX)", hr);
+        sys->d3d11.waitCopies = NULL;
+        goto error_decoder;
+    }
+
+    D3D11_TEXTURE2D_DESC texDesc;
+    ZeroMemory(&texDesc, sizeof(texDesc));
+    texDesc.Width = decoderDesc.SampleWidth;
+    texDesc.Height = decoderDesc.SampleHeight;
+    texDesc.MipLevels = 1;
+    texDesc.Format = decoderDesc.OutputFormat;
+    texDesc.SampleDesc.Count = 1;
+    texDesc.MiscFlags = 0;
+    texDesc.ArraySize = DECODER_SLICES;
+    texDesc.Usage = D3D11_USAGE_DEFAULT;
+    texDesc.BindFlags = D3D11_BIND_DECODER;
+    texDesc.CPUAccessFlags = 0;
+
+    if (DeviceSupportsFormat(sys->d3d11.d3d_dev->d3ddevice, texDesc.Format, D3D11_FORMAT_SUPPORT_SHADER_LOAD))
+        texDesc.BindFlags |= D3D11_BIND_SHADER_RESOURCE;
+
+    hr = ID3D11Device_CreateTexture2D( sys->d3d11.d3d_dev->d3ddevice, &texDesc, NULL, &p_texture );
+    if (FAILED(hr)) {
+        msg_Err(dec, "CreateTexture2D with %d slices failed. (hr=0x%lX)", DECODER_SLICES, hr);
+        goto error_decoder;
+    }
+
+    D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC viewDesc = {
+        .DecodeProfile = decoderDesc.Guid,
+        .ViewDimension = D3D11_VDOV_DIMENSION_TEXTURE2D,
+    };
+
+    // one pool entry (output view + optional shader views) per texture slice
+    for (size_t i=0; i<ARRAY_SIZE(init_picsys); i++)
+    {
+        pool_picture_sys_d3d11 *entry = calloc(1, sizeof(*entry));
+        if (unlikely(entry == NULL))
+            goto error_entries;
+
+        viewDesc.Texture2D.ArraySlice = i;
+        hr = ID3D11VideoDevice_CreateVideoDecoderOutputView( sys->d3d11.vdevice,
+                                                            (ID3D11Resource*)p_texture,
+                                                            &viewDesc,
+                                                            &entry->view );
+        if (unlikely(FAILED(hr))) {
+            msg_Err(dec, "CreateVideoDecoderOutputView %zu failed. (hr=0x%lX)", i, hr);
+            free(entry); // the entry holds no reference yet
+            goto error_entries;
+        }
+
+        entry->slice_index = i;
+        entry->texture = p_texture;
+        ID3D11Texture2D_AddRef(entry->texture);
+        if (texDesc.BindFlags & D3D11_BIND_SHADER_RESOURCE)
+        {
+            ID3D11Texture2D *textures[D3D11_MAX_SHADER_VIEW] = {p_texture, p_texture, p_texture};
+            D3D11_AllocateResourceView(dec, sys->d3d11.d3d_dev->d3ddevice, render_fmt, textures, i,
+                                    entry->renderSrc);
+        }
+        init_picsys[n_init++] = entry;
+    }
+
+    vlc_video_context *vctx = D3D11CreateVideoContext(sys->dec_dev, render_fmt->formatTexture);
+    if (unlikely(vctx == NULL))
+        goto error_entries;
+    // do not leak the context of the previous configuration
+    if (sys->vctx_out != NULL)
+        vlc_video_context_Release(sys->vctx_out);
+    sys->vctx_out = vctx;
+
+    dec->fmt_out.video.i_width  = seq_hdr->max_width;
+    dec->fmt_out.video.i_height = seq_hdr->max_height;
+    dec->fmt_out.video.i_chroma = render_fmt->fourcc;
+    // default value
+    dec->fmt_out.video.i_visible_width  = seq_hdr->max_width;
+    dec->fmt_out.video.i_visible_height = seq_hdr->max_height;
+
+    sys->owner = (nvdec_pool_owner_t) {
+        sys, PoolD3D11Release, PoolD3D11AttachPicture,
+    };
+    sys->out_pool = nvdec_pool_Create(&sys->owner, &dec->fmt_out.video, sys->vctx_out, (void**)init_picsys, DECODER_SLICES);
+    if (unlikely(sys->out_pool == NULL))
+        goto error_vctx;
+
+    // the pool entries hold their own references on the texture
+    ID3D11Texture2D_Release(p_texture);
+    msg_Dbg(dec, "ID3D11VideoDecoderOutputView succeed with %d slices (%dx%d)",
+            DECODER_SLICES, decoderDesc.SampleWidth, decoderDesc.SampleHeight);
+    sys->d3d11.decoderDesc = decoderDesc;
+
+    ID3D10Multithread *pMultithread;
+    hr = ID3D11Device_QueryInterface( sys->d3d11.d3d_dev->d3ddevice, &IID_ID3D10Multithread, (void **)&pMultithread);
+    if (SUCCEEDED(hr)) {
+        ID3D10Multithread_SetMultithreadProtected(pMultithread, TRUE);
+        ID3D10Multithread_Release(pMultithread);
+    }
+
+    return render_fmt->fourcc;
+
+error_vctx:
+    vlc_video_context_Release(sys->vctx_out);
+    sys->vctx_out = NULL;
+error_entries:
+    // only fully initialised entries are stored in init_picsys
+    PoolD3D11Release(&sys->owner, (void**)init_picsys, n_init);
+    ID3D11Texture2D_Release(p_texture);
+error_decoder:
+    // releases the decoder and the wait query created above
+    ReleaseD3D11Decoder(sys);
+    return 0;
+}
+
+/* Callback for a new sequence header on the D3D11 path: refreshes the decoder
+ * output description, then the hardware decoder. Returns 0 on success and
+ * DAV1D_ERR(EAGAIN) when the hardware decoder could not be set up. */
+static int D3D11NewSequence(void *cookie, const Dav1dSequenceHeader *sequence_header)
+{
+    decoder_t *dec = cookie;
+
+    UpdateDecoderOutput(dec, sequence_header);
+
+    const vlc_fourcc_t chroma = D3D11UpdateDecoder(dec, sequence_header);
+    return chroma != 0 ? 0 : DAV1D_ERR(EAGAIN);
+}
+
+/**
+ * Uploads the tile data of one frame and submits the decoder buffers.
+ *
+ * The picture parameter and slice control buffers (obtained in D3D11Alloc())
+ * are released first, then all tile groups are copied back to back into the
+ * bitstream buffer and the three buffers are submitted to the decoder.
+ *
+ * \param cookie            the decoder_t
+ * \param n_tiles           number of DXVA_Tile_AV1 entries in the slice control buffer
+ * \param tile_groups       tile groups whose data forms the bitstream
+ * \param tile_group_count  number of entries in tile_groups
+ * \return 0 on success, a DAV1D_ERR() value otherwise
+ */
+static int D3D11Decoder(void *cookie, DXVA_PicParams_AV1 **picture_parameters, DXVA_Tile_AV1 **filled_tiles, const int n_tiles,
+                        Dav1dPicture *output_picture, Dav1dTileGroup *tile_groups, int tile_group_count)
+{
+    decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
+    HRESULT hr;
+    uint8_t* bitstream_target = NULL;
+    size_t bitstream_size = 0;
+    VLC_UNUSED(picture_parameters);
+    VLC_UNUSED(filled_tiles);
+    VLC_UNUSED(output_picture);
+
+    for (int i = 0; i < tile_group_count; i++)
+        bitstream_size += tile_groups[i].data.sz;
+
+    d3d11_device_lock( sys->d3d11.d3d_dev );
+
+    ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS);
+    ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL);
+
+    uint32_t size_allocated = 0;
+    hr = ID3D11VideoContext_GetDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_BITSTREAM, &size_allocated, (void**)&bitstream_target);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to get the bitstream buffer (error 0x%lX).", hr);
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        return DAV1D_ERR(ENOMEM);
+    }
+    // compare in size_t: truncating bitstream_size to 32 bits could let an
+    // oversized frame pass the check and overflow the buffer below
+    if (bitstream_size > size_allocated)
+    {
+        msg_Err(dec, "bitstream buffer too small %d needed %zu", size_allocated, bitstream_size);
+        ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_BITSTREAM);
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        return DAV1D_ERR(ENOMEM);
+    }
+
+    // this is a GPU bitstream upload
+    for (int i = 0; i < tile_group_count; i++) {
+        const uint8_t *data = tile_groups[i].data.data;
+        size_t size = tile_groups[i].data.sz;
+        memcpy(bitstream_target, data, size);
+        bitstream_target += size;
+    }
+
+    ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_BITSTREAM);
+
+    // wait (up to 10 polls) for the pending GPU work; skipped when the event
+    // query could not be created
+    if (sys->d3d11.waitCopies != NULL)
+    {
+        ID3D11DeviceContext_End(sys->d3d11.d3d_dev->d3dcontext, sys->d3d11.waitCopies);
+
+        int maxWait = 10;
+        while (S_FALSE == ID3D11DeviceContext_GetData(sys->d3d11.d3d_dev->d3dcontext,
+                                                        sys->d3d11.waitCopies, NULL, 0, 0)
+                && --maxWait)
+        {
+            // let other users of the device run while we sleep
+            d3d11_device_unlock( sys->d3d11.d3d_dev );
+            SleepEx(2, TRUE);
+            d3d11_device_lock( sys->d3d11.d3d_dev );
+        }
+    }
+
+    D3D11_VIDEO_DECODER_BUFFER_DESC decodeDesc[3] = {0};
+    decodeDesc[0].BufferType = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS;
+    decodeDesc[0].DataSize = sizeof(DXVA_PicParams_AV1);
+    decodeDesc[1].BufferType = D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL;
+    decodeDesc[1].DataSize = sizeof(DXVA_Tile_AV1)*n_tiles;
+    decodeDesc[2].BufferType = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
+    decodeDesc[2].DataSize = (UINT)bitstream_size; // fits: checked against size_allocated
+
+    hr = ID3D11VideoContext_SubmitDecoderBuffers(sys->d3d11.video_context, sys->d3d11.d3ddec, 3, decodeDesc);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to submit decoder buffers (error 0x%lX).", hr);
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        return DAV1D_ERR(1);
+    }
+
+    d3d11_device_unlock( sys->d3d11.d3d_dev );
+
+    return 0;
+}
+
+/**
+ * Starts decoding a frame into the surface attached to the picture and maps
+ * the picture parameter and slice control buffers for the caller to fill.
+ *
+ * DecoderBeginFrame() is retried (about 50 times, 2 ms apart) while it
+ * reports E_PENDING. The device lock taken here is released on every return
+ * path, including the error ones.
+ *
+ * \param cookie   the decoder_t
+ * \param picture  picture whose allocator_data is the output picture_t
+ * \param pic      receives the picture parameter buffer
+ * \param tiles    receives the slice control buffer
+ * \param n_tiles  number of DXVA_Tile_AV1 entries the slice buffer must hold
+ * \return 0 on success, a negative value otherwise
+ */
+static int D3D11Alloc(void *cookie, Dav1dPicture* picture, DXVA_PicParams_AV1 **pic, DXVA_Tile_AV1 **tiles, int n_tiles)
+{
+    decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
+    uint32_t size = 0;
+    picture_t *p_pic = picture->allocator_data;
+    struct dav1d_d3d11_ctx *pic_ctx = container_of(p_pic->context, struct dav1d_d3d11_ctx, ctx.s);
+    int ret = 0;
+
+    HRESULT hr;
+    int run = 0;
+    do {
+        d3d11_device_lock( sys->d3d11.d3d_dev );
+
+        hr = ID3D11VideoContext_DecoderBeginFrame(sys->d3d11.video_context, sys->d3d11.d3ddec, pic_ctx->surface, 0, NULL);
+        if (hr != E_PENDING || ++run > 50)
+            break; // the device lock is still held here
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        // vlc_tick_sleep(VLC_TICK_FROM_MS(10));
+        SleepEx(2, TRUE);
+    } while (1);
+
+    if(FAILED(hr))
+    {
+        msg_Err(dec, "Failed to start decoding into slice %d. (hr=0x%lX)", pic_ctx->ctx.picsys.slice_index, hr);
+        ret = -1;
+        goto out;
+    }
+
+    // ID3D11DeviceContext_Begin(sys->d3d11.d3d_dev->d3dcontext, sys->d3d11.waitCopies);
+
+    hr = ID3D11VideoContext_GetDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL, &size, (void**)tiles);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to get the slice control buffer (error 0x%lX).", hr);
+        ret = DAV1D_ERR(ENOMEM);
+        goto out;
+    }
+    if (unlikely(size < sizeof(DXVA_Tile_AV1) * n_tiles))
+    {
+        ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL);
+        ret = DAV1D_ERR(ENOMEM);
+        goto out;
+    }
+
+    hr = ID3D11VideoContext_GetDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS, &size, (void**)pic);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to get the picture param buffer (error 0x%lX).", hr);
+        ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL);
+        ret = DAV1D_ERR(ENOMEM);
+        goto out;
+    }
+    if (unlikely(size < sizeof(DXVA_PicParams_AV1)))
+    {
+        ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL);
+        ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS);
+        ret = DAV1D_ERR(ENOMEM);
+        goto out;
+    }
+
+out:
+    // single exit so that the lock taken in the loop above is always released
+    d3d11_device_unlock( sys->d3d11.d3d_dev );
+
+    return ret;
+}
+
+/* Callback ending the current frame: calls DecoderEndFrame() on the D3D11
+ * decoder while holding the device lock. The buffer pointers are not used. */
+static void D3D11Release(void *cookie, DXVA_PicParams_AV1 **picparams, DXVA_Tile_AV1 **tiles)
+{
+    VLC_UNUSED(picparams);
+    VLC_UNUSED(tiles);
+
+    decoder_sys_t *sys = ((decoder_t *)cookie)->p_sys;
+    d3d11_device_t *device = sys->d3d11.d3d_dev;
+
+    d3d11_device_lock( device );
+    ID3D11VideoContext_DecoderEndFrame(sys->d3d11.video_context, sys->d3d11.d3ddec);
+    d3d11_device_unlock( device );
+}
+
+#if !VLC_WINSTORE_APP
+struct dav1d_d3d9_ctx /* per-picture context handed out by the D3D9 pool */
+{
+    struct d3d9_pic_context     ctx; /* D3D9 picture context; ctx.s is the picture_context_t seen by pictures */
+    size_t                      index; /* slice_index of the pool entry, copied in PoolD3D9AttachPicture() */
+    nvdec_pool_t                *pool; /* pool this picture came from, referenced with nvdec_pool_AddRef() */
+};
+
+typedef struct /* one entry of the D3D9 output pool */
+{
+    IDirect3DSurface9            *texture; /* decoder surface of this entry; released by ReleaseD3D9Sys() */
+    unsigned                     slice_index; /* position of the surface in the array given to the decoder */
+} pool_picture_sys_d3d9;
+
+#define NVDEC_PICPOOLCTX_FROM_PICCTX9(pic_ctx)  \
+    container_of(pic_ctx, struct dav1d_d3d9_ctx, ctx.s) /* picture_context_t* -> enclosing struct dav1d_d3d9_ctx* */
+
+/* Releases the surface reference held by a D3D9 pool entry and frees the entry. */
+static void ReleaseD3D9Sys(pool_picture_sys_d3d9 *dsys)
+{
+    IDirect3DSurface9 *surface = dsys->texture;
+
+    IDirect3DSurface9_Release(surface);
+    free(dsys);
+}
+
+/* Pool owner callback: releases the pics_count entries stored in buffers[].
+ * The owner is not needed to do so. */
+static void PoolD3D9Release(nvdec_pool_owner_t *owner, void *buffers[], size_t pics_count)
+{
+    VLC_UNUSED(owner);
+
+    void **entry = buffers;
+    for (size_t remaining = pics_count; remaining > 0; remaining--)
+        ReleaseD3D9Sys(*entry++);
+}
+
+/* picture_context_t destroy callback: gives back the pool reference held by
+ * this context, then lets d3d9_pic_context_destroy() dispose of the context. */
+static void d3d9_picture_CtxDestroy(picture_context_t *picctx)
+{
+    struct dav1d_d3d9_ctx *self =
+        container_of(picctx, struct dav1d_d3d9_ctx, ctx.s);
+
+    nvdec_pool_Release(self->pool);
+    d3d9_pic_context_destroy(picctx);
+}
+
+/* picture_context_t copy callback: returns a heap copy of the source context
+ * that holds its own references on the pool, the picture resources and the
+ * video context, or NULL when the allocation fails. */
+static picture_context_t *d3d9_picture_CtxClone(picture_context_t *srcctx)
+{
+    struct dav1d_d3d9_ctx *copy = malloc(sizeof(*copy));
+    if (unlikely(copy == NULL))
+        return NULL;
+
+    const struct dav1d_d3d9_ctx *original =
+        container_of(srcctx, struct dav1d_d3d9_ctx, ctx.s);
+    *copy = *original;
+
+    /* the copy shares everything with the original: take one reference each */
+    nvdec_pool_AddRef(copy->pool);
+    AcquireD3D9PictureSys(&copy->ctx.picsys);
+    vlc_video_context_Hold(copy->ctx.s.vctx);
+
+    return &copy->ctx.s;
+}
+/* Pool owner callback: builds the picture context for one pool entry.
+ *
+ * owner   owner embedded in the decoder_sys_t that created the pool
+ * pool    pool the entry belongs to; the new context takes a reference on it
+ * surface the pool_picture_sys_d3d9 entry to expose through the context
+ *
+ * Returns the new picture_context_t, or NULL when the allocation fails. */
+static picture_context_t * PoolD3D9AttachPicture(nvdec_pool_owner_t *owner, nvdec_pool_t *pool, void *surface)
+{
+    decoder_sys_t *p_sys = container_of(owner, decoder_sys_t, owner);
+
+    struct dav1d_d3d9_ctx *picctx = calloc(1, sizeof(*picctx));
+    if (unlikely(!picctx))
+        return NULL;
+
+    pool_picture_sys_d3d9 *pool_picsys = surface;
+    picture_sys_d3d9_t * picsys = &picctx->ctx.picsys;
+
+    picctx->index = pool_picsys->slice_index;
+
+    picsys->surface = pool_picsys->texture;
+    AcquireD3D9PictureSys(picsys);
+
+    picctx->ctx.s = (picture_context_t) {
+        d3d9_picture_CtxDestroy,
+        d3d9_picture_CtxClone,
+        p_sys->vctx_out,
+    };
+    vlc_video_context_Hold(picctx->ctx.s.vctx);
+
+    picctx->pool = pool;
+    nvdec_pool_AddRef(pool);
+    return &picctx->ctx.s;
+}
+
+/* Gives up the decoder's reference on the current output pool, if any.
+ * NOTE(review): unlike ReleaseD3D11Decoder() this does not release
+ * sys->d3d9.decoder - confirm that it is released elsewhere. */
+static void ReleaseD3D9Decoder(decoder_sys_t *sys)
+{
+    if (sys->out_pool == NULL)
+        return;
+
+    nvdec_pool_Release(sys->out_pool);
+    sys->out_pool = NULL;
+}
+
+/**
+ * Makes the DXVA2 decoder, its surfaces and the picture pool match the given
+ * AV1 sequence header.
+ *
+ * When the decoder GUID, render format and dimensions equal the ones already
+ * in use, nothing is recreated. Otherwise the pool and decoder of the previous
+ * configuration are released and new surfaces, decoder, video context and
+ * pool are created. If any step fails, everything created by the failed
+ * attempt is released and the cached selection is left cleared, so that a
+ * later call tries again.
+ *
+ * \param dec      decoder whose private state and output format are updated
+ * \param seq_hdr  sequence header giving layout, bit depth, profile and size
+ * \return the opaque output chroma on success, 0 when hardware decoding
+ *         could not be set up for this sequence
+ */
+static vlc_fourcc_t D3D9UpdateDecoder(decoder_t *dec, const Dav1dSequenceHeader *seq_hdr)
+{
+    decoder_sys_t *sys = dec->p_sys;
+    vlc_fourcc_t fourcc = 0;
+    D3DFORMAT render = 0;
+    switch (seq_hdr->layout)
+    {
+        case DAV1D_PIXEL_LAYOUT_I420:
+            if (seq_hdr->hbd == 1)
+            {
+//                render = MAKEFOURCC('P','0','1','6');
+                render = MAKEFOURCC('P','0','1','0');
+                fourcc = VLC_CODEC_D3D9_OPAQUE_10B;
+                break;
+            }
+            if (seq_hdr->hbd == 0)
+            {
+                render = MAKEFOURCC('N','V','1','2');
+                fourcc = VLC_CODEC_D3D9_OPAQUE;
+                break;
+            }
+            break;
+#if 0 // test with hardware that supports it
+        case DAV1D_PIXEL_LAYOUT_I422:
+            if (seq_hdr->hbd == 1)
+            {
+                render = MAKEFOURCC('Y','2','1','6');
+                break;
+            }
+            if (seq_hdr->hbd == 0)
+            {
+                render = MAKEFOURCC('Y','U','Y','2');
+                break;
+            }
+            break;
+        case DAV1D_PIXEL_LAYOUT_I444:
+            if (seq_hdr->hbd == 1)
+            {
+                render = MAKEFOURCC('Y','4','1','6');
+                break;
+            }
+            if (seq_hdr->hbd == 0)
+            {
+                render = MAKEFOURCC('A','Y','U','V');
+                break;
+            }
+            break;
+#endif
+        default:
+            break;
+    }
+    if (render == 0)
+    {
+        ReleaseD3D9Decoder(sys);
+        return 0;
+    }
+
+    const GUID *selected_decoder;
+    switch(seq_hdr->profile) {
+        case 0:
+            selected_decoder = &DXVA_ModeAV1_VLD_Profile0;
+            break;
+        case 1:
+            selected_decoder = &DXVA_ModeAV1_VLD_Profile1;
+            break;
+        case 2:
+            selected_decoder = &DXVA_ModeAV1_VLD_Profile2;
+            break;
+        default:
+            msg_Dbg(dec, "unsupported profile %d with DXVA2", seq_hdr->profile);
+            ReleaseD3D9Decoder(sys);
+            return 0;
+    }
+
+    // nothing to do when the decoder already matches this sequence
+    if (sys->d3d9.selected_decoder == selected_decoder &&
+        sys->d3d9.render == render &&
+        dec->fmt_out.video.i_width == (unsigned)seq_hdr->max_width &&
+        dec->fmt_out.video.i_height == (unsigned)seq_hdr->max_height)
+        return fourcc;
+
+    // release what the previous configuration was using; the cached selection
+    // is only stored again once everything below succeeded
+    ReleaseD3D9Decoder(sys);
+    if (sys->d3d9.decoder != NULL)
+    {
+        IDirectXVideoDecoder_Release(sys->d3d9.decoder);
+        sys->d3d9.decoder = NULL;
+    }
+    sys->d3d9.selected_decoder = NULL;
+    sys->d3d9.render = 0;
+
+    HRESULT hr;
+    IDirect3DSurface9 *hw_surfaces[DECODER_SLICES] = { NULL };
+    // the call creates "back buffers + 1" surfaces, i.e. DECODER_SLICES
+    hr = IDirectXVideoDecoderService_CreateSurface(sys->d3d9.d3ddec,
+                                                   seq_hdr->max_width, seq_hdr->max_height,
+                                                   DECODER_SLICES - 1,
+                                                   render,
+                                                   D3DPOOL_DEFAULT,
+                                                   0,
+                                                   DXVA2_VideoDecoderRenderTarget,
+                                                   hw_surfaces,
+                                                   NULL);
+    if (FAILED(hr)) {
+        msg_Err(dec, "IDirectXVideoAccelerationService_CreateSurface %d failed (hr=0x%lX)", DECODER_SLICES, hr);
+        return 0;
+    }
+    msg_Dbg(dec, "IDirectXVideoAccelerationService_CreateSurface succeed with %d surfaces (%dx%d)",
+            DECODER_SLICES, seq_hdr->max_width, seq_hdr->max_height);
+
+    /* */
+    DXVA2_VideoDesc dsc;
+    ZeroMemory(&dsc, sizeof(dsc));
+    dsc.SampleWidth     = seq_hdr->max_width;
+    dsc.SampleHeight    = seq_hdr->max_height;
+    dsc.Format          = render;
+    if (dec->fmt_in.video.i_frame_rate > 0 && dec->fmt_in.video.i_frame_rate_base > 0) {
+        dsc.InputSampleFreq.Numerator   = dec->fmt_in.video.i_frame_rate;
+        dsc.InputSampleFreq.Denominator = dec->fmt_in.video.i_frame_rate_base;
+    } else {
+        dsc.InputSampleFreq.Numerator   = 0;
+        dsc.InputSampleFreq.Denominator = 0;
+    }
+    dsc.OutputFrameFreq = dsc.InputSampleFreq;
+    dsc.UABProtectionLevel = FALSE;
+    dsc.Reserved = 0;
+
+    /* FIXME I am unsure we can let unknown everywhere */
+    DXVA2_ExtendedFormat *ext = &dsc.SampleFormat;
+    ext->SampleFormat = 0;//DXVA2_SampleUnknown;
+    ext->VideoChromaSubsampling = 0;//DXVA2_VideoChromaSubsampling_Unknown;
+    ext->NominalRange = 0;//DXVA2_NominalRange_Unknown;
+    ext->VideoTransferMatrix = 0;//DXVA2_VideoTransferMatrix_Unknown;
+    ext->VideoLighting = 0;//DXVA2_VideoLighting_Unknown;
+    ext->VideoPrimaries = 0;//DXVA2_VideoPrimaries_Unknown;
+    ext->VideoTransferFunction = 0;//DXVA2_VideoTransFunc_Unknown;
+
+    /* List all configurations available for the decoder */
+    UINT                      cfg_count = 0;
+    DXVA2_ConfigPictureDecode *cfg_list = NULL;
+    hr = IDirectXVideoDecoderService_GetDecoderConfigurations(sys->d3d9.d3ddec,
+                                                              selected_decoder,
+                                                              &dsc,
+                                                              NULL,
+                                                              &cfg_count,
+                                                              &cfg_list);
+    if (FAILED(hr)) {
+        msg_Err(dec, "IDirectXVideoDecoderService_GetDecoderConfigurations failed. (hr=0x%lX)", hr);
+        goto error_surfaces;
+    }
+    msg_Dbg(dec, "we got %d decoder configurations", cfg_count);
+
+    /* Select the best decoder configuration */
+    int cfg_score = 0;
+    for (unsigned i = 0; i < cfg_count; i++) {
+        const DXVA2_ConfigPictureDecode *cfg = &cfg_list[i];
+
+        /* */
+        msg_Dbg(dec, "configuration[%d] ConfigBitstreamRaw %d",
+                i, cfg->ConfigBitstreamRaw);
+
+        /* only raw bitstream configurations are usable, unencrypted preferred */
+        int score;
+        if (cfg->ConfigBitstreamRaw == 1)
+            score = 1;
+        else
+            continue;
+        if (IsEqualGUID(&cfg->guidConfigBitstreamEncryption, &DXVA2_NoEncrypt))
+            score += 16;
+
+        if (cfg_score < score) {
+            sys->d3d9.cfg = *cfg;
+            cfg_score = score;
+        }
+    }
+    CoTaskMemFree(cfg_list);
+    if (cfg_score <= 0) {
+        msg_Err(dec, "Failed to find a supported decoder configuration");
+        goto error_surfaces;
+    }
+
+    /* Create the decoder */
+    /* adds a reference on each decoder surface */
+    if (FAILED(IDirectXVideoDecoderService_CreateVideoDecoder(sys->d3d9.d3ddec,
+                                                              selected_decoder,
+                                                              &dsc,
+                                                              &sys->d3d9.cfg,
+                                                              hw_surfaces,
+                                                              DECODER_SLICES,
+                                                              &sys->d3d9.decoder))) {
+        msg_Err(dec, "IDirectXVideoDecoderService_CreateVideoDecoder failed");
+        sys->d3d9.decoder = NULL;
+        goto error_surfaces;
+    }
+
+    msg_Dbg(dec, "IDirectXVideoDecoderService_CreateVideoDecoder succeed");
+
+    vlc_video_context *vctx = vlc_video_context_Create(sys->dec_dev, VLC_VIDEO_CONTEXT_DXVA2,
+                                                       sizeof(d3d9_video_context_t), &d3d9_vctx_ops);
+    if (unlikely(vctx == NULL))
+        goto error_decoder;
+    // do not leak the context of the previous configuration
+    if (sys->vctx_out != NULL)
+        vlc_video_context_Release(sys->vctx_out);
+    sys->vctx_out = vctx;
+    d3d9_video_context_t *octx = GetD3D9ContextPrivate(sys->vctx_out);
+    octx->format = render;
+
+    dec->fmt_out.video.i_width = seq_hdr->max_width;
+    dec->fmt_out.video.i_height = seq_hdr->max_height;
+    dec->fmt_out.video.i_chroma = fourcc;
+
+    // one pool entry per surface; the entries take over the surface references
+    pool_picture_sys_d3d9 *init_picsys[DECODER_SLICES];
+    size_t n_init = 0;
+    for (; n_init < ARRAY_SIZE(init_picsys); n_init++)
+    {
+        pool_picture_sys_d3d9 *entry = malloc(sizeof(*entry));
+        if (unlikely(entry == NULL))
+            break;
+
+        entry->slice_index = n_init;
+        entry->texture = hw_surfaces[n_init];
+        init_picsys[n_init] = entry;
+    }
+    if (unlikely(n_init != ARRAY_SIZE(init_picsys)))
+    {
+        // only free the entries: the surfaces are released at error_surfaces
+        while (n_init > 0)
+            free(init_picsys[--n_init]);
+        goto error_vctx;
+    }
+
+    sys->owner = (nvdec_pool_owner_t) {
+        sys, PoolD3D9Release, PoolD3D9AttachPicture,
+    };
+    sys->out_pool = nvdec_pool_Create(&sys->owner, &dec->fmt_out.video, sys->vctx_out, (void**)init_picsys, DECODER_SLICES);
+    if (unlikely(sys->out_pool == NULL))
+    {
+        // releasing the entries also releases the surfaces they own
+        PoolD3D9Release(&sys->owner, (void**)init_picsys, DECODER_SLICES);
+        for (size_t i=0; i<ARRAY_SIZE(hw_surfaces); i++)
+            hw_surfaces[i] = NULL;
+        goto error_vctx;
+    }
+
+    sys->d3d9.selected_decoder = selected_decoder;
+    sys->d3d9.render = render;
+    return fourcc;
+
+error_vctx:
+    vlc_video_context_Release(sys->vctx_out);
+    sys->vctx_out = NULL;
+error_decoder:
+    IDirectXVideoDecoder_Release(sys->d3d9.decoder);
+    sys->d3d9.decoder = NULL;
+error_surfaces:
+    for (size_t i=0; i<ARRAY_SIZE(hw_surfaces); i++)
+    {
+        if (hw_surfaces[i] != NULL)
+            IDirect3DSurface9_Release(hw_surfaces[i]);
+    }
+    return 0;
+}
+
+/**
+ * New sequence header callback for the DXVA2 path.
+ *
+ * Calls UpdateDecoderOutput() and then D3D9UpdateDecoder() with the new
+ * sequence header.
+ *
+ * \param cookie the decoder_t registered with the callbacks
+ * \param sequence_header the sequence header that was just parsed
+ * \return 0 when D3D9UpdateDecoder() returned a non-zero value,
+ *         DAV1D_ERR(EAGAIN) otherwise
+ */
+static int D3D9NewSequence(void *cookie, const Dav1dSequenceHeader *sequence_header)
+{
+    decoder_t *p_dec = cookie;
+
+    UpdateDecoderOutput(p_dec, sequence_header);
+
+    /* D3D9UpdateDecoder() returns 0 when no hardware decoder could be set up */
+    return D3D9UpdateDecoder(p_dec, sequence_header) != 0 ? 0 : DAV1D_ERR(EAGAIN);
+}
+
+/**
+ * Decode callback for the DXVA2 path: uploads the tile data and executes the
+ * frame on the hardware decoder.
+ *
+ * The picture parameters and slice control buffers (obtained in D3D9Alloc())
+ * are released first, then the payload of every tile group is copied back to
+ * back into the bitstream buffer and the three buffers are submitted with
+ * IDirectXVideoDecoder_Execute().
+ *
+ * \param cookie the decoder_t registered with the callbacks
+ * \param picture_parameters unused in this function
+ * \param filled_tiles unused in this function
+ * \param n_tiles number of DXVA_Tile_AV1 entries described to the decoder
+ * \param output_picture unused in this function
+ * \param tile_groups tile groups whose data is uploaded
+ * \param tile_group_count number of entries in tile_groups
+ * \return 0 on success, a DAV1D_ERR() code on failure
+ */
+static int D3D9Decoder(void *cookie, DXVA_PicParams_AV1 **picture_parameters, DXVA_Tile_AV1 **filled_tiles, const int n_tiles,
+                        Dav1dPicture *output_picture, Dav1dTileGroup *tile_groups, int tile_group_count)
+{
+    decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
+    uint8_t* bitstream_target = NULL;
+    HRESULT hr;
+    size_t bitstream_size = 0;
+    VLC_UNUSED(picture_parameters);
+    VLC_UNUSED(filled_tiles);
+    VLC_UNUSED(output_picture);
+
+    /* these buffers have been filled by now, hand them back to the decoder */
+    IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_PictureParametersBufferType);
+    IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_SliceControlBufferType);
+
+    for (int i = 0; i < tile_group_count; i++)
+        bitstream_size += tile_groups[i].data.sz;
+
+    UINT size_allocated = 0;
+    hr = IDirectXVideoDecoder_GetBuffer(sys->d3d9.decoder, DXVA2_BitStreamDateBufferType, (void**)&bitstream_target, &size_allocated);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to get the bitstream buffer (error 0x%lX).", hr);
+        return DAV1D_ERR(ENOMEM);
+    }
+    /* compare at full size_t width: truncating bitstream_size to 32 bits
+     * could let an oversized payload pass the check and overflow the buffer */
+    if (bitstream_size > size_allocated)
+    {
+        msg_Err(dec, "bitstream buffer too small %u needed %zu", size_allocated, bitstream_size);
+        IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_BitStreamDateBufferType);
+        return DAV1D_ERR(ENOMEM);
+    }
+
+    // this is a GPU bitstream upload
+    for (int i = 0; i < tile_group_count; i++) {
+        const uint8_t *data = tile_groups[i].data.data;
+        size_t size = tile_groups[i].data.sz;
+        memcpy(bitstream_target, data, size);
+        bitstream_target += size;
+    }
+
+    IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_BitStreamDateBufferType);
+
+    DXVA2_DecodeBufferDesc buffers[3] = {
+        {
+            .CompressedBufferType = DXVA2_PictureParametersBufferType,
+            .DataSize = sizeof(DXVA_PicParams_AV1),
+        },
+        {
+            .CompressedBufferType = DXVA2_SliceControlBufferType,
+            .DataSize = sizeof(DXVA_Tile_AV1) * n_tiles,
+        },
+        {
+            .CompressedBufferType = DXVA2_BitStreamDateBufferType,
+            /* fits in a UINT: checked against size_allocated above */
+            .DataSize = (UINT)bitstream_size,
+        },
+    };
+
+    DXVA2_DecodeExecuteParams exec = {
+        .NumCompBuffers = ARRAY_SIZE(buffers),
+        .pCompressedBuffers = buffers,
+    };
+    hr = IDirectXVideoDecoder_Execute(sys->d3d9.decoder, &exec);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to execute the decoding (error 0x%lX).", hr);
+        return DAV1D_ERR(1);
+    }
+
+    return 0;
+}
+
+/**
+ * Frame allocation callback for the DXVA2 path.
+ *
+ * Begins a decoder frame on the surface attached to the picture, then
+ * obtains the slice control and picture parameters buffers from the decoder
+ * so they can be filled in place. The picture parameters buffer is zeroed
+ * before returning. Both buffers stay acquired on success; D3D9Decoder()
+ * releases them.
+ *
+ * \param cookie the decoder_t registered with the callbacks
+ * \param picture picture whose allocator_data holds the picture_t to decode into
+ * \param pic receives the address of the picture parameters buffer
+ * \param tiles receives the address of the slice control (tiles) buffer
+ * \param n_tiles number of DXVA_Tile_AV1 entries that must fit in *tiles
+ * \return 0 on success, -1 if the frame could not be started,
+ *         DAV1D_ERR(ENOMEM) if a buffer is unavailable or too small
+ */
+static int D3D9Alloc(void *cookie, Dav1dPicture* picture, DXVA_PicParams_AV1 **pic, DXVA_Tile_AV1 **tiles, int n_tiles)
+{
+    decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
+    UINT size = 0;
+    picture_t *p_pic = picture->allocator_data;
+    struct dav1d_d3d9_ctx *pic_ctx = container_of(p_pic->context, struct dav1d_d3d9_ctx, ctx.s);
+
+    /* the decoder may still be busy: retry a bounded number of times */
+    HRESULT hr;
+    int run = 0;
+    do {
+        hr = IDirectXVideoDecoder_BeginFrame(sys->d3d9.decoder, pic_ctx->ctx.picsys.surface, NULL);
+        if (hr != E_PENDING || ++run > 50)
+            break;
+        SleepEx(2, TRUE);
+    } while (1);
+
+    if(FAILED(hr))
+    {
+        msg_Err(dec, "Failed to start decoding into slice. (hr=0x%lX)", hr);
+        return -1;
+    }
+
+    /* NOTE(review): the error paths below leave the frame started with
+     * BeginFrame open; confirm that the caller invokes the release callback
+     * (EndFrame) when this function fails. */
+
+    hr = IDirectXVideoDecoder_GetBuffer(sys->d3d9.decoder, DXVA2_SliceControlBufferType, (void**)tiles, &size);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to get the slice control buffer (error 0x%lX).", hr);
+        return DAV1D_ERR(ENOMEM);
+    }
+    if (size < sizeof(DXVA_Tile_AV1) * n_tiles)
+    {
+        msg_Err(dec, "slice control buffer too small %u needed %zu", size, sizeof(DXVA_Tile_AV1) * n_tiles);
+        /* release the buffer that was actually acquired */
+        IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_SliceControlBufferType);
+        return DAV1D_ERR(ENOMEM);
+    }
+
+    hr = IDirectXVideoDecoder_GetBuffer(sys->d3d9.decoder, DXVA2_PictureParametersBufferType, (void**)pic, &size);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "Failed to get the picture param buffer (error 0x%lX).", hr);
+        /* do not keep the slice control buffer locked on failure */
+        IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_SliceControlBufferType);
+        return DAV1D_ERR(ENOMEM);
+    }
+    if (size < sizeof(DXVA_PicParams_AV1))
+    {
+        msg_Err(dec, "picture param buffer too small %u needed %zu", size, sizeof(DXVA_PicParams_AV1));
+        /* release both buffers acquired so far */
+        IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_PictureParametersBufferType);
+        IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_SliceControlBufferType);
+        return DAV1D_ERR(ENOMEM);
+    }
+
+    memset(*pic, 0, sizeof(DXVA_PicParams_AV1));
+    return 0;
+}
+
+/**
+ * Frame release callback for the DXVA2 path: ends the current decoder frame.
+ *
+ * \param cookie the decoder_t registered with the callbacks
+ * \param picparams unused in this function
+ * \param tiles unused in this function
+ */
+static void D3D9Release(void *cookie, DXVA_PicParams_AV1 **picparams, DXVA_Tile_AV1 **tiles)
+{
+    VLC_UNUSED(picparams);
+    VLC_UNUSED(tiles);
+
+    decoder_t *p_dec = cookie;
+    decoder_sys_t *p_sys = p_dec->p_sys;
+
+    IDirectXVideoDecoder_EndFrame(p_sys->d3d9.decoder, NULL);
+}
+#endif // !VLC_WINSTORE_APP
+#endif // _WIN32
+
 static int NewPicture(Dav1dPicture *img, void *cookie)
 {
     decoder_t *dec = cookie;
@@ -191,6 +1205,14 @@ static int NewPicture(Dav1dPicture *img, void *cookie)
     v->multiview_mode = dec->fmt_in.video.multiview_mode;
     v->pose = dec->fmt_in.video.pose;
     dec->fmt_out.i_codec = 0;
+#ifdef _WIN32
+    if (sys->dec_dev && sys->dec_dev->type == VLC_DECODER_DEVICE_D3D11VA)
+        dec->fmt_out.i_codec = D3D11UpdateDecoder(dec, img->seq_hdr);
+#if !VLC_WINSTORE_APP
+    else if (sys->dec_dev && sys->dec_dev->type == VLC_DECODER_DEVICE_DXVA2)
+        dec->fmt_out.i_codec = D3D9UpdateDecoder(dec, img->seq_hdr);
+#endif
+#endif
     if (dec->fmt_out.i_codec == 0)
     {
         dec->fmt_out.i_codec = FindVlcChroma(img);
@@ -202,6 +1224,39 @@ static int NewPicture(Dav1dPicture *img, void *cookie)
     if (decoder_UpdateVideoOutput(dec, sys->vctx_out) == 0)
     {
         picture_t *pic;
+#ifdef _WIN32
+        if (is_d3d11_opaque(v->i_chroma))
+        {
+            pic = nvdec_pool_Wait(sys->out_pool);
+            if (unlikely(pic==NULL))
+                return -1;
+            // TODO update the pool format when the source changes
+            pic->format.i_visible_width  = img->p.w;
+            pic->format.i_visible_height = img->p.h;
+
+            struct dav1d_d3d11_ctx *picctx = NVDEC_PICPOOLCTX_FROM_PICCTX(pic->context);
+
+            img->dxva_picture_index = picctx->ctx.picsys.slice_index;
+            img->data[0] = picctx->surface;
+        }
+#if !VLC_WINSTORE_APP
+        else if (is_d3d9_opaque(v->i_chroma))
+        {
+            pic = nvdec_pool_Wait(sys->out_pool);
+            if (unlikely(pic==NULL))
+                return -1;
+            // TODO update the pool format when the source changes
+            pic->format.i_visible_width  = img->p.w;
+            pic->format.i_visible_height = img->p.h;
+
+            struct dav1d_d3d9_ctx *picctx = NVDEC_PICPOOLCTX_FROM_PICCTX9(pic->context);
+
+            img->dxva_picture_index = picctx->index;
+            img->data[0] = picctx->ctx.picsys.surface;
+        }
+#endif
+        else
+#endif // _WIN32
         {
             pic = decoder_NewPicture(dec);
             if (unlikely(pic == NULL))
@@ -447,6 +1502,174 @@ static int OpenDecoder(vlc_object_t *p_this)
     dec->fmt_out.video.i_visible_width  = dec->fmt_out.video.i_width;
     dec->fmt_out.video.i_visible_height = dec->fmt_out.video.i_height;
 
+#ifdef _WIN32
+    if (sequence_hdr != NULL)
+    {
+        vlc_decoder_device *dec_dev = decoder_GetDecoderDevice( dec );
+        if (dec_dev)
+        {
+            Dav1dSequenceHeader seq_hdr;
+            const Dav1dSequenceHeader *test_seq_hdr = &seq_hdr;
+            unsigned width, height;
+            AV1_get_frame_max_dimensions(sequence_hdr, &width, &height);
+            seq_hdr.max_width  = width;
+            seq_hdr.max_height = height;
+
+            AV1_get_colorimetry(sequence_hdr, &dec->fmt_out.video.primaries, &dec->fmt_out.video.transfer,
+                                &dec->fmt_out.video.space, &dec->fmt_out.video.color_range);
+
+            vlc_fourcc_t chroma = AV1_get_chroma(sequence_hdr);
+            for (unsigned int i = 0; i < ARRAY_SIZE(chroma_table); i++)
+                if (chroma_table[i].i_chroma == chroma)
+                {
+                    seq_hdr.layout = chroma_table[i].i_chroma_id;
+                    seq_hdr.hbd = (chroma_table[i].i_bitdepth - 8) / 2;
+                    break;
+                }
+
+            int level,tier;
+            AV1_get_profile_level(sequence_hdr, &seq_hdr.profile, &level, &tier);
+
+            p_sys->out_pool = NULL;
+            p_sys->dec_dev = NULL;
+            p_sys->s.dxva.decode_callback = NULL;
+
+            d3d11_decoder_device_t *devsys = GetD3D11OpaqueDevice( dec_dev );
+            if (devsys)
+            {
+                p_sys->d3d11.vdevice = NULL;
+                p_sys->d3d11.video_context = NULL;
+                p_sys->d3d11.d3d_dev = NULL;
+                p_sys->d3d11.waitCopies = NULL;
+                p_sys->d3d11.d3ddec = NULL;
+                p_sys->d3d11.decoderDesc = (D3D11_VIDEO_DECODER_DESC) { 0 };
+
+                void *d3dviddev = NULL;
+                HRESULT hr;
+                hr = ID3D11Device_QueryInterface(devsys->d3d_dev.d3ddevice, &IID_ID3D11VideoDevice, &d3dviddev);
+                if (FAILED(hr)) {
+                    msg_Err(dec, "Could not Query ID3D11VideoDevice Interface. (hr=0x%lX)", hr);
+                    vlc_decoder_device_Release(dec_dev);
+                }
+                else
+                {
+                    void *d3dvidctx = NULL;
+                    hr = ID3D11Device_QueryInterface(devsys->d3d_dev.d3dcontext, &IID_ID3D11VideoContext, &d3dvidctx);
+                    if (FAILED(hr)) {
+                        msg_Err(dec, "Could not Query ID3D11VideoContext Interface. (hr=0x%lX)", hr);
+                        vlc_decoder_device_Release(dec_dev);
+                    }
+                    else
+                    {
+                        p_sys->d3d11.vdevice = d3dviddev;
+                        p_sys->d3d11.video_context = d3dvidctx;
+                        p_sys->d3d11.d3d_dev = &devsys->d3d_dev;
+                        p_sys->dec_dev = dec_dev;
+                        vlc_fourcc_t d3d_chroma = D3D11UpdateDecoder(dec, test_seq_hdr);
+
+                        if (d3d_chroma != 0)
+                        {
+                            msg_Dbg(dec, "Using D3D11VA");
+                            dec->fmt_out.i_codec = d3d_chroma;
+                            if (p_sys->s.n_frame_threads == 0)
+                                p_sys->s.n_frame_threads = 1;
+                            p_sys->s.dxva = (Dav1dDXVA) {
+                                dec, D3D11NewSequence, D3D11Decoder, D3D11Alloc, D3D11Release,
+                            };
+                        }
+                        else
+                        {
+                            p_sys->dec_dev = NULL;
+                            ID3D11VideoDevice_Release(p_sys->d3d11.vdevice);
+                            p_sys->d3d11.vdevice = NULL;
+
+                            ID3D11VideoContext_Release(p_sys->d3d11.video_context);
+                            p_sys->d3d11.video_context = NULL;
+                        }
+                    }
+                }
+            }
+#if !VLC_WINSTORE_APP
+            else
+            {
+                d3d9_decoder_device_t *devsys = GetD3D9OpaqueDevice( dec_dev );
+                if (devsys)
+                {
+                    p_sys->d3d9.dxva2_dll = LoadLibrary(TEXT("DXVA2.DLL"));
+                    if (!p_sys->d3d9.dxva2_dll) {
+                        msg_Err(dec, " OurDirect3DCreateDeviceManager9 failed");
+                    } else {
+                    HRESULT (WINAPI *CreateDeviceManager9)(UINT *pResetToken,
+                                                        IDirect3DDeviceManager9 **);
+                    CreateDeviceManager9 =
+                    (void *)GetProcAddress(p_sys->d3d9.dxva2_dll,
+                                            "DXVA2CreateDirect3DDeviceManager9");
+
+                    if (!CreateDeviceManager9) {
+                        msg_Err(dec, "cannot load function");
+                    } else {
+                        UINT token;
+                        if (FAILED(CreateDeviceManager9(&token, &p_sys->d3d9.devmng))) {
+                            msg_Err(dec, " OurDirect3DCreateDeviceManager9 failed");
+                        } else {
+                            HRESULT hr = IDirect3DDeviceManager9_ResetDevice(p_sys->d3d9.devmng, devsys->d3ddev.dev, token);
+                            if (FAILED(hr)) {
+                                msg_Err(dec, "IDirect3DDeviceManager9_ResetDevice failed: 0x%lX)", hr);
+                                IDirect3DDeviceManager9_Release(p_sys->d3d9.devmng);
+                            } else {
+                                hr = IDirect3DDeviceManager9_OpenDeviceHandle(p_sys->d3d9.devmng, &p_sys->d3d9.device);
+                                if (FAILED(hr)) {
+                                    msg_Err(dec, "OpenDeviceHandle failed");
+                                    IDirect3DDeviceManager9_Release(p_sys->d3d9.devmng);
+                                } else {
+                                    void *pv;
+                                    hr = IDirect3DDeviceManager9_GetVideoService(p_sys->d3d9.devmng, p_sys->d3d9.device,
+                                                                                &IID_IDirectXVideoDecoderService, &pv);
+                                    if (FAILED(hr)) {
+                                        msg_Err(dec, "GetVideoService failed");
+                                        IDirect3DDeviceManager9_CloseDeviceHandle(p_sys->d3d9.devmng, p_sys->d3d9.device);
+                                        p_sys->d3d9.device = NULL;
+                                        IDirect3DDeviceManager9_Release(p_sys->d3d9.devmng);
+                                        p_sys->d3d9.devmng = NULL;
+                                    } else {
+                                        p_sys->d3d9.d3ddec = pv;
+                                        p_sys->dec_dev = dec_dev;
+                                        vlc_fourcc_t d3d_chroma = D3D9UpdateDecoder(dec, test_seq_hdr);
+
+                                        if (d3d_chroma != 0)
+                                        {
+                                            msg_Dbg(dec, "Using DXVA2");
+                                            dec->fmt_out.i_codec = d3d_chroma;
+                                            if (p_sys->s.n_frame_threads == 0)
+                                                p_sys->s.n_frame_threads = 1;
+                                            p_sys->s.dxva = (Dav1dDXVA) {
+                                                dec, D3D9NewSequence, D3D9Decoder, D3D9Alloc, D3D9Release,
+                                            };
+                                        }
+                                        else
+                                        {
+                                            p_sys->dec_dev = NULL;
+                                            IDirectXVideoDecoderService_Release(p_sys->d3d9.d3ddec);
+                                            p_sys->d3d9.d3ddec = NULL;
+                                            IDirect3DDeviceManager9_CloseDeviceHandle(p_sys->d3d9.devmng, p_sys->d3d9.device);
+                                            p_sys->d3d9.device = NULL;
+                                            IDirect3DDeviceManager9_Release(p_sys->d3d9.devmng);
+                                            p_sys->d3d9.devmng = NULL;
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    }
+
+                }
+            }
+#endif // !VLC_WINSTORE_APP
+        }
+    }
+#endif // _WIN32
+
     if (p_sys->s.n_tile_threads == 0)
         p_sys->s.n_tile_threads = VLC_CLIP(vlc_GetCPUCount(), 1, 4);
     if (p_sys->s.n_frame_threads == 0)
@@ -507,6 +1730,33 @@ static void CloseDecoder(vlc_object_t *p_this)
     /* Flush decoder */
     FlushDecoder(dec);
 
+#ifdef _WIN32
+    if (p_sys->dec_dev && p_sys->dec_dev->type == VLC_DECODER_DEVICE_D3D11VA)
+    {
+        ReleaseD3D11Decoder(p_sys);
+
+        if (p_sys->d3d11.vdevice)
+            ID3D11VideoDevice_Release(p_sys->d3d11.vdevice);
+        if (p_sys->d3d11.video_context)
+            ID3D11VideoContext_Release(p_sys->d3d11.video_context);
+    }
+#if !VLC_WINSTORE_APP
+    if (p_sys->dec_dev && p_sys->dec_dev->type == VLC_DECODER_DEVICE_DXVA2)
+    {
+        ReleaseD3D9Decoder(p_sys);
+
+        if (p_sys->d3d9.decoder)
+            IDirectXVideoDecoder_Release(p_sys->d3d9.decoder);
+        if (p_sys->d3d9.d3ddec)
+            IDirectXVideoDecoderService_Release(p_sys->d3d9.d3ddec);
+        if (p_sys->d3d9.device)
+            IDirect3DDeviceManager9_CloseDeviceHandle(p_sys->d3d9.devmng, p_sys->d3d9.device);
+        if (p_sys->d3d9.devmng)
+            IDirect3DDeviceManager9_Release(p_sys->d3d9.devmng);
+    }
+#endif
+#endif
+
     if (p_sys->vctx_out)
         vlc_video_context_Release(p_sys->vctx_out);
     if (p_sys->dec_dev)
-- 
2.26.2



More information about the vlc-devel mailing list