[vlc-devel] [PATCH 2/2] dav1d: add DXVA 4:2:0 decoding support

Steve Lhomme robux4 at ycbcr.xyz
Thu Sep 3 15:44:04 CEST 2020


Tested on NVIDIA 3090 GPU.

Some code could be shared (in a library) with the other DXVA modules.

It's using an "nvdec_pool". The code is copied from the nvdec module. It could
be turned into a library as well to factor out the shared code.
---
 modules/codec/Makefile.am |   10 +
 modules/codec/dav1d.c     | 1473 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 1470 insertions(+), 13 deletions(-)

diff --git a/modules/codec/Makefile.am b/modules/codec/Makefile.am
index 0aede611336..854e3001beb 100644
--- a/modules/codec/Makefile.am
+++ b/modules/codec/Makefile.am
@@ -557,6 +557,16 @@ libdav1d_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) $(DAV1D_CFLAGS)
 libdav1d_plugin_la_CFLAGS = $(AM_CFLAGS)
 libdav1d_plugin_la_LDFLAGS = $(AM_LDFLAGS) -rpath '$(codecdir)'
 libdav1d_plugin_la_LIBADD = $(DAV1D_LIBS)
+if HAVE_WIN32 #d3d11va
+# The D3D11/DXGI format helpers are compiled straight into the plugin.
+# libd3d11_common.la is deliberately not listed here: a libtool archive is a
+# link input, not a source file, so it does not belong in _SOURCES.
+libdav1d_plugin_la_SOURCES += video_chroma/d3d11_fmt.c video_chroma/d3d11_fmt.h \
+	video_chroma/dxgi_fmt.c video_chroma/dxgi_fmt.h
+libdav1d_plugin_la_LIBADD += $(LIBCOM)
+if HAVE_WINSTORE
+libdav1d_plugin_la_LIBADD += -ld3d11
+else
+# Direct3D 9 helpers are only built outside of Windows Store builds.
+libdav1d_plugin_la_SOURCES += video_chroma/d3d9_fmt.c video_chroma/d3d9_fmt.h
+endif
+endif
 EXTRA_LTLIBRARIES += libdav1d_plugin.la
 codec_LTLIBRARIES += $(LTLIBdav1d)
 
diff --git a/modules/codec/dav1d.c b/modules/codec/dav1d.c
index 92f6f441464..371d68ce7df 100644
--- a/modules/codec/dav1d.c
+++ b/modules/codec/dav1d.c
@@ -33,6 +33,8 @@
 #include <vlc_plugin.h>
 #include <vlc_codec.h>
 #include <vlc_timestamp_helper.h>
+#include <vlc_picture_pool.h>
+#include <vlc_atomic.h>
 
 #include <errno.h>
 #include <dav1d/dav1d.h>
@@ -40,6 +42,22 @@
 #include "../packetizer/iso_color_tables.h"
 #include "cc.h"
 
+
+#ifdef _WIN32
+#define COBJMACROS
+#include <initguid.h> /* must be last included to not redefine existing GUIDs */
+#include "../video_chroma/d3d11_fmt.h"
+#include "../video_chroma/d3d9_fmt.h"
+#include <dav1d/dxva_av1.h>
+#include <dxva2api.h>
+
+DEFINE_GUID(DXVA_ModeAV1_VLD_Profile0, 0xb8be4ccb, 0xcf53, 0x46ba, 0x8d, 0x59, 0xd6, 0xb8, 0xa6, 0xda, 0x5d, 0x2a);
+DEFINE_GUID(DXVA_ModeAV1_VLD_Profile1, 0x6936ff0f, 0x45b1, 0x4163, 0x9c, 0xc1, 0x64, 0x6e, 0xf6, 0x94, 0x61, 0x08);
+DEFINE_GUID(DXVA_ModeAV1_VLD_Profile2, 0x0c5f2aa1, 0xe541, 0x4089, 0xbb, 0x7b, 0x98, 0x11, 0x0a, 0x19, 0xd7, 0xc8);
+
+#define DECODER_SLICES  (10+1) // we only use one thread in DXVA mode
+#endif
+
 /****************************************************************************
  * Local prototypes
  ****************************************************************************/
@@ -70,14 +88,546 @@ vlc_module_begin ()
                 THREAD_TILES_TEXT, THREAD_TILES_LONGTEXT, false)
 vlc_module_end ()
 
+
+#if 0
+/* Codec capabilities GUID, sorted by codec */
+MS_GUID    (DXVA2_ModeMPEG2_MoComp,                 0xe6a9f44b, 0x61b0, 0x4563, 0x9e, 0xa4, 0x63, 0xd2, 0xa3, 0xc6, 0xfe, 0x66);
+MS_GUID    (DXVA2_ModeMPEG2_IDCT,                   0xbf22ad00, 0x03ea, 0x4690, 0x80, 0x77, 0x47, 0x33, 0x46, 0x20, 0x9b, 0x7e);
+MS_GUID    (DXVA2_ModeMPEG2_VLD,                    0xee27417f, 0x5e28, 0x4e65, 0xbe, 0xea, 0x1d, 0x26, 0xb5, 0x08, 0xad, 0xc9);
+DEFINE_GUID(DXVA_ModeMPEG1_A,                       0x1b81be09, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeMPEG2_A,                       0x1b81be0A, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeMPEG2_B,                       0x1b81be0B, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeMPEG2_C,                       0x1b81be0C, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeMPEG2_D,                       0x1b81be0D, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA2_ModeMPEG2and1_VLD,                0x86695f12, 0x340e, 0x4f04, 0x9f, 0xd3, 0x92, 0x53, 0xdd, 0x32, 0x74, 0x60);
+DEFINE_GUID(DXVA2_ModeMPEG1_VLD,                    0x6f3ec719, 0x3735, 0x42cc, 0x80, 0x63, 0x65, 0xcc, 0x3c, 0xb3, 0x66, 0x16);
+
+MS_GUID    (DXVA2_ModeH264_A,                       0x1b81be64, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_B,                       0x1b81be65, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_C,                       0x1b81be66, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_D,                       0x1b81be67, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_E,                       0x1b81be68, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeH264_F,                       0x1b81be69, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeH264_VLD_Multiview,            0x9901CCD3, 0xca12, 0x4b7e, 0x86, 0x7a, 0xe2, 0x22, 0x3d, 0x92, 0x55, 0xc3); // MVC
+DEFINE_GUID(DXVA_ModeH264_VLD_WithFMOASO_NoFGT,     0xd5f04ff9, 0x3418, 0x45d8, 0x95, 0x61, 0x32, 0xa7, 0x6a, 0xae, 0x2d, 0xdd);
+DEFINE_GUID(DXVADDI_Intel_ModeH264_A,               0x604F8E64, 0x4951, 0x4c54, 0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6);
+DEFINE_GUID(DXVADDI_Intel_ModeH264_C,               0x604F8E66, 0x4951, 0x4c54, 0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6);
+DEFINE_GUID(DXVA_Intel_H264_NoFGT_ClearVideo,       0x604F8E68, 0x4951, 0x4c54, 0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6);
+DEFINE_GUID(DXVA_ModeH264_VLD_NoFGT_Flash,          0x4245F676, 0x2BBC, 0x4166, 0xa0, 0xBB, 0x54, 0xE7, 0xB8, 0x49, 0xC3, 0x80);
+
+MS_GUID    (DXVA2_ModeWMV8_A,                       0x1b81be80, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeWMV8_B,                       0x1b81be81, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+
+MS_GUID    (DXVA2_ModeWMV9_A,                       0x1b81be90, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeWMV9_B,                       0x1b81be91, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeWMV9_C,                       0x1b81be94, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+
+MS_GUID    (DXVA2_ModeVC1_A,                        0x1b81beA0, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeVC1_B,                        0x1b81beA1, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeVC1_C,                        0x1b81beA2, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+MS_GUID    (DXVA2_ModeVC1_D,                        0x1b81beA3, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA2_ModeVC1_D2010,                    0x1b81beA4, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5); // August 2010 update
+DEFINE_GUID(DXVA_Intel_VC1_ClearVideo,              0xBCC5DB6D, 0xA2B6, 0x4AF0, 0xAC, 0xE4, 0xAD, 0xB1, 0xF7, 0x87, 0xBC, 0x89);
+DEFINE_GUID(DXVA_Intel_VC1_ClearVideo_2,            0xE07EC519, 0xE651, 0x4CD6, 0xAC, 0x84, 0x13, 0x70, 0xCC, 0xEE, 0xC8, 0x51);
+
+DEFINE_GUID(DXVA_nVidia_MPEG4_ASP,                  0x9947EC6F, 0x689B, 0x11DC, 0xA3, 0x20, 0x00, 0x19, 0xDB, 0xBC, 0x41, 0x84);
+DEFINE_GUID(DXVA_ModeMPEG4pt2_VLD_Simple,           0xefd64d74, 0xc9e8, 0x41d7, 0xa5, 0xe9, 0xe9, 0xb0, 0xe3, 0x9f, 0xa3, 0x19);
+DEFINE_GUID(DXVA_ModeMPEG4pt2_VLD_AdvSimple_NoGMC,  0xed418a9f, 0x010d, 0x4eda, 0x9a, 0xe3, 0x9a, 0x65, 0x35, 0x8d, 0x8d, 0x2e);
+DEFINE_GUID(DXVA_ModeMPEG4pt2_VLD_AdvSimple_GMC,    0xab998b5b, 0x4258, 0x44a9, 0x9f, 0xeb, 0x94, 0xe5, 0x97, 0xa6, 0xba, 0xae);
+DEFINE_GUID(DXVA_ModeMPEG4pt2_VLD_AdvSimple_Avivo,  0x7C74ADC6, 0xe2ba, 0x4ade, 0x86, 0xde, 0x30, 0xbe, 0xab, 0xb4, 0x0c, 0xc1);
+
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main,                 0x5b11d51b, 0x2f4c, 0x4452,0xbc,0xc3,0x09,0xf2,0xa1,0x16,0x0c,0xc0);
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main10,               0x107af0e0, 0xef1a, 0x4d19,0xab,0xa8,0x67,0xa1,0x63,0x07,0x3d,0x13);
+
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main_Intel,           0x8c56eb1e, 0x2b47, 0x466f, 0x8d, 0x33, 0x7d, 0xbc, 0xd6, 0x3f, 0x3d, 0xf2);
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main10_Intel,         0x75fc75f7, 0xc589, 0x4a07, 0xa2, 0x5b, 0x72, 0xe0, 0x3b, 0x03, 0x83, 0xb3);
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main12_Intel,         0x8ff8a3aa, 0xc456, 0x4132, 0xb6, 0xef, 0x69, 0xd9, 0xdd, 0x72, 0x57, 0x1d);
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main422_10_Intel,     0xe484dcb8, 0xcac9, 0x4859, 0x99, 0xf5, 0x5c, 0x0d, 0x45, 0x06, 0x90, 0x89);
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main422_12_Intel,     0xc23dd857, 0x874b, 0x423c, 0xb6, 0xe0, 0x82, 0xce, 0xaa, 0x9b, 0x11, 0x8a);
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main444_Intel,        0x41a5af96, 0xe415, 0x4b0c, 0x9d, 0x03, 0x90, 0x78, 0x58, 0xe2, 0x3e, 0x78);
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main444_10_Intel,     0x6a6a81ba, 0x912a, 0x485d, 0xb5, 0x7f, 0xcc, 0xd2, 0xd3, 0x7b, 0x8d, 0x94);
+DEFINE_GUID(DXVA_ModeHEVC_VLD_Main444_12_Intel,     0x5b08e35d, 0x0c66, 0x4c51, 0xa6, 0xf1, 0x89, 0xd0, 0x0c, 0xb2, 0xc1, 0x97);
+
+DEFINE_GUID(DXVA_ModeH264_VLD_Stereo_Progressive_NoFGT,     0xd79be8da, 0x0cf1, 0x4c81,0xb8,0x2a,0x69,0xa4,0xe2,0x36,0xf4,0x3d);
+DEFINE_GUID(DXVA_ModeH264_VLD_Stereo_NoFGT,                 0xf9aaccbb, 0xc2b6, 0x4cfc,0x87,0x79,0x57,0x07,0xb1,0x76,0x05,0x52);
+DEFINE_GUID(DXVA_ModeH264_VLD_Multiview_NoFGT,              0x705b9d82, 0x76cf, 0x49d6,0xb7,0xe6,0xac,0x88,0x72,0xdb,0x01,0x3c);
+
+DEFINE_GUID(DXVA_ModeH264_VLD_SVC_Scalable_Baseline,                    0xc30700c4, 0xe384, 0x43e0, 0xb9, 0x82, 0x2d, 0x89, 0xee, 0x7f, 0x77, 0xc4);
+DEFINE_GUID(DXVA_ModeH264_VLD_SVC_Restricted_Scalable_Baseline,         0x9b8175d4, 0xd670, 0x4cf2, 0xa9, 0xf0, 0xfa, 0x56, 0xdf, 0x71, 0xa1, 0xae);
+DEFINE_GUID(DXVA_ModeH264_VLD_SVC_Scalable_High,                        0x728012c9, 0x66a8, 0x422f, 0x97, 0xe9, 0xb5, 0xe3, 0x9b, 0x51, 0xc0, 0x53);
+DEFINE_GUID(DXVA_ModeH264_VLD_SVC_Restricted_Scalable_High_Progressive, 0x8efa5926, 0xbd9e, 0x4b04, 0x8b, 0x72, 0x8f, 0x97, 0x7d, 0xc4, 0x4c, 0x36);
+
+DEFINE_GUID(DXVA_ModeH261_A,                        0x1b81be01, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeH261_B,                        0x1b81be02, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+
+DEFINE_GUID(DXVA_ModeH263_A,                        0x1b81be03, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeH263_B,                        0x1b81be04, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeH263_C,                        0x1b81be05, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeH263_D,                        0x1b81be06, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeH263_E,                        0x1b81be07, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+DEFINE_GUID(DXVA_ModeH263_F,                        0x1b81be08, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5);
+
+DEFINE_GUID(DXVA_ModeVP8_VLD,                       0x90b899ea, 0x3a62, 0x4705, 0x88, 0xb3, 0x8d, 0xf0, 0x4b, 0x27, 0x44, 0xe7);
+DEFINE_GUID(DXVA_ModeVP9_VLD_Profile0,              0x463707f8, 0xa1d0, 0x4585, 0x87, 0x6d, 0x83, 0xaa, 0x6d, 0x60, 0xb8, 0x9e);
+DEFINE_GUID(DXVA_ModeVP9_VLD_10bit_Profile2,        0xa4c749ef, 0x6ecf, 0x48aa, 0x84, 0x48, 0x50, 0xa7, 0xa1, 0x16, 0x5f, 0xf7);
+DEFINE_GUID(DXVA_ModeVP9_VLD_Intel,                 0x76988a52, 0xdf13, 0x419a, 0x8e, 0x64, 0xff, 0xcf, 0x4a, 0x33, 0x6c, 0xf5);
+
+/*
+ * Codec identifiers stored in the `codec` column of DXVA_MODES. They are plain
+ * sequential enumerators local to this file.
+ * NOTE(review): the first enumerator evaluates to 0, which is also the literal
+ * used by the table rows that name no codec -- confirm the two cannot be
+ * confused by whoever matches on this column.
+ */
+enum {
+    AV_CODEC_ID_MPEG2VIDEO,
+    AV_CODEC_ID_MPEG1VIDEO,
+    AV_CODEC_ID_H264,
+    AV_CODEC_ID_VC1,
+    AV_CODEC_ID_WMV3,
+    AV_CODEC_ID_HEVC,
+    AV_CODEC_ID_VP9,
+    AV_CODEC_ID_AV1,
+};
+/*
+ * Profile identifiers used by the PROF_* lists below. FF_PROFILE_UNKNOWN is
+ * the last enumerator and is the value that terminates every list.
+ */
+enum {
+    FF_PROFILE_MPEG2_SIMPLE,
+    FF_PROFILE_MPEG2_MAIN,
+    FF_PROFILE_H264_BASELINE,
+    FF_PROFILE_H264_CONSTRAINED_BASELINE,
+    FF_PROFILE_H264_MAIN,
+    FF_PROFILE_H264_HIGH,
+    FF_PROFILE_HEVC_MAIN,
+    FF_PROFILE_HEVC_MAIN_10,
+    FF_PROFILE_HEVC_REXT,
+    FF_PROFILE_VP9_0,
+    FF_PROFILE_VP9_2,
+    FF_PROFILE_AV1_0,
+    FF_PROFILE_AV1_1,
+    FF_PROFILE_AV1_2,
+    FF_PROFILE_UNKNOWN,
+};
+
+/* Values stored in the `workaround` column of DXVA_MODES (0 means none). */
+#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 1
+#define FF_DXVA2_WORKAROUND_HEVC_REXT        2
+
+/*
+ * Profile lists referenced from the `p_profiles` column of DXVA_MODES.
+ * Each list is terminated by FF_PROFILE_UNKNOWN.
+ */
+static const int PROF_MPEG2_MAIN[]   = { FF_PROFILE_MPEG2_SIMPLE,
+                                         FF_PROFILE_MPEG2_MAIN,
+                                         FF_PROFILE_UNKNOWN };
+static const int PROF_H264_HIGH[]    = { FF_PROFILE_H264_BASELINE,
+                                         FF_PROFILE_H264_CONSTRAINED_BASELINE,
+                                         FF_PROFILE_H264_MAIN,
+                                         FF_PROFILE_H264_HIGH,
+                                         FF_PROFILE_UNKNOWN };
+static const int PROF_HEVC_MAIN[]    = { FF_PROFILE_HEVC_MAIN,
+                                         FF_PROFILE_UNKNOWN };
+static const int PROF_HEVC_MAIN10[]  = { FF_PROFILE_HEVC_MAIN,
+                                         FF_PROFILE_HEVC_MAIN_10,
+                                         FF_PROFILE_UNKNOWN };
+
+/* Only present while the HEVC range-extension workaround value is defined. */
+#ifdef FF_DXVA2_WORKAROUND_HEVC_REXT
+static const int PROF_HEVC_MAIN_REXT[]  = { FF_PROFILE_HEVC_REXT,
+                                            FF_PROFILE_UNKNOWN };
+#endif
+
+static const int PROF_VP9_MAIN[]    = { FF_PROFILE_VP9_0, FF_PROFILE_UNKNOWN };
+static const int PROF_VP9_10[]      = { FF_PROFILE_VP9_2, FF_PROFILE_UNKNOWN };
+
+static const int PROF_AV1_PROFILE0[] = { FF_PROFILE_AV1_0, FF_PROFILE_UNKNOWN };
+static const int PROF_AV1_PROFILE1[] = { FF_PROFILE_AV1_1, FF_PROFILE_UNKNOWN };
+static const int PROF_AV1_PROFILE2[] = { FF_PROFILE_AV1_2, FF_PROFILE_UNKNOWN };
+
+
+/*
+ * One row of the DXVA_MODES table: a decoder GUID together with the stream
+ * properties (bit depth, chroma subsampling, codec, profiles) it is listed for.
+ */
+typedef struct {
+    const char   *name;       // human-readable label of the decoder mode
+    const GUID   *guid;       // decoder GUID this row describes
+    int           bit_depth;  // 8, 10 or 12 in the table rows
+    struct {
+        uint8_t log2_chroma_w; // rows labelled 4:2:0 and 4:2:2 use 1, 4:4:4 rows use 0
+        uint8_t log2_chroma_h; // rows labelled 4:2:0 use 1, 4:2:2 and 4:4:4 rows use 0
+    };
+int codec;                    // an AV_CODEC_ID_* enumerator, or literal 0 in rows that name no codec
+    const int    *p_profiles; // NULL or ends with FF_PROFILE_UNKNOWN
+    int           workaround; // FF_DXVA2_WORKAROUND_* value, or 0 when none applies
+} directx_va_mode_t;
+
+/* XXX Preferred modes must come first */
+static const directx_va_mode_t DXVA_MODES[] = {
+    /* MPEG-1/2 */
+    { "MPEG-1 decoder, restricted profile A",                                         &DXVA_ModeMPEG1_A,                      8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-2 decoder, restricted profile A",                                         &DXVA_ModeMPEG2_A,                      8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-2 decoder, restricted profile B",                                         &DXVA_ModeMPEG2_B,                      8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-2 decoder, restricted profile C",                                         &DXVA_ModeMPEG2_C,                      8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-2 decoder, restricted profile D",                                         &DXVA_ModeMPEG2_D,                      8, {1, 1}, 0, NULL, 0 },
+
+    { "MPEG-2 variable-length decoder",                                               &DXVA2_ModeMPEG2_VLD,                   8, {1, 1}, AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_MAIN, 0 },
+    { "MPEG-2 & MPEG-1 variable-length decoder",                                      &DXVA2_ModeMPEG2and1_VLD,               8, {1, 1}, AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_MAIN, 0 },
+    { "MPEG-2 & MPEG-1 variable-length decoder",                                      &DXVA2_ModeMPEG2and1_VLD,               8, {1, 1}, AV_CODEC_ID_MPEG1VIDEO, NULL, 0 },
+    { "MPEG-2 motion compensation",                                                   &DXVA2_ModeMPEG2_MoComp,                8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-2 inverse discrete cosine transform",                                     &DXVA2_ModeMPEG2_IDCT,                  8, {1, 1}, 0, NULL, 0 },
+
+    /* MPEG-1 http://download.microsoft.com/download/B/1/7/B172A3C8-56F2-4210-80F1-A97BEA9182ED/DXVA_MPEG1_VLD.pdf */
+    { "MPEG-1 variable-length decoder, no D pictures",                                &DXVA2_ModeMPEG1_VLD,                   8, {1, 1}, 0, NULL, 0 },
+
+    /* H.264 http://www.microsoft.com/downloads/details.aspx?displaylang=en&FamilyID=3d1c290b-310b-4ea2-bf76-714063a6d7a6 */
+    { "H.264 variable-length decoder, film grain technology",                         &DXVA2_ModeH264_F,                      8, {1, 1}, AV_CODEC_ID_H264, PROF_H264_HIGH, 0 },
+    { "H.264 variable-length decoder, no film grain technology",                      &DXVA2_ModeH264_E,                      8, {1, 1}, AV_CODEC_ID_H264, PROF_H264_HIGH, 0 },
+    { "H.264 variable-length decoder, no film grain technology (Intel ClearVideo)",   &DXVA_Intel_H264_NoFGT_ClearVideo,      8, {1, 1}, AV_CODEC_ID_H264, PROF_H264_HIGH, FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO },
+    { "H.264 variable-length decoder, no film grain technology, FMO/ASO",             &DXVA_ModeH264_VLD_WithFMOASO_NoFGT,    8, {1, 1}, AV_CODEC_ID_H264, PROF_H264_HIGH, 0 },
+    { "H.264 variable-length decoder, no film grain technology, Flash",               &DXVA_ModeH264_VLD_NoFGT_Flash,         8, {1, 1}, AV_CODEC_ID_H264, PROF_H264_HIGH, 0 },
+
+    { "H.264 inverse discrete cosine transform, film grain technology",               &DXVA2_ModeH264_D,                      8, {1, 1}, 0, NULL, 0 },
+    { "H.264 inverse discrete cosine transform, no film grain technology",            &DXVA2_ModeH264_C,                      8, {1, 1}, 0, NULL, 0 },
+    { "H.264 inverse discrete cosine transform, no film grain technology (Intel)",    &DXVADDI_Intel_ModeH264_C,              8, {1, 1}, 0, NULL, 0 },
+
+    { "H.264 motion compensation, film grain technology",                             &DXVA2_ModeH264_B,                      8, {1, 1}, 0, NULL, 0 },
+    { "H.264 motion compensation, no film grain technology",                          &DXVA2_ModeH264_A,                      8, {1, 1}, 0, NULL, 0 },
+    { "H.264 motion compensation, no film grain technology (Intel)",                  &DXVADDI_Intel_ModeH264_A,              8, {1, 1}, 0, NULL, 0 },
+
+    /* http://download.microsoft.com/download/2/D/0/2D02E72E-7890-430F-BA91-4A363F72F8C8/DXVA_H264_MVC.pdf */
+    { "H.264 stereo high profile, mbs flag set",                                      &DXVA_ModeH264_VLD_Stereo_Progressive_NoFGT, 8, {1, 1}, 0, NULL, 0 },
+    { "H.264 stereo high profile",                                                    &DXVA_ModeH264_VLD_Stereo_NoFGT,             8, {1, 1}, 0, NULL, 0 },
+    { "H.264 multiview high profile",                                                 &DXVA_ModeH264_VLD_Multiview_NoFGT,          8, {1, 1}, 0, NULL, 0 },
+
+    /* SVC http://download.microsoft.com/download/C/8/A/C8AD9F1B-57D1-4C10-85A0-09E3EAC50322/DXVA_SVC_2012_06.pdf */
+    { "H.264 scalable video coding, Scalable Baseline Profile",                       &DXVA_ModeH264_VLD_SVC_Scalable_Baseline,            8, {1, 1}, 0, NULL, 0 },
+    { "H.264 scalable video coding, Scalable Constrained Baseline Profile",           &DXVA_ModeH264_VLD_SVC_Restricted_Scalable_Baseline, 8, {1, 1}, 0, NULL, 0 },
+    { "H.264 scalable video coding, Scalable High Profile",                           &DXVA_ModeH264_VLD_SVC_Scalable_High,                8, {1, 1}, 0, NULL, 0 },
+    { "H.264 scalable video coding, Scalable Constrained High Profile",               &DXVA_ModeH264_VLD_SVC_Restricted_Scalable_High_Progressive, 8, {1, 1}, 0, NULL, 0 },
+
+    /* WMV */
+    { "Windows Media Video 8 motion compensation",                                    &DXVA2_ModeWMV8_B,                      8, {1, 1}, 0, NULL, 0 },
+    { "Windows Media Video 8 post processing",                                        &DXVA2_ModeWMV8_A,                      8, {1, 1}, 0, NULL, 0 },
+
+    { "Windows Media Video 9 IDCT",                                                   &DXVA2_ModeWMV9_C,                      8, {1, 1}, 0, NULL, 0 },
+    { "Windows Media Video 9 motion compensation",                                    &DXVA2_ModeWMV9_B,                      8, {1, 1}, 0, NULL, 0 },
+    { "Windows Media Video 9 post processing",                                        &DXVA2_ModeWMV9_A,                      8, {1, 1}, 0, NULL, 0 },
+
+    /* VC-1 */
+    { "VC-1 variable-length decoder",                                                 &DXVA2_ModeVC1_D,                       8, {1, 1}, AV_CODEC_ID_VC1, NULL, 0 },
+    { "VC-1 variable-length decoder",                                                 &DXVA2_ModeVC1_D,                       8, {1, 1}, AV_CODEC_ID_WMV3, NULL, 0 },
+    { "VC-1 variable-length decoder",                                                 &DXVA2_ModeVC1_D2010,                   8, {1, 1}, AV_CODEC_ID_VC1, NULL, 0 },
+    { "VC-1 variable-length decoder",                                                 &DXVA2_ModeVC1_D2010,                   8, {1, 1}, AV_CODEC_ID_WMV3, NULL, 0 },
+    { "VC-1 variable-length decoder 2 (Intel)",                                       &DXVA_Intel_VC1_ClearVideo_2,           8, {1, 1}, 0, NULL, 0 },
+    { "VC-1 variable-length decoder (Intel)",                                         &DXVA_Intel_VC1_ClearVideo,             8, {1, 1}, 0, NULL, 0 },
+
+    { "VC-1 inverse discrete cosine transform",                                       &DXVA2_ModeVC1_C,                       8, {1, 1}, 0, NULL, 0 },
+    { "VC-1 motion compensation",                                                     &DXVA2_ModeVC1_B,                       8, {1, 1}, 0, NULL, 0 },
+    { "VC-1 post processing",                                                         &DXVA2_ModeVC1_A,                       8, {1, 1}, 0, NULL, 0 },
+
+    /* Xvid/Divx: TODO */
+    { "MPEG-4 Part 2 nVidia bitstream decoder",                                       &DXVA_nVidia_MPEG4_ASP,                 8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-4 Part 2 variable-length decoder, Simple Profile",                        &DXVA_ModeMPEG4pt2_VLD_Simple,          8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-4 Part 2 variable-length decoder, Simple&Advanced Profile, no GMC",       &DXVA_ModeMPEG4pt2_VLD_AdvSimple_NoGMC, 8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-4 Part 2 variable-length decoder, Simple&Advanced Profile, GMC",          &DXVA_ModeMPEG4pt2_VLD_AdvSimple_GMC,   8, {1, 1}, 0, NULL, 0 },
+    { "MPEG-4 Part 2 variable-length decoder, Simple&Advanced Profile, Avivo",        &DXVA_ModeMPEG4pt2_VLD_AdvSimple_Avivo, 8, {1, 1}, 0, NULL, 0 },
+
+    /* HEVC */
+    // Intel specific GUID support
+    { "HEVC Main profile (Intel)",                                                    &DXVA_ModeHEVC_VLD_Main_Intel,           8, {1, 1}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN, 0 },
+    { "HEVC Main 10 profile (Intel)",                                                 &DXVA_ModeHEVC_VLD_Main10_Intel,        10, {1, 1}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN10, 0 },
+#ifdef FF_DXVA2_WORKAROUND_HEVC_REXT
+    { "HEVC Main profile 4:2:2 Range Extension (Intel)",                              &DXVA_ModeHEVC_VLD_Main12_Intel,         8, {1, 0}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN_REXT, FF_DXVA2_WORKAROUND_HEVC_REXT },
+    { "HEVC Main 10 profile 4:2:2 Range Extension (Intel)",                           &DXVA_ModeHEVC_VLD_Main422_10_Intel,    10, {1, 0}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN_REXT, FF_DXVA2_WORKAROUND_HEVC_REXT },
+    { "HEVC Main 12 profile 4:2:2 Range Extension (Intel)",                           &DXVA_ModeHEVC_VLD_Main422_12_Intel,    12, {1, 0}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN_REXT, FF_DXVA2_WORKAROUND_HEVC_REXT },
+    { "HEVC Main profile 4:4:4 Range Extension (Intel)",                              &DXVA_ModeHEVC_VLD_Main444_Intel,        8, {0, 0}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN_REXT, FF_DXVA2_WORKAROUND_HEVC_REXT },
+    { "HEVC Main 10 profile 4:4:4 Range Extension (Intel)",                           &DXVA_ModeHEVC_VLD_Main444_10_Intel,    10, {0, 0}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN_REXT, FF_DXVA2_WORKAROUND_HEVC_REXT },
+    { "HEVC Main 12 profile 4:4:4 Range Extension (Intel)",                           &DXVA_ModeHEVC_VLD_Main444_12_Intel,    12, {0, 0}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN_REXT, FF_DXVA2_WORKAROUND_HEVC_REXT },
+#endif
+    { "HEVC Main profile",                                                            &DXVA_ModeHEVC_VLD_Main,                8, {1, 1}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN, 0 },
+    { "HEVC Main 10 profile",                                                         &DXVA_ModeHEVC_VLD_Main10,              10, {1, 1}, AV_CODEC_ID_HEVC, PROF_HEVC_MAIN10, 0 },
+
+    /* H.261 */
+    { "H.261 decoder, restricted profile A",                                          &DXVA_ModeH261_A,                       8, {1, 1}, 0, NULL, 0 },
+    { "H.261 decoder, restricted profile B",                                          &DXVA_ModeH261_B,                       8, {1, 1}, 0, NULL, 0 },
+
+    /* H.263 */
+    { "H.263 decoder, restricted profile A",                                          &DXVA_ModeH263_A,                       8, {1, 1}, 0, NULL, 0 },
+    { "H.263 decoder, restricted profile B",                                          &DXVA_ModeH263_B,                       8, {1, 1}, 0, NULL, 0 },
+    { "H.263 decoder, restricted profile C",                                          &DXVA_ModeH263_C,                       8, {1, 1}, 0, NULL, 0 },
+    { "H.263 decoder, restricted profile D",                                          &DXVA_ModeH263_D,                       8, {1, 1}, 0, NULL, 0 },
+    { "H.263 decoder, restricted profile E",                                          &DXVA_ModeH263_E,                       8, {1, 1}, 0, NULL, 0 },
+    { "H.263 decoder, restricted profile F",                                          &DXVA_ModeH263_F,                       8, {1, 1}, 0, NULL, 0 },
+
+    /* VPx */
+    { "VP8",                                                                          &DXVA_ModeVP8_VLD,                      8, {1, 1}, 0, NULL, 0 },
+    { "VP9 profile 0",                                                                &DXVA_ModeVP9_VLD_Profile0,             8, {1, 1}, AV_CODEC_ID_VP9, PROF_VP9_MAIN, 0 },
+    { "VP9 profile 2",                                                                &DXVA_ModeVP9_VLD_10bit_Profile2,       10, {1, 1}, AV_CODEC_ID_VP9, PROF_VP9_10, 0 },
+    { "VP9 profile Intel",                                                            &DXVA_ModeVP9_VLD_Intel,                8, {1, 1}, 0, NULL, 0 },
+
+    { "AV1 Profile 0",                                                                &DXVA_ModeAV1_VLD_Profile0,             8, {1, 1}, AV_CODEC_ID_AV1, PROF_AV1_PROFILE0, 0},
+    { "AV1 Profile 1",                                                                &DXVA_ModeAV1_VLD_Profile1,             8, {1, 1}, AV_CODEC_ID_AV1, PROF_AV1_PROFILE1, 0},
+    { "AV1 Profile 2",                                                                &DXVA_ModeAV1_VLD_Profile2,             8, {1, 1}, AV_CODEC_ID_AV1, PROF_AV1_PROFILE2, 0},
+
+    { NULL, NULL, 0, {0, 0}, 0, NULL, 0 }
+};
+#endif
+
 /*****************************************************************************
  * decoder_sys_t: libaom decoder descriptor
  *****************************************************************************/
+#ifdef _WIN32
+/*
+ * Picture context for the D3D11 path: the generic D3D11 picture context
+ * followed by a decoder output view pointer.
+ * `ctx` is the member reached by the NVDEC_PICPOOLCTX_FROM_PICCTX macro, so its
+ * position inside the structure must not change.
+ */
+struct dav1d_d3d11_ctx
+{
+    struct d3d11_pic_context     ctx;
+    ID3D11VideoDecoderOutputView *surface;
+};
+
+/*
+ * Picture context for the D3D9 path: the generic D3D9 picture context
+ * followed by an index.
+ * NOTE(review): presumably the index of the surface inside the decoder pool --
+ * confirm against the code that fills it.
+ */
+struct dav1d_d3d9_ctx
+{
+    struct d3d9_pic_context     ctx;
+    size_t                      index;
+};
+
+/*
+ * Per-picture D3D11 resources kept by the pool. ReleaseD3D11Sys() releases
+ * every non-NULL renderSrc entry, then `view`, then `texture`, and frees the
+ * structure.
+ */
+typedef struct
+{
+    ID3D11Texture2D              *texture;
+    unsigned                     slice_index;
+    ID3D11VideoDecoderOutputView *view;
+    ID3D11ShaderResourceView     *renderSrc[D3D11_MAX_SHADER_VIEW]; // entries may be NULL
+} pool_picture_sys_d3d11;
+
+/*
+ * Per-picture D3D9 resources kept by the pool. ReleaseD3D9Sys() releases
+ * `texture` and frees the structure.
+ */
+typedef struct
+{
+    IDirect3DSurface9            *texture;
+    unsigned                     slice_index;
+} pool_picture_sys_d3d9;
+
+/*
+ * Storage for either flavour of picture context; which member is in use is
+ * decided by the owning pool (see the is_d3d9 tests in nvdec_pool_Wait()).
+ * NOTE(review): nvdec_picture_CtxDestroy() and nvdec_picture_CtxClone() use the
+ * D3D11 container_of macro on both paths, which only works if the embedded
+ * picture_context_t sits at the same offset in both members -- TODO confirm.
+ */
+typedef struct nvdec_pool_pic_ctx
+{
+    union {
+        struct dav1d_d3d11_ctx   d3d11;
+        struct dav1d_d3d9_ctx    d3d9;
+    };
+} nvdec_pool_pic_ctx;
+
+
+/*
+ * Picture context handed out with pool pictures: the API-specific context plus
+ * the pool it came from. A pool reference is taken when the context is created
+ * or cloned and dropped in nvdec_picture_CtxDestroy().
+ */
+typedef struct pic_pool_context_nvdec_t {
+    nvdec_pool_pic_ctx  ctx;
+    struct nvdec_pool_t *pool;
+} pic_pool_context_nvdec_t;
+
+/*
+ * Reference-counted pool of output pictures backed by decoder surfaces.
+ * Created by nvdec_pool_Create() and torn down by nvdec_pool_Destroy() once
+ * the last reference is released.
+ */
+typedef struct nvdec_pool_t {
+    vlc_video_context           *vctx;      // held for the lifetime of the pool
+    size_t                      pool_size;  // number of used entries in res[]
+#if !VLC_WINSTORE_APP
+    bool                        is_d3d9;    // selects the D3D9 release/context paths
+#endif
+
+    // pool_picture_sys_d3d9* or pool_picture_sys_d3d11* entries, per is_d3d9
+    void                        *res[DECODER_SLICES];
+    picture_pool_t              *picture_pool;
+
+    vlc_atomic_rc_t             rc;
+} nvdec_pool_t;
+
+/*
+ * Map a picture_context_t pointer back to the pic_pool_context_nvdec_t that
+ * embeds it, through the D3D11 member (first macro) or the D3D9 member
+ * (second macro) of the context union.
+ * NOTE(review): the D3D9 variant is not used in the part of the file visible
+ * here; the D3D11 variant is used for both paths.
+ */
+#define NVDEC_PICPOOLCTX_FROM_PICCTX(pic_ctx)  \
+    container_of(pic_ctx, pic_pool_context_nvdec_t, ctx.d3d11.ctx.s)
+#define NVDEC_PICPOOLCTX_FROM_PICCTX9(pic_ctx)  \
+    container_of(pic_ctx, pic_pool_context_nvdec_t, ctx.d3d9.ctx.s)
+
+/* Drop the surface reference held by a D3D9 pool entry, then free the entry. */
+static void ReleaseD3D9Sys(pool_picture_sys_d3d9 *dsys)
+{
+    IDirect3DSurface9 *surface = dsys->texture;
+
+    IDirect3DSurface9_Release(surface);
+    free(dsys);
+}
+
+/*
+ * Release everything a D3D11 pool entry holds: each shader resource view that
+ * was created, the decoder output view and the texture; then free the entry.
+ */
+static void ReleaseD3D11Sys(pool_picture_sys_d3d11 *dsys)
+{
+    int slot = 0;
+    while (slot < D3D11_MAX_SHADER_VIEW)
+    {
+        ID3D11ShaderResourceView *shader_view = dsys->renderSrc[slot++];
+        if (shader_view != NULL)
+            ID3D11ShaderResourceView_Release(shader_view);
+    }
+
+    ID3D11VideoDecoderOutputView_Release(dsys->view);
+    ID3D11Texture2D_Release(dsys->texture);
+    free(dsys);
+}
+
+/**
+ * Tears down a pool whose reference count has dropped to zero.
+ *
+ * Releases every per-picture resource stored in pool->res, then the picture
+ * pool and the video context reference, and finally the pool object itself.
+ * The pool pointer is invalid once this returns.
+ */
+static void nvdec_pool_Destroy(nvdec_pool_t *pool)
+{
+#if !VLC_WINSTORE_APP
+    if (pool->is_d3d9)
+    {
+        for (size_t i=0; i<pool->pool_size; i++)
+        {
+            ReleaseD3D9Sys(pool->res[i]);
+        }
+    }
+    else
+#endif
+    {
+        for (size_t i=0; i<pool->pool_size; i++)
+        {
+            ReleaseD3D11Sys(pool->res[i]);
+        }
+    }
+
+    picture_pool_Release(pool->picture_pool);
+    vlc_video_context_Release(pool->vctx);
+    /* The pool is calloc()ed by nvdec_pool_Create(); this is the only place
+     * that knows the last reference is gone, so it must free the object. */
+    free(pool);
+}
+
+/** Takes an additional reference on the pool. */
+static void nvdec_pool_AddRef(nvdec_pool_t *pool)
+{
+    vlc_atomic_rc_inc(&pool->rc);
+}
+
+/**
+ * Drops one reference on the pool and destroys it when that was the last one.
+ * Callers must not touch the pool after this call.
+ */
+static void nvdec_pool_Release(nvdec_pool_t *pool)
+{
+    if (!vlc_atomic_rc_dec(&pool->rc))
+        return;
+
+    nvdec_pool_Destroy(pool);
+}
+
+/**
+ * picture_context_t destroy callback for pictures handed out by the pool.
+ *
+ * Drops the pool reference owned by the context, then destroys the
+ * API-specific picture context.
+ */
+static void nvdec_picture_CtxDestroy(picture_context_t *picctx)
+{
+    pic_pool_context_nvdec_t *srcpic = NVDEC_PICPOOLCTX_FROM_PICCTX(picctx);
+#if !VLC_WINSTORE_APP
+    /* Read the flag first: releasing the pool may destroy and free it. */
+    const bool is_d3d9 = srcpic->pool->is_d3d9;
+#endif
+    nvdec_pool_Release(srcpic->pool);
+#if !VLC_WINSTORE_APP
+    if (is_d3d9)
+        d3d9_pic_context_destroy(picctx);
+    else
+#endif
+        d3d11_pic_context_destroy(picctx);
+}
+
+/*
+ * picture_context_t copy callback: duplicates the pool picture context.
+ * The clone takes its own reference on the pool, on the underlying picture
+ * resources and on the video context. Returns NULL if allocation fails.
+ */
+static picture_context_t *nvdec_picture_CtxClone(picture_context_t *srcctx)
+{
+    pic_pool_context_nvdec_t *original = NVDEC_PICPOOLCTX_FROM_PICCTX(srcctx);
+    pic_pool_context_nvdec_t *copy = malloc(sizeof(*copy));
+    if (unlikely(copy == NULL))
+        return NULL;
+
+    /* Shallow copy, then take the references the copy now shares. */
+    *copy = *original;
+    nvdec_pool_AddRef(copy->pool);
+
+    picture_context_t *res;
+#if !VLC_WINSTORE_APP
+    if (copy->pool->is_d3d9)
+    {
+        AcquireD3D9PictureSys(&copy->ctx.d3d9.ctx.picsys);
+        res = &copy->ctx.d3d9.ctx.s;
+    }
+    else
+#endif
+    {
+        AcquireD3D11PictureSys(&copy->ctx.d3d11.ctx.picsys);
+        res = &copy->ctx.d3d11.ctx.s;
+    }
+
+    vlc_video_context_Hold(res->vctx);
+    return res;
+}
+
+/**
+ * Creates a reference-counted picture pool around pre-built picture resources.
+ *
+ * \param vctx      video context; the pool takes its own reference on success
+ * \param fmt       format used to create the pool pictures
+ * \param pool_size number of entries in pic_sys; must be between 1 and
+ *                  DECODER_SLICES, the capacity of the pool's resource table
+ * \param pic_sys   per-picture resources, stored as the p_sys of each picture
+ *                  and released by the pool when it is destroyed
+ * \param is_d3d9   true when pic_sys holds D3D9 resources, false for D3D11
+ * \return the new pool holding one reference, or NULL on error
+ *
+ * NOTE(review): on failure the pic_sys entries are not released here;
+ * presumably the caller still owns them -- confirm against the callers.
+ */
+static nvdec_pool_t* nvdec_pool_Create(vlc_video_context *vctx,
+                                       const video_format_t *fmt,
+                                       size_t pool_size,
+                                       void *pic_sys[],
+                                       bool is_d3d9)
+{
+    /* pool->res has DECODER_SLICES slots and pics[] is a variable length
+     * array: reject sizes that would overflow the former or give the latter
+     * a zero length. */
+    if (pool_size == 0 || pool_size > DECODER_SLICES)
+        return NULL;
+
+    picture_t *pics[pool_size];
+    size_t init_pic = 0;
+
+    nvdec_pool_t *pool = calloc(1, sizeof(*pool));
+    if (!pool)
+        return NULL;
+#if !VLC_WINSTORE_APP
+    pool->is_d3d9 = is_d3d9;
+#else
+    VLC_UNUSED(is_d3d9);
+#endif
+    pool->pool_size = pool_size;
+
+    for (init_pic=0; init_pic < pool_size; init_pic++)
+    {
+        picture_resource_t res = { 0 };
+        pics[init_pic] = picture_NewFromResource(fmt, &res);
+        if (!pics[init_pic])
+            goto free_pool;
+        pics[init_pic]->p_sys = pic_sys[init_pic];
+        pool->res[init_pic] = pic_sys[init_pic];
+    }
+
+    pool->picture_pool = picture_pool_New(pool_size, pics);
+    if (!pool->picture_pool)
+        goto free_pool;
+
+    pool->vctx = vctx;
+    vlc_video_context_Hold(pool->vctx);
+
+    vlc_atomic_rc_init(&pool->rc);
+    return pool;
+
+free_pool:
+    /* Only the pictures created so far need to be released. */
+    for (size_t i=0; i < init_pic; i++)
+        picture_Release(pics[i]);
+    free(pool);
+    return NULL;
+}
+
+/*
+ * Waits for a picture from the pool and attaches a fresh picture context to
+ * it. The context holds a reference on the pool and one on the pool video
+ * context. Returns NULL if no picture could be obtained or the context could
+ * not be allocated.
+ */
+static picture_t* nvdec_pool_Wait(nvdec_pool_t *pool)
+{
+    picture_t *pic = picture_pool_Wait(pool->picture_pool);
+    if (!pic)
+        return NULL;
+
+    pic_pool_context_nvdec_t *picctx = calloc(1, sizeof(*picctx));
+    if (!picctx)
+    {
+        picture_Release(pic);
+        return NULL;
+    }
+
+    /* Pick the embedded generic context matching the pool flavour. */
+    picture_context_t *generic = &picctx->ctx.d3d11.ctx.s;
+#if !VLC_WINSTORE_APP
+    if (pool->is_d3d9)
+        generic = &picctx->ctx.d3d9.ctx.s;
+#endif
+    pic->context = generic;
+
+    *generic = (picture_context_t) {
+        nvdec_picture_CtxDestroy,
+        nvdec_picture_CtxClone,
+        pool->vctx,
+    };
+    vlc_video_context_Hold(pool->vctx);
+
+    picctx->pool = pool;
+    nvdec_pool_AddRef(pool);
+
+    return pic;
+}
+#endif
+
 typedef struct
 {
     Dav1dSettings s;
     Dav1dContext *c;
     cc_data_t cc;
+
+    // hardware decoding
+    vlc_video_context  *vctx_out;
+    vlc_decoder_device *dec_dev;
+
+#ifdef _WIN32
+    // State of the hardware decoder. The two members share storage, so only
+    // one API can be active at a time.
+    // NOTE(review): presumably selected by the type of dec_dev -- confirm.
+    union {
+        struct {
+            D3D11_VIDEO_DECODER_DESC decoderDesc;
+
+            d3d11_device_t     *d3d_dev;
+
+            ID3D11VideoDevice  *vdevice;
+
+            ID3D11VideoContext *video_context;
+            ID3D11VideoDecoder *d3ddec;
+
+            ID3D11Asynchronous      *waitCopies;
+        } d3d11;
+        struct {
+            const GUID              *selected_decoder;
+            D3DFORMAT               render;
+            HINSTANCE               dxva2_dll;
+            IDirect3DDeviceManager9 *devmng;
+            HANDLE                   device;
+            // unlike d3d11.d3ddec this is the decoder service, not the decoder
+            IDirectXVideoDecoderService  *d3ddec;
+            IDirectXVideoDecoder     *decoder;
+            DXVA2_ConfigPictureDecode cfg;
+        } d3d9;
+    };
+    // pool of output pictures; released and reset to NULL by ReleaseD3D9Decoder()
+    nvdec_pool_t            *out_pool;
+#endif
 } decoder_sys_t;
 
 struct user_data_s
@@ -124,16 +674,516 @@ static vlc_fourcc_t FindVlcChroma(const Dav1dPicture *img)
     return 0;
 }
 
+#ifdef _WIN32
+#if !VLC_WINSTORE_APP
+/* Drop the output picture pool of the D3D9 path and forget the configuration
+ * it was created for.
+ * D3D9UpdateDecoder() compares selected_decoder/render to decide whether the
+ * existing pool can be reused; leaving them set after the pool is gone would
+ * let it report a usable configuration without any pool. */
+static void ReleaseD3D9Decoder(decoder_sys_t *sys)
+{
+    if (sys->out_pool)
+    {
+        nvdec_pool_Release(sys->out_pool);
+        sys->out_pool = NULL;
+    }
+    sys->d3d9.selected_decoder = NULL;
+    sys->d3d9.render = 0;
+}
+
+
+/**
+ * (Re)configure the DXVA2 decoder for the stream described by \p img.
+ *
+ * Nothing is done when the picture pool is alive and the profile, render
+ * format and maximum dimensions are the ones it was created for. Otherwise the
+ * previous pool and video context are dropped and the decoder surfaces, the
+ * decoder, the output video context and the picture pool are created again.
+ * The new configuration is only remembered once all of them exist.
+ *
+ * \param dec the decoder, its fmt_out.video is updated on success
+ * \param img picture carrying the layout, bit depth and sequence header
+ * \return the opaque D3D9 chroma to output, or 0 when hardware decoding cannot
+ *         be used. In that case fmt_out.video.i_chroma is cleared as well,
+ *         because NewPicture() tests it to fall back to software decoding.
+ */
+static vlc_fourcc_t D3D9UpdateDecoder(decoder_t *dec, const Dav1dPicture *img)
+{
+    decoder_sys_t *sys = dec->p_sys;
+    vlc_fourcc_t fourcc = 0;
+    D3DFORMAT render = 0;
+    switch (img->p.layout)
+    {
+        case DAV1D_PIXEL_LAYOUT_I420:
+            if (img->p.bpc == 10)
+            {
+//                render = MAKEFOURCC('P','0','1','6');
+                render = MAKEFOURCC('P','0','1','0');
+                fourcc = VLC_CODEC_D3D9_OPAQUE_10B;
+                break;
+            }
+            if (img->p.bpc == 8)
+            {
+                render = MAKEFOURCC('N','V','1','2');
+                fourcc = VLC_CODEC_D3D9_OPAQUE;
+                break;
+            }
+            break;
+#if 0 // test with hardware that supports it
+        case DAV1D_PIXEL_LAYOUT_I422:
+            if (img->p.bpc == 10)
+            {
+                render = MAKEFOURCC('Y','2','1','6');
+                break;
+            }
+            if (img->p.bpc == 8)
+            {
+                render = MAKEFOURCC('Y','U','Y','2');
+                break;
+            }
+            break;
+        case DAV1D_PIXEL_LAYOUT_I444:
+            if (img->p.bpc == 10)
+            {
+                render = MAKEFOURCC('Y','4','1','6');
+                break;
+            }
+            if (img->p.bpc == 8)
+            {
+                render = MAKEFOURCC('A','Y','U','V');
+                break;
+            }
+            break;
+#endif
+        default:
+            break;
+    }
+    if (render == 0)
+        goto error;
+
+    const GUID *selected_decoder;
+    switch(img->seq_hdr->profile) {
+        case 0:
+            selected_decoder = &DXVA_ModeAV1_VLD_Profile0;
+            break;
+        case 1:
+            selected_decoder = &DXVA_ModeAV1_VLD_Profile1;
+            break;
+        case 2:
+            selected_decoder = &DXVA_ModeAV1_VLD_Profile2;
+            break;
+        default:
+            msg_Dbg(dec, "unsupported profile %d with D3D9", img->seq_hdr->profile);
+            goto error;
+    }
+
+    // the cached configuration is only meaningful while its pool exists
+    if (sys->out_pool != NULL &&
+        sys->d3d9.selected_decoder == selected_decoder &&
+        sys->d3d9.render == render &&
+        dec->fmt_out.video.i_width == (unsigned)img->seq_hdr->max_width &&
+        dec->fmt_out.video.i_height == (unsigned)img->seq_hdr->max_height)
+        return fourcc;
+
+    // the configuration changed: drop what belonged to the previous one
+    ReleaseD3D9Decoder(sys);
+    if (sys->vctx_out != NULL)
+    {
+        vlc_video_context_Release(sys->vctx_out);
+        sys->vctx_out = NULL;
+    }
+    // NOTE(review): an IDirectXVideoDecoder created by a previous call is
+    // still overwritten below without being released; releasing it here needs
+    // sys->d3d9.decoder to be initialised to NULL, which is not visible from
+    // this function -- confirm and release it.
+
+    HRESULT hr;
+    IDirect3DSurface9 *hw_surfaces[DECODER_SLICES];
+    hr = IDirectXVideoDecoderService_CreateSurface(sys->d3d9.d3ddec,
+                                                   img->seq_hdr->max_width, img->seq_hdr->max_height,
+                                                   DECODER_SLICES - 1,
+                                                   render,
+                                                   D3DPOOL_DEFAULT,
+                                                   0,
+                                                   DXVA2_VideoDecoderRenderTarget,
+                                                   hw_surfaces,
+                                                   NULL);
+    if (FAILED(hr)) {
+        msg_Err(dec, "IDirectXVideoAccelerationService_CreateSurface %d failed (hr=0x%lX)", DECODER_SLICES, hr);
+        goto error;
+    }
+    msg_Dbg(dec, "IDirectXVideoAccelerationService_CreateSurface succeed with %d surfaces (%dx%d)",
+            DECODER_SLICES, img->seq_hdr->max_width, img->seq_hdr->max_height);
+
+    /* */
+    DXVA2_VideoDesc dsc;
+    ZeroMemory(&dsc, sizeof(dsc));
+    dsc.SampleWidth     = img->seq_hdr->max_width;
+    dsc.SampleHeight    = img->seq_hdr->max_height;
+    dsc.Format          = render;
+    if (dec->fmt_in.video.i_frame_rate > 0 && dec->fmt_in.video.i_frame_rate_base > 0) {
+        dsc.InputSampleFreq.Numerator   = dec->fmt_in.video.i_frame_rate;
+        dsc.InputSampleFreq.Denominator = dec->fmt_in.video.i_frame_rate_base;
+    } else {
+        dsc.InputSampleFreq.Numerator   = 0;
+        dsc.InputSampleFreq.Denominator = 0;
+    }
+    dsc.OutputFrameFreq = dsc.InputSampleFreq;
+    dsc.UABProtectionLevel = FALSE;
+    dsc.Reserved = 0;
+
+    /* FIXME I am unsure we can let unknown everywhere */
+    DXVA2_ExtendedFormat *ext = &dsc.SampleFormat;
+    ext->SampleFormat = 0;//DXVA2_SampleUnknown;
+    ext->VideoChromaSubsampling = 0;//DXVA2_VideoChromaSubsampling_Unknown;
+    ext->NominalRange = 0;//DXVA2_NominalRange_Unknown;
+    ext->VideoTransferMatrix = 0;//DXVA2_VideoTransferMatrix_Unknown;
+    ext->VideoLighting = 0;//DXVA2_VideoLighting_Unknown;
+    ext->VideoPrimaries = 0;//DXVA2_VideoPrimaries_Unknown;
+    ext->VideoTransferFunction = 0;//DXVA2_VideoTransFunc_Unknown;
+
+    /* List all configurations available for the decoder */
+    UINT                      cfg_count = 0;
+    DXVA2_ConfigPictureDecode *cfg_list = NULL;
+    hr = IDirectXVideoDecoderService_GetDecoderConfigurations(sys->d3d9.d3ddec,
+                                                              selected_decoder,
+                                                              &dsc,
+                                                              NULL,
+                                                              &cfg_count,
+                                                              &cfg_list);
+    if (FAILED(hr)) {
+        msg_Err(dec, "IDirectXVideoDecoderService_GetDecoderConfigurations failed. (hr=0x%lX)", hr);
+        goto error_surfaces;
+    }
+    msg_Dbg(dec, "we got %d decoder configurations", cfg_count);
+
+    /* Select the best decoder configuration */
+    int cfg_score = 0;
+    for (unsigned i = 0; i < cfg_count; i++) {
+        const DXVA2_ConfigPictureDecode *cfg = &cfg_list[i];
+
+        /* */
+        msg_Dbg(dec, "configuration[%d] ConfigBitstreamRaw %d",
+                i, cfg->ConfigBitstreamRaw);
+
+        /* only raw bitstream configurations are usable, unencrypted ones are preferred */
+        int score;
+        if (cfg->ConfigBitstreamRaw == 1)
+            score = 1;
+        else
+            continue;
+        if (IsEqualGUID(&cfg->guidConfigBitstreamEncryption, &DXVA2_NoEncrypt))
+            score += 16;
+
+        if (cfg_score < score) {
+            sys->d3d9.cfg = *cfg;
+            cfg_score = score;
+        }
+    }
+    CoTaskMemFree(cfg_list);
+    if (cfg_score <= 0) {
+        msg_Err(dec, "Failed to find a supported decoder configuration");
+        goto error_surfaces;
+    }
+
+    /* Create the decoder */
+    /* adds a reference on each decoder surface */
+    IDirectXVideoDecoder *decoder = NULL;
+    if (FAILED(IDirectXVideoDecoderService_CreateVideoDecoder(sys->d3d9.d3ddec,
+                                                              selected_decoder,
+                                                              &dsc,
+                                                              &sys->d3d9.cfg,
+                                                              hw_surfaces,
+                                                              DECODER_SLICES,
+                                                              &decoder))) {
+        msg_Err(dec, "IDirectXVideoDecoderService_CreateVideoDecoder failed");
+        goto error_surfaces;
+    }
+
+    msg_Dbg(dec, "IDirectXVideoDecoderService_CreateVideoDecoder succeed");
+
+    sys->vctx_out = vlc_video_context_Create(sys->dec_dev, VLC_VIDEO_CONTEXT_DXVA2,
+                                             sizeof(d3d9_video_context_t), &d3d9_vctx_ops);
+    if (unlikely(sys->vctx_out == NULL))
+        goto error_decoder;
+    d3d9_video_context_t *octx = GetD3D9ContextPrivate(sys->vctx_out);
+    octx->format = render;
+
+    dec->fmt_out.video.i_width = img->seq_hdr->max_width;
+    dec->fmt_out.video.i_height = img->seq_hdr->max_height;
+    dec->fmt_out.video.i_chroma = fourcc;
+
+    pool_picture_sys_d3d9 *init_picsys[DECODER_SLICES];
+    for (size_t i=0; i<ARRAY_SIZE(init_picsys); i++)
+    {
+        init_picsys[i] = malloc(sizeof(*init_picsys[i]));
+        if (unlikely(init_picsys[i] == NULL))
+        {
+            // the entries filled so far only point to the surfaces, which are
+            // released through hw_surfaces below
+            while (i-- > 0)
+                free(init_picsys[i]);
+            goto error_decoder;
+        }
+
+        init_picsys[i]->slice_index = i;
+        init_picsys[i]->texture = hw_surfaces[i];
+    }
+
+    sys->out_pool = nvdec_pool_Create(sys->vctx_out, &dec->fmt_out.video, DECODER_SLICES, (void**)init_picsys, true);
+    if (unlikely(sys->out_pool == NULL))
+    {
+        // presumably ReleaseD3D9Sys() drops the surface reference of each
+        // entry, so the surfaces are not released a second time -- TODO confirm
+        for (size_t i=0; i<ARRAY_SIZE(init_picsys); i++)
+        {
+            ReleaseD3D9Sys(init_picsys[i]);
+        }
+        IDirectXVideoDecoder_Release(decoder);
+        goto error;
+    }
+
+    // everything is in place, remember the configuration
+    sys->d3d9.selected_decoder = selected_decoder;
+    sys->d3d9.render = render;
+    sys->d3d9.decoder = decoder;
+
+    return fourcc;
+
+error_decoder:
+    IDirectXVideoDecoder_Release(decoder);
+error_surfaces:
+    for (size_t i=0; i<ARRAY_SIZE(hw_surfaces); i++)
+        IDirect3DSurface9_Release(hw_surfaces[i]);
+error:
+    ReleaseD3D9Decoder(sys);
+    if (sys->vctx_out != NULL)
+    {
+        vlc_video_context_Release(sys->vctx_out);
+        sys->vctx_out = NULL;
+    }
+    // NewPicture() falls back to software decoding when the chroma is 0
+    dec->fmt_out.video.i_chroma = 0;
+    return 0;
+}
+#endif // !VLC_WINSTORE_APP
+
+/* Look up, in the list returned by GetRenderFormatList(), the first entry
+ * whose texture format is `dxgi` and whose fourcc is a D3D11 opaque chroma.
+ * Returns NULL when the list holds no such entry. */
+static const d3d_format_t *D3D11_FindDXGIFormat(DXGI_FORMAT dxgi)
+{
+    const d3d_format_t *candidate = GetRenderFormatList();
+    /* the list ends with an entry without a name */
+    while (candidate->name != NULL)
+    {
+        if (candidate->formatTexture == dxgi)
+        {
+            if (is_d3d11_opaque(candidate->fourcc))
+                return candidate;
+        }
+        candidate++;
+    }
+    return NULL;
+}
+
+/* Release everything D3D11UpdateDecoder() set up: the video decoder, the
+ * event query and the output picture pool. The cached decoder description is
+ * zeroed so it no longer matches any stream configuration. */
+static void ReleaseD3D11Decoder(decoder_sys_t *sys)
+{
+    ID3D11VideoDecoder *old_decoder = sys->d3d11.d3ddec;
+    if (old_decoder != NULL)
+    {
+        sys->d3d11.d3ddec = NULL;
+        ID3D11VideoDecoder_Release( old_decoder );
+    }
+
+    sys->d3d11.decoderDesc = (D3D11_VIDEO_DECODER_DESC) { 0 };
+
+    ID3D11Asynchronous *old_query = sys->d3d11.waitCopies;
+    if (old_query != NULL)
+    {
+        sys->d3d11.waitCopies = NULL;
+        ID3D11Asynchronous_Release(old_query);
+    }
+
+    nvdec_pool_t *old_pool = sys->out_pool;
+    if (old_pool != NULL)
+    {
+        sys->out_pool = NULL;
+        nvdec_pool_Release(old_pool);
+    }
+}
+
+
+/**
+ * (Re)configure the D3D11 video decoder for the stream described by \p img.
+ *
+ * Nothing is done when the decoder description (profile, output format and
+ * maximum dimensions) is the one already in use. Otherwise the previous
+ * decoder, pool and video context are released and a decoder, a texture array
+ * with one decoder output view per slice, the output video context, the
+ * picture pool and the event query are created again. The new description is
+ * only remembered once all of them exist.
+ *
+ * \param dec the decoder, its fmt_out.video is updated on success
+ * \param img picture carrying the layout, bit depth and sequence header
+ * \return the opaque D3D11 chroma to output, or 0 when hardware decoding
+ *         cannot be used. In that case fmt_out.video.i_chroma is cleared as
+ *         well, because NewPicture() tests it to fall back to software decoding.
+ */
+static vlc_fourcc_t D3D11UpdateDecoder(decoder_t *dec, const Dav1dPicture *img)
+{
+    decoder_sys_t *sys = dec->p_sys;
+    const d3d_format_t *render_fmt = NULL;
+
+    switch (img->p.layout)
+    {
+        case DAV1D_PIXEL_LAYOUT_I420:
+            if (img->p.bpc == 10)
+            {
+//                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_P016);
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_P010);
+                break;
+            }
+            if (img->p.bpc == 8)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_NV12);
+                break;
+            }
+            break;
+#if 0 // test with hardware that supports it
+        case DAV1D_PIXEL_LAYOUT_I422:
+            if (img->p.bpc == 10)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_Y216);
+                break;
+            }
+            if (img->p.bpc == 8)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_YUY2);
+                break;
+            }
+            break;
+        case DAV1D_PIXEL_LAYOUT_I444:
+            if (img->p.bpc == 10)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_Y416);
+                break;
+            }
+            if (img->p.bpc == 8)
+            {
+                render_fmt = D3D11_FindDXGIFormat(DXGI_FORMAT_AYUV);
+                break;
+            }
+            break;
+#endif
+        default:
+            break;
+    }
+    if (render_fmt == NULL)
+        goto error;
+
+    D3D11_VIDEO_DECODER_DESC decoderDesc = {
+        .OutputFormat = render_fmt->formatTexture,
+        .SampleWidth = img->seq_hdr->max_width,
+        .SampleHeight = img->seq_hdr->max_height,
+    };
+    switch(img->seq_hdr->profile) {
+        case 0:
+            decoderDesc.Guid = DXVA_ModeAV1_VLD_Profile0;
+            break;
+        case 1:
+            decoderDesc.Guid = DXVA_ModeAV1_VLD_Profile1;
+            break;
+        case 2:
+            decoderDesc.Guid = DXVA_ModeAV1_VLD_Profile2;
+            break;
+        default:
+            msg_Dbg(dec, "unsupported profile %d with D3D11", img->seq_hdr->profile);
+            goto error;
+    }
+
+    if (memcmp(&sys->d3d11.decoderDesc, &decoderDesc, sizeof(decoderDesc)) == 0)
+        return render_fmt->fourcc;
+
+    // release the old decoder if there was one, and its video context
+    ReleaseD3D11Decoder(sys);
+    if (sys->vctx_out != NULL)
+    {
+        vlc_video_context_Release(sys->vctx_out);
+        sys->vctx_out = NULL;
+    }
+
+    HRESULT hr;
+#if 0
+    UINT count = ID3D11VideoDevice_GetVideoDecoderProfileCount( sys->d3d11.vdevice );
+    for (UINT j=0; j<count; j++)
+    {
+        GUID guid;
+        hr  = ID3D11VideoDevice_GetVideoDecoderProfile( sys->d3d11.vdevice, j, &guid);
+        if (SUCCEEDED(hr))
+        {
+            for (unsigned i = 0; DXVA_MODES[i].name; i++) {
+                if (IsEqualGUID(DXVA_MODES[i].guid, &guid)) {
+                    msg_Dbg(dec, "supports %s", DXVA_MODES[i].name);
+                }
+            }
+        }
+    }
+#endif
+
+    UINT cfg_count = 0;
+    hr = ID3D11VideoDevice_GetVideoDecoderConfigCount( sys->d3d11.vdevice, &decoderDesc, &cfg_count );
+    if (FAILED(hr))
+    {
+        msg_Err( dec, "Decoder not supported. (hr=0x%lX)", hr );
+        goto error;
+    }
+
+    // keep a copy of the best configuration instead of a cfg_count sized
+    // array, which would have no element when the driver reports none
+    int cfg_score = 0;
+    D3D11_VIDEO_DECODER_CONFIG best_cfg;
+    for (UINT idx=0; idx < cfg_count; idx++)
+    {
+        D3D11_VIDEO_DECODER_CONFIG candidate;
+        hr = ID3D11VideoDevice_GetVideoDecoderConfig( sys->d3d11.vdevice, &decoderDesc, idx, &candidate );
+        if (FAILED(hr)) {
+            msg_Err(dec, "GetVideoDecoderConfig failed. (hr=0x%lX)", hr);
+            continue; // candidate was not filled
+        }
+
+        /* only raw bitstream configurations are usable, unencrypted ones are preferred */
+        int score;
+        if (candidate.ConfigBitstreamRaw == 1)
+            score = 1;
+        else
+            continue;
+        if (IsEqualGUID(&candidate.guidConfigBitstreamEncryption, &DXVA2_NoEncrypt))
+            score += 16;
+
+        if (cfg_score < score) {
+            best_cfg = candidate;
+            cfg_score = score;
+        }
+    }
+    if (unlikely(cfg_score <= 0))
+    {
+        msg_Err( dec, "Found no suitable decoder configuration." );
+        goto error;
+    }
+
+    // create a new decoder
+    hr = ID3D11VideoDevice_CreateVideoDecoder( sys->d3d11.vdevice, &decoderDesc, &best_cfg, &sys->d3d11.d3ddec );
+    if (FAILED(hr))
+    {
+        msg_Dbg(dec, "Failed to open D3D11 decoder");
+        sys->d3d11.d3ddec = NULL;
+        goto error;
+    }
+
+    D3D11_TEXTURE2D_DESC texDesc;
+    ZeroMemory(&texDesc, sizeof(texDesc));
+    texDesc.Width = decoderDesc.SampleWidth;
+    texDesc.Height = decoderDesc.SampleHeight;
+    texDesc.MipLevels = 1;
+    texDesc.Format = decoderDesc.OutputFormat;
+    texDesc.SampleDesc.Count = 1;
+    texDesc.MiscFlags = 0;
+    texDesc.ArraySize = DECODER_SLICES;
+    texDesc.Usage = D3D11_USAGE_DEFAULT;
+    texDesc.BindFlags = D3D11_BIND_DECODER;
+    texDesc.CPUAccessFlags = 0;
+
+    if (DeviceSupportsFormat(sys->d3d11.d3d_dev->d3ddevice, texDesc.Format, D3D11_FORMAT_SUPPORT_SHADER_LOAD))
+        texDesc.BindFlags |= D3D11_BIND_SHADER_RESOURCE;
+
+    ID3D11Texture2D *p_texture;
+    hr = ID3D11Device_CreateTexture2D( sys->d3d11.d3d_dev->d3ddevice, &texDesc, NULL, &p_texture );
+    if (FAILED(hr)) {
+        msg_Err(dec, "CreateTexture2D with %d slices failed. (hr=0x%lX)", DECODER_SLICES, hr);
+        goto error;
+    }
+    sys->vctx_out = D3D11CreateVideoContext(sys->dec_dev, render_fmt->formatTexture);
+    if (unlikely(sys->vctx_out == NULL))
+        goto error_texture;
+
+    D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC viewDesc = {
+        .DecodeProfile = decoderDesc.Guid,
+        .ViewDimension = D3D11_VDOV_DIMENSION_TEXTURE2D,
+    };
+
+    pool_picture_sys_d3d11 *init_picsys[DECODER_SLICES];
+    size_t n_picsys = 0; // number of completely initialised entries
+    for (size_t i=0; i<ARRAY_SIZE(init_picsys); i++)
+    {
+        init_picsys[i] = calloc(1, sizeof(*init_picsys[i]));
+        if (unlikely(init_picsys[i] == NULL))
+            goto error_picsys;
+
+        viewDesc.Texture2D.ArraySlice = i;
+        hr = ID3D11VideoDevice_CreateVideoDecoderOutputView( sys->d3d11.vdevice,
+                                                            (ID3D11Resource*)p_texture,
+                                                            &viewDesc,
+                                                            &init_picsys[i]->view );
+        if (unlikely(FAILED(hr))) {
+            msg_Err(dec, "CreateVideoDecoderOutputView %zu failed. (hr=0x%lX)", i, hr);
+            free(init_picsys[i]); // this entry does not own anything yet
+            goto error_picsys;
+        }
+
+        init_picsys[i]->slice_index = i;
+        init_picsys[i]->texture = p_texture;
+        ID3D11Texture2D_AddRef(init_picsys[i]->texture);
+        if (texDesc.BindFlags & D3D11_BIND_SHADER_RESOURCE)
+        {
+            ID3D11Texture2D *textures[D3D11_MAX_SHADER_VIEW] = {p_texture, p_texture, p_texture};
+            D3D11_AllocateResourceView(dec, sys->d3d11.d3d_dev->d3ddevice, render_fmt, textures, i,
+                                    init_picsys[i]->renderSrc);
+        }
+        n_picsys = i + 1;
+    }
+
+    dec->fmt_out.video.i_width = img->seq_hdr->max_width;
+    dec->fmt_out.video.i_height = img->seq_hdr->max_height;
+    dec->fmt_out.video.i_chroma = render_fmt->fourcc;
+
+    sys->out_pool = nvdec_pool_Create(sys->vctx_out, &dec->fmt_out.video, DECODER_SLICES, (void**)init_picsys, false);
+    if (unlikely(sys->out_pool == NULL))
+        goto error_picsys;
+
+    // D3D11Decoder() signals and polls this query, so it is required
+    D3D11_QUERY_DESC query = { 0 };
+    query.Query = D3D11_QUERY_EVENT;
+    hr = ID3D11Device_CreateQuery(sys->d3d11.d3d_dev->d3ddevice, &query, (ID3D11Query**)&sys->d3d11.waitCopies);
+    if (FAILED(hr))
+    {
+        msg_Err(dec, "CreateQuery failed. (hr=0x%lX)", hr);
+        sys->d3d11.waitCopies = NULL;
+        // the picture entries were handed to the pool, released below
+        goto error_texture;
+    }
+
+    ID3D11Texture2D_Release(p_texture);
+    msg_Dbg(dec, "ID3D11VideoDecoderOutputView succeed with %d slices (%dx%d)",
+            DECODER_SLICES, decoderDesc.SampleWidth, decoderDesc.SampleHeight);
+    sys->d3d11.decoderDesc = decoderDesc;
+
+    ID3D10Multithread *pMultithread;
+    hr = ID3D11Device_QueryInterface( sys->d3d11.d3d_dev->d3ddevice, &IID_ID3D10Multithread, (void **)&pMultithread);
+    if (SUCCEEDED(hr)) {
+        ID3D10Multithread_SetMultithreadProtected(pMultithread, TRUE);
+        ID3D10Multithread_Release(pMultithread);
+    }
+
+    return render_fmt->fourcc;
+
+error_picsys:
+    for (size_t i=0; i<n_picsys; i++)
+        ReleaseD3D11Sys(init_picsys[i]);
+error_texture:
+    ID3D11Texture2D_Release(p_texture);
+error:
+    // releases the decoder, the query and the pool and clears the cached description
+    ReleaseD3D11Decoder(sys);
+    if (sys->vctx_out != NULL)
+    {
+        vlc_video_context_Release(sys->vctx_out);
+        sys->vctx_out = NULL;
+    }
+    // NewPicture() falls back to software decoding when the chroma is 0
+    dec->fmt_out.video.i_chroma = 0;
+    return 0;
+}
+#endif
+
 static int NewPicture(Dav1dPicture *img, void *cookie)
 {
     decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
 
     video_format_t *v = &dec->fmt_out.video;
 
     v->i_visible_width  = img->p.w;
     v->i_visible_height = img->p.h;
-    v->i_width  = (img->p.w + 0x7F) & ~0x7F;
-    v->i_height = (img->p.h + 0x7F) & ~0x7F;
 
     if( !v->i_sar_num || !v->i_sar_den )
     {
@@ -179,21 +1229,80 @@ static int NewPicture(Dav1dPicture *img, void *cookie)
     v->projection_mode = dec->fmt_in.video.projection_mode;
     v->multiview_mode = dec->fmt_in.video.multiview_mode;
     v->pose = dec->fmt_in.video.pose;
-    dec->fmt_out.video.i_chroma = dec->fmt_out.i_codec = FindVlcChroma(img);
+#ifdef _WIN32
+    if (sys->d3d11.vdevice != NULL)
+        dec->fmt_out.i_codec = D3D11UpdateDecoder(dec, img);
+#if !VLC_WINSTORE_APP
+    else if (sys->d3d9.d3ddec != NULL)
+        dec->fmt_out.i_codec = D3D9UpdateDecoder(dec, img);
+#endif
+    else
+        v->i_chroma = 0;
+    if (v->i_chroma == 0)
+#endif
+    {
+        v->i_chroma = dec->fmt_out.i_codec = FindVlcChroma(img);
+        v->i_width  = (img->p.w + 0x7F) & ~0x7F;
+        v->i_height = (img->p.h + 0x7F) & ~0x7F;
+    }
 
-    if (decoder_UpdateVideoFormat(dec) == 0)
+    if (decoder_UpdateVideoOutput(dec, sys->vctx_out) == 0)
     {
-        picture_t *pic = decoder_NewPicture(dec);
-        if (likely(pic != NULL))
+        picture_t *pic;// = decoder_NewPicture(dec);
+        // if (likely(pic != NULL))
         {
-            img->data[0] = pic->p[0].p_pixels;
-            img->stride[0] = pic->p[0].i_pitch;
-            img->data[1] = pic->p[1].p_pixels;
-            img->data[2] = pic->p[2].p_pixels;
-            assert(pic->p[1].i_pitch == pic->p[2].i_pitch);
-            img->stride[1] = pic->p[1].i_pitch;
-            img->allocator_data = pic;
+#ifdef _WIN32
+            if (is_d3d11_opaque(v->i_chroma))
+            {
+                pic = nvdec_pool_Wait(sys->out_pool);
+                pool_picture_sys_d3d11 *pool_picsys = pic->p_sys;
+                pic->p_sys = NULL;
 
+                pic_pool_context_nvdec_t *picctx = NVDEC_PICPOOLCTX_FROM_PICCTX(pic->context);
+                picture_sys_d3d11_t *picsys = &picctx->ctx.d3d11.ctx.picsys;
+                picctx->ctx.d3d11.surface = pool_picsys->view;
+
+                for (int i=0; i<D3D11_MAX_SHADER_VIEW; i++)
+                {
+                    picsys->texture[i]  = pool_picsys->texture;
+                    picsys->renderSrc[i] = pool_picsys->renderSrc[i];
+                }
+                picsys->slice_index = pool_picsys->slice_index;
+                AcquireD3D11PictureSys(picsys);
+
+                img->dxva_picture_index = pool_picsys->slice_index;
+                img->data[0] = pool_picsys->view;
+            }
+#if !VLC_WINSTORE_APP
+            else if (is_d3d9_opaque(v->i_chroma))
+            {
+                pic = nvdec_pool_Wait(sys->out_pool);
+                pool_picture_sys_d3d9 *pool_picsys = pic->p_sys;
+                pic->p_sys = NULL;
+
+                pic_pool_context_nvdec_t *picctx = NVDEC_PICPOOLCTX_FROM_PICCTX9(pic->context);
+                picture_sys_d3d9_t *picsys = &picctx->ctx.d3d9.ctx.picsys;
+                picctx->ctx.d3d9.index = pool_picsys->slice_index;
+
+                picsys->surface = pool_picsys->texture;
+                AcquireD3D9PictureSys(picsys);
+
+                img->dxva_picture_index = pool_picsys->slice_index;
+                img->data[0] = pool_picsys->texture;
+            }
+#endif
+            else
+#endif
+            {
+                pic = decoder_NewPicture(dec);
+                img->data[0] = pic->p[0].p_pixels;
+                img->stride[0] = pic->p[0].i_pitch;
+                img->data[1] = pic->p[1].p_pixels;
+                img->data[2] = pic->p[2].p_pixels;
+                assert(pic->p[1].i_pitch == pic->p[2].i_pitch);
+                img->stride[1] = pic->p[1].i_pitch;
+            }
+            img->allocator_data = pic;
             return 0;
         }
     }
@@ -366,6 +1475,231 @@ static int Decode(decoder_t *dec, block_t *block)
     return i_ret;
 }
 
+#ifdef _WIN32
+/**
+ * dav1d DXVA decode callback for the D3D9 path.
+ *
+ * Releases the picture parameters and slice control buffers acquired by
+ * D3D9Alloc(), copies the data of all tile groups into the bitstream buffer
+ * and executes the decoding with the three buffers.
+ *
+ * \param cookie the decoder_t
+ * \param n_tiles number of DXVA_Tile_AV1 entries in the slice control buffer
+ * \param tile_groups tile groups whose data forms the bitstream
+ * \param tile_group_count number of entries in \p tile_groups
+ * \return 0 on success, DAV1D_ERR(ENOMEM) when the bitstream buffer is not
+ *         available or too small, DAV1D_ERR(1) when the execution fails
+ */
+static int D3D9Decoder(void *cookie, DXVA_PicParams_AV1 *picture_parameters, DXVA_Tile_AV1 *filled_tiles, const int n_tiles,
+                        Dav1dPicture *output_picture, Dav1dTileGroup *tile_groups, int tile_group_count)
+{
+    decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
+    uint8_t* bitstream_target = NULL;
+    size_t bitstream_size = 0;
+    VLC_UNUSED(picture_parameters);
+    VLC_UNUSED(filled_tiles);
+    VLC_UNUSED(output_picture);
+
+    IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_PictureParametersBufferType);
+    IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_SliceControlBufferType);
+
+    for (int i = 0; i < tile_group_count; i++)
+        bitstream_size += tile_groups[i].data.sz;
+
+    UINT size_allocated = 0;
+    if (FAILED(IDirectXVideoDecoder_GetBuffer(sys->d3d9.decoder, DXVA2_BitStreamDateBufferType, (void**)&bitstream_target, &size_allocated)))
+        return DAV1D_ERR(ENOMEM);
+    // compare the full size: truncating it to 32 bits could let an oversized
+    // bitstream pass the check
+    if (size_allocated < bitstream_size)
+    {
+        // the buffer was acquired, give it back before bailing out
+        IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_BitStreamDateBufferType);
+        return DAV1D_ERR(ENOMEM);
+    }
+
+    // this is a GPU bitstream upload
+    for (int i = 0; i < tile_group_count; i++) {
+        const uint8_t *data = tile_groups[i].data.data;
+        size_t size = tile_groups[i].data.sz;
+        memcpy(bitstream_target, data, size);
+        bitstream_target += size;
+    }
+
+    IDirectXVideoDecoder_ReleaseBuffer(sys->d3d9.decoder, DXVA2_BitStreamDateBufferType);
+
+    DXVA2_DecodeBufferDesc buffers[3] = {
+        {
+            .CompressedBufferType = DXVA2_PictureParametersBufferType,
+            .DataSize = sizeof(DXVA_PicParams_AV1),
+        },
+        {
+            .CompressedBufferType = DXVA2_SliceControlBufferType,
+            .DataSize = sizeof(DXVA_Tile_AV1) * n_tiles,
+        },
+        {
+            .CompressedBufferType = DXVA2_BitStreamDateBufferType,
+            // fits: bitstream_size <= size_allocated was checked above
+            .DataSize = (UINT)bitstream_size,
+        },
+    };
+
+    DXVA2_DecodeExecuteParams exec = {
+        .NumCompBuffers = 3,
+        .pCompressedBuffers = buffers,
+    };
+    if (FAILED(IDirectXVideoDecoder_Execute(sys->d3d9.decoder, &exec)))
+    {
+        return DAV1D_ERR(1);
+    }
+
+    return 0;
+}
+
+/* dav1d DXVA allocation callback for the D3D9 path.
+ * Begins a frame on the surface attached to the picture, retrying while the
+ * decoder answers E_PENDING, then fetches the slice control buffer into
+ * *tiles and the picture parameters buffer into *pic (zeroed before return).
+ * Returns 0 on success, -1 when the frame cannot be started and
+ * DAV1D_ERR(ENOMEM) when a buffer is missing or too small. */
+static int D3D9Alloc(void *cookie, Dav1dPicture* picture, DXVA_PicParams_AV1 **pic, DXVA_Tile_AV1 **tiles, int n_tiles)
+{
+    decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
+    picture_t *vlc_pic = picture->allocator_data;
+    struct dav1d_d3d9_ctx *surface_ctx = container_of(vlc_pic->context, struct dav1d_d3d9_ctx, ctx.s);
+
+    /* up to 50 more attempts, 2ms apart, while the decoder is busy */
+    HRESULT hr = IDirectXVideoDecoder_BeginFrame(sys->d3d9.decoder, surface_ctx->ctx.picsys.surface, NULL);
+    for (int attempt = 1; hr == E_PENDING && attempt <= 50; attempt++)
+    {
+        SleepEx(2, TRUE);
+        hr = IDirectXVideoDecoder_BeginFrame(sys->d3d9.decoder, surface_ctx->ctx.picsys.surface, NULL);
+    }
+
+    if(FAILED(hr))
+    {
+        msg_Err(dec, "Failed to start decoding into slice. (hr=0x%lX)", hr);
+        return -1;
+    }
+
+    UINT buffer_size = 0;
+    hr = IDirectXVideoDecoder_GetBuffer(sys->d3d9.decoder, DXVA2_SliceControlBufferType, (void**)tiles, &buffer_size);
+    if (FAILED(hr) || buffer_size < sizeof(DXVA_Tile_AV1) * n_tiles)
+        return DAV1D_ERR(ENOMEM);
+
+    hr = IDirectXVideoDecoder_GetBuffer(sys->d3d9.decoder, DXVA2_PictureParametersBufferType, (void**)pic, &buffer_size);
+    if (FAILED(hr) || buffer_size < sizeof(DXVA_PicParams_AV1))
+        return DAV1D_ERR(ENOMEM);
+
+    memset(*pic, 0, sizeof(DXVA_PicParams_AV1));
+    return 0;
+}
+
+/* dav1d DXVA release callback for the D3D9 path: ends the frame on the
+ * decoder. The buffers passed back by dav1d are not used. */
+static void D3D9Release(void *cookie, DXVA_PicParams_AV1 *picparams, DXVA_Tile_AV1 *tiles)
+{
+    VLC_UNUSED(picparams);
+    VLC_UNUSED(tiles);
+
+    decoder_t *decoder = cookie;
+    decoder_sys_t *priv = decoder->p_sys;
+    IDirectXVideoDecoder_EndFrame(priv->d3d9.decoder, NULL);
+}
+
+/**
+ * dav1d DXVA decode callback for the D3D11 path.
+ *
+ * With the device lock held: releases the picture parameters and slice
+ * control buffers acquired by D3D11Alloc(), copies the data of all tile groups
+ * into the bitstream buffer, waits (with a bounded number of polls) on the
+ * event query and submits the three decoder buffers.
+ *
+ * \param cookie the decoder_t
+ * \param n_tiles number of DXVA_Tile_AV1 entries in the slice control buffer
+ * \param tile_groups tile groups whose data forms the bitstream
+ * \param tile_group_count number of entries in \p tile_groups
+ * \return 0 on success, DAV1D_ERR(ENOMEM) when the bitstream buffer is not
+ *         available or too small, DAV1D_ERR(1) when the submission fails
+ */
+static int D3D11Decoder(void *cookie, DXVA_PicParams_AV1 *picture_parameters, DXVA_Tile_AV1 *filled_tiles, const int n_tiles,
+                        Dav1dPicture *output_picture, Dav1dTileGroup *tile_groups, int tile_group_count)
+{
+    decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
+    uint8_t* bitstream_target = NULL;
+    size_t bitstream_size = 0;
+    VLC_UNUSED(picture_parameters);
+    VLC_UNUSED(filled_tiles);
+    VLC_UNUSED(output_picture);
+
+    for (int i = 0; i < tile_group_count; i++)
+        bitstream_size += tile_groups[i].data.sz;
+
+    d3d11_device_lock( sys->d3d11.d3d_dev );
+
+    ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS);
+    ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL);
+
+    uint32_t size_allocated = 0;
+    if (FAILED(ID3D11VideoContext_GetDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_BITSTREAM, &size_allocated, (void**)&bitstream_target)))
+    {
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        return DAV1D_ERR(ENOMEM);
+    }
+    // compare the full size: truncating it to 32 bits could let an oversized
+    // bitstream pass the check
+    if (size_allocated < bitstream_size)
+    {
+        // the buffer was acquired, give it back before bailing out
+        ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_BITSTREAM);
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        return DAV1D_ERR(ENOMEM);
+    }
+
+    // this is a GPU bitstream upload
+    for (int i = 0; i < tile_group_count; i++) {
+        const uint8_t *data = tile_groups[i].data.data;
+        size_t size = tile_groups[i].data.sz;
+        memcpy(bitstream_target, data, size);
+        bitstream_target += size;
+    }
+
+    ID3D11VideoContext_ReleaseDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_BITSTREAM);
+
+    // the query may be missing as the result of its creation is not checked
+    if (sys->d3d11.waitCopies != NULL)
+    {
+        ID3D11DeviceContext_End(sys->d3d11.d3d_dev->d3dcontext, sys->d3d11.waitCopies);
+
+        // poll at most 10 times, leaving the device to other users while sleeping
+        int maxWait = 10;
+        while (S_FALSE == ID3D11DeviceContext_GetData(sys->d3d11.d3d_dev->d3dcontext,
+                                                        sys->d3d11.waitCopies, NULL, 0, 0)
+                && --maxWait)
+        {
+            d3d11_device_unlock( sys->d3d11.d3d_dev );
+            SleepEx(2, TRUE);
+            d3d11_device_lock( sys->d3d11.d3d_dev );
+        }
+    }
+
+    D3D11_VIDEO_DECODER_BUFFER_DESC decodeDesc[3] = {0};
+    decodeDesc[0].BufferType = D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS;
+    decodeDesc[0].DataSize = sizeof(DXVA_PicParams_AV1);
+    decodeDesc[1].BufferType = D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL;
+    decodeDesc[1].DataSize = sizeof(DXVA_Tile_AV1)*n_tiles;
+    decodeDesc[2].BufferType = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
+    decodeDesc[2].DataSize = (UINT)bitstream_size;
+
+    if (FAILED(ID3D11VideoContext_SubmitDecoderBuffers(sys->d3d11.video_context, sys->d3d11.d3ddec, 3, decodeDesc)))
+    {
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        return DAV1D_ERR(1);
+    }
+
+    d3d11_device_unlock( sys->d3d11.d3d_dev );
+
+    return 0;
+}
+
+/**
+ * dav1d DXVA allocation callback for the D3D11 path.
+ *
+ * Begins a frame on the decoder output view attached to the picture, retrying
+ * while the decoder answers E_PENDING, then fetches the slice control buffer
+ * into \p tiles and the picture parameters buffer into \p pic (zeroed before
+ * returning). The device lock taken for these calls is released on every
+ * path, including the error ones.
+ *
+ * \param cookie the decoder_t
+ * \param picture picture whose allocator_data is the VLC picture to decode into
+ * \param n_tiles number of DXVA_Tile_AV1 entries the slice control buffer must hold
+ * \return 0 on success, -1 when the frame cannot be started,
+ *         DAV1D_ERR(ENOMEM) when a buffer is missing or too small
+ */
+static int D3D11Alloc(void *cookie, Dav1dPicture* picture, DXVA_PicParams_AV1 **pic, DXVA_Tile_AV1 **tiles, int n_tiles)
+{
+    decoder_t *dec = cookie;
+    decoder_sys_t *sys = dec->p_sys;
+    uint32_t size = 0;
+    picture_t *p_pic = picture->allocator_data;
+    struct dav1d_d3d11_ctx *pic_ctx = container_of(p_pic->context, struct dav1d_d3d11_ctx, ctx.s);
+
+    HRESULT hr;
+    int run = 0;
+    do {
+        d3d11_device_lock( sys->d3d11.d3d_dev );
+
+        hr = ID3D11VideoContext_DecoderBeginFrame(sys->d3d11.video_context, sys->d3d11.d3ddec, pic_ctx->surface, 0, NULL);
+        if (hr != E_PENDING || ++run > 50)
+            break; // the lock is still held here
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        // vlc_tick_sleep(VLC_TICK_FROM_MS(10));
+        SleepEx(2, TRUE);
+    } while (1);
+
+    if(FAILED(hr))
+    {
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        msg_Err(dec, "Failed to start decoding into slice %d. (hr=0x%lX)", pic_ctx->ctx.picsys.slice_index, hr);
+        return -1;
+    }
+
+    // ID3D11DeviceContext_Begin(sys->d3d11.d3d_dev->d3dcontext, sys->d3d11.waitCopies);
+
+    if (FAILED(ID3D11VideoContext_GetDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL, &size, (void**)tiles)) || size < sizeof(DXVA_Tile_AV1) * n_tiles)
+    {
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        return DAV1D_ERR(ENOMEM);
+    }
+
+    if (FAILED(ID3D11VideoContext_GetDecoderBuffer(sys->d3d11.video_context, sys->d3d11.d3ddec, D3D11_VIDEO_DECODER_BUFFER_PICTURE_PARAMETERS, &size, (void**)pic)) || size < sizeof(DXVA_PicParams_AV1))
+    {
+        d3d11_device_unlock( sys->d3d11.d3d_dev );
+        return DAV1D_ERR(ENOMEM);
+    }
+
+    d3d11_device_unlock( sys->d3d11.d3d_dev );
+
+    memset(*pic, 0, sizeof(DXVA_PicParams_AV1));
+    return 0;
+}
+
+/* dav1d DXVA release callback for the D3D11 path: ends the decoder frame
+ * while holding the device lock. The buffers passed back by dav1d are not
+ * used. */
+static void D3D11Release(void *cookie, DXVA_PicParams_AV1 *picparams, DXVA_Tile_AV1 *tiles)
+{
+    VLC_UNUSED(picparams);
+    VLC_UNUSED(tiles);
+
+    decoder_t *decoder = cookie;
+    decoder_sys_t *priv = decoder->p_sys;
+    d3d11_device_t *device = priv->d3d11.d3d_dev;
+
+    d3d11_device_lock( device );
+    ID3D11VideoContext_DecoderEndFrame(priv->d3d11.video_context, priv->d3d11.d3ddec);
+    d3d11_device_unlock( device );
+}
+#endif
+
 /*****************************************************************************
  * OpenDecoder: probe the decoder
  *****************************************************************************/
@@ -390,6 +1724,111 @@ static int OpenDecoder(vlc_object_t *p_this)
     p_sys->s.allocator.cookie = dec;
     p_sys->s.allocator.alloc_picture_callback = NewPicture;
     p_sys->s.allocator.release_picture_callback = FreePicture;
+    p_sys->vctx_out = NULL;
+    p_sys->dec_dev = NULL;
+
+#ifdef _WIN32
+    p_sys->d3d11.vdevice = NULL;
+    p_sys->d3d11.video_context = NULL;
+    p_sys->d3d11.d3d_dev = NULL;
+    p_sys->d3d11.waitCopies = NULL;
+    p_sys->d3d11.d3ddec = NULL;
+    p_sys->out_pool = NULL;
+    p_sys->dec_dev = NULL;
+    p_sys->s.dxva.decode_callback = NULL;
+
+    vlc_decoder_device *dec_dev = decoder_GetDecoderDevice( dec );
+    if (dec_dev)
+    {
+        d3d11_decoder_device_t *devsys = GetD3D11OpaqueDevice( dec_dev );
+        if (devsys)
+        {
+            void *d3dviddev = NULL;
+            HRESULT hr;
+            hr = ID3D11Device_QueryInterface(devsys->d3d_dev.d3ddevice, &IID_ID3D11VideoDevice, &d3dviddev);
+            if (FAILED(hr)) {
+                msg_Err(dec, "Could not Query ID3D11VideoDevice Interface. (hr=0x%lX)", hr);
+                vlc_decoder_device_Release(dec_dev);
+            }
+            else
+            {
+                void *d3dvidctx = NULL;
+                hr = ID3D11Device_QueryInterface(devsys->d3d_dev.d3dcontext, &IID_ID3D11VideoContext, &d3dvidctx);
+                if (FAILED(hr)) {
+                    msg_Err(dec, "Could not Query ID3D11VideoContext Interface. (hr=0x%lX)", hr);
+                    vlc_decoder_device_Release(dec_dev);
+                }
+                else
+                {
+                    p_sys->d3d11.vdevice = d3dviddev;
+                    p_sys->d3d11.video_context = d3dvidctx;
+                    p_sys->d3d11.d3d_dev = &devsys->d3d_dev;
+                    p_sys->dec_dev = dec_dev;
+                    p_sys->s.dxva.cookie = dec;
+                    p_sys->s.dxva.decode_callback = D3D11Decoder;
+                    p_sys->s.dxva.alloc_callback = D3D11Alloc;
+                    p_sys->s.dxva.release_callback = D3D11Release;
+                }
+            }
+        }
+        else
+        {
+            d3d9_decoder_device_t *devsys = GetD3D9OpaqueDevice( dec_dev );
+            if (devsys)
+            {
+                p_sys->d3d9.dxva2_dll = LoadLibrary(TEXT("DXVA2.DLL"));
+                if (!p_sys->d3d9.dxva2_dll) {
+                    msg_Err(dec, " OurDirect3DCreateDeviceManager9 failed");
+                } else {
+                HRESULT (WINAPI *CreateDeviceManager9)(UINT *pResetToken,
+                                                       IDirect3DDeviceManager9 **);
+                CreateDeviceManager9 =
+                (void *)GetProcAddress(p_sys->d3d9.dxva2_dll,
+                                        "DXVA2CreateDirect3DDeviceManager9");
+
+                if (!CreateDeviceManager9) {
+                    msg_Err(dec, "cannot load function");
+                } else {
+                    UINT token;
+                    if (FAILED(CreateDeviceManager9(&token, &p_sys->d3d9.devmng))) {
+                        msg_Err(dec, " OurDirect3DCreateDeviceManager9 failed");
+                    } else {
+                        HRESULT hr = IDirect3DDeviceManager9_ResetDevice(p_sys->d3d9.devmng, devsys->d3ddev.dev, token);
+                        if (FAILED(hr)) {
+                            msg_Err(dec, "IDirect3DDeviceManager9_ResetDevice failed: 0x%lX)", hr);
+                            IDirect3DDeviceManager9_Release(p_sys->d3d9.devmng);
+                        } else {
+                            hr = IDirect3DDeviceManager9_OpenDeviceHandle(p_sys->d3d9.devmng, &p_sys->d3d9.device);
+                            if (FAILED(hr)) {
+                                msg_Err(dec, "OpenDeviceHandle failed");
+                                IDirect3DDeviceManager9_Release(p_sys->d3d9.devmng);
+                            } else {
+                                void *pv;
+                                hr = IDirect3DDeviceManager9_GetVideoService(p_sys->d3d9.devmng, p_sys->d3d9.device,
+                                                                             &IID_IDirectXVideoDecoderService, &pv);
+                                if (FAILED(hr)) {
+                                    msg_Err(dec, "GetVideoService failed");
+                                    IDirect3DDeviceManager9_CloseDeviceHandle(p_sys->d3d9.devmng, p_sys->d3d9.device);
+                                    IDirect3DDeviceManager9_Release(p_sys->d3d9.devmng);
+                                } else {
+                                    p_sys->d3d9.d3ddec = pv;
+                                    p_sys->dec_dev = dec_dev;
+                                    p_sys->s.dxva.cookie = dec;
+                                    p_sys->s.dxva.decode_callback = D3D9Decoder;
+                                    p_sys->s.dxva.alloc_callback = D3D9Alloc;
+                                    p_sys->s.dxva.release_callback = D3D9Release;
+                                }
+                            }
+                        }
+                    }
+                }
+                }
+
+            }
+        }
+
+    }
+#endif
 
     if (dav1d_open(&p_sys->c, &p_sys->s) < 0)
     {
@@ -436,6 +1875,14 @@ static void CloseDecoder(vlc_object_t *p_this)
     /* Flush decoder */
     FlushDecoder(dec);
 
+    if (p_sys->dec_dev)
+        vlc_decoder_device_Release(p_sys->dec_dev);
+
+#ifdef _WIN32
+    if (p_sys->out_pool)
+        nvdec_pool_Release(p_sys->out_pool);
+#endif
+
     dav1d_close(&p_sys->c);
 }
 
-- 
2.26.2



More information about the vlc-devel mailing list