From a7110862c8a828e556dd2da0905c3e69f56eca29 Mon Sep 17 00:00:00 2001 From: Kevin Mitchell Date: Thu, 17 Mar 2016 23:28:29 -0700 Subject: vd_lavc: add d3d11va hwdec This commit adds the d3d11va-copy hwdec mode using the ffmpeg d3d11va api. Functions in common with dxva2 are handled in a separate decode/d3d.c file. A future commit will rewrite decode/dxva2.c to share this code. --- DOCS/man/options.rst | 1 + options/options.c | 1 + video/d3d11va.c | 86 +++++++++ video/d3d11va.h | 30 +++ video/decode/d3d.c | 241 ++++++++++++++++++++++++ video/decode/d3d.h | 50 +++++ video/decode/d3d11va.c | 498 +++++++++++++++++++++++++++++++++++++++++++++++++ video/decode/vd_lavc.c | 4 + video/fmt-conversion.c | 3 + video/hwdec.h | 5 +- video/img_format.h | 3 +- wscript | 12 +- wscript_build.py | 3 + 13 files changed, 933 insertions(+), 4 deletions(-) create mode 100644 video/d3d11va.c create mode 100644 video/d3d11va.h create mode 100644 video/decode/d3d.c create mode 100644 video/decode/d3d.h create mode 100644 video/decode/d3d11va.c diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index d8de023a60..6f280eebb7 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -583,6 +583,7 @@ Video :dxva2: requires ``--vo=opengl:backend=angle`` or ``--vo=opengl:backend=dxinterop`` (Windows only) :dxva2-copy: copies video back to system RAM (Windows only) + :d3d11va-copy: experimental (Windows only) :rpi: requires ``--vo=rpi`` (Raspberry Pi only - default if available) ``auto`` tries to automatically enable hardware decoding using the first diff --git a/options/options.c b/options/options.c index de08b4ecdb..3446674857 100644 --- a/options/options.c +++ b/options/options.c @@ -88,6 +88,7 @@ const struct m_opt_choice_alternatives mp_hwdec_names[] = { {"vaapi-copy", HWDEC_VAAPI_COPY}, {"dxva2", HWDEC_DXVA2}, {"dxva2-copy", HWDEC_DXVA2_COPY}, + {"d3d11va-copy",HWDEC_D3D11VA_COPY}, {"rpi", HWDEC_RPI}, {"mediacodec", HWDEC_MEDIACODEC}, {0} diff --git a/video/d3d11va.c 
b/video/d3d11va.c new file mode 100644 index 0000000000..a9be571e9c --- /dev/null +++ b/video/d3d11va.c @@ -0,0 +1,86 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mp_image.h" +#include "d3d11va.h" + +struct d3d11va_surface { + HMODULE d3d11_dll; + ID3D11Texture2D *texture; + ID3D11VideoDecoderOutputView *surface; +}; + +ID3D11VideoDecoderOutputView *d3d11_surface_in_mp_image(struct mp_image *mpi) +{ + return mpi && mpi->imgfmt == IMGFMT_D3D11VA ? 
+ (ID3D11VideoDecoderOutputView *)mpi->planes[3] : NULL; +} + +ID3D11Texture2D *d3d11_texture_in_mp_image(struct mp_image *mpi) +{ + if (!mpi || mpi->imgfmt != IMGFMT_D3D11VA) + return NULL; + struct d3d11va_surface *surface = (void *)mpi->planes[0]; + return surface->texture; +} + +static void d3d11va_release_img(void *arg) +{ + struct d3d11va_surface *surface = arg; + if (surface->surface) + ID3D11VideoDecoderOutputView_Release(surface->surface); + + if (surface->texture) + ID3D11Texture2D_Release(surface->texture); + + if (surface->d3d11_dll) + FreeLibrary(surface->d3d11_dll); + + talloc_free(surface); +} + +struct mp_image *d3d11va_new_ref(ID3D11VideoDecoderOutputView *view, + int w, int h) +{ + if (!view) + return NULL; + struct d3d11va_surface *surface = talloc_zero(NULL, struct d3d11va_surface); + + surface->d3d11_dll = LoadLibrary(L"d3d11.dll"); + if (!surface->d3d11_dll) + goto fail; + + surface->surface = view; + ID3D11VideoDecoderOutputView_AddRef(surface->surface); + ID3D11VideoDecoderOutputView_GetResource( + surface->surface, (ID3D11Resource **)&surface->texture); + + struct mp_image *mpi = mp_image_new_custom_ref( + &(struct mp_image){0}, surface, d3d11va_release_img); + if (!mpi) + abort(); + + mp_image_setfmt(mpi, IMGFMT_D3D11VA); + mp_image_set_size(mpi, w, h); + mpi->planes[0] = (void *)surface; + mpi->planes[3] = (void *)surface->surface; + + return mpi; +fail: + d3d11va_release_img(surface); + return NULL; +} diff --git a/video/d3d11va.h b/video/d3d11va.h new file mode 100644 index 0000000000..db2f295241 --- /dev/null +++ b/video/d3d11va.h @@ -0,0 +1,30 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPV_D3D11_H +#define MPV_D3D11_H + +#include + +struct mp_image; + +ID3D11VideoDecoderOutputView *d3d11_surface_in_mp_image(struct mp_image *mpi); +ID3D11Texture2D *d3d11_texture_in_mp_image(struct mp_image *mpi); +struct mp_image *d3d11va_new_ref(ID3D11VideoDecoderOutputView *view, + int w, int h); + +#endif diff --git a/video/decode/d3d.c b/video/decode/d3d.c new file mode 100644 index 0000000000..14d94384be --- /dev/null +++ b/video/decode/d3d.c @@ -0,0 +1,241 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include + +#include "lavc.h" +#include "common/common.h" +#include "common/av_common.h" +#include "video/fmt-conversion.h" +#include "video/mp_image.h" +#include "osdep/windows_utils.h" + +#include "d3d.h" + +// define all the GUIDs used directly here, to avoid problems with inconsistent +// dxva2api.h versions in mingw-w64 and different MSVC version +#include +DEFINE_GUID(DXVA2_ModeMPEG2_VLD, 0xee27417f, 0x5e28, 0x4e65, 0xbe, 0xea, 0x1d, 0x26, 0xb5, 0x08, 0xad, 0xc9); +DEFINE_GUID(DXVA2_ModeMPEG2and1_VLD, 0x86695f12, 0x340e, 0x4f04, 0x9f, 0xd3, 0x92, 0x53, 0xdd, 0x32, 0x74, 0x60); + +DEFINE_GUID(DXVA2_ModeH264_E, 0x1b81be68, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5); +DEFINE_GUID(DXVA2_ModeH264_F, 0x1b81be69, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5); +DEFINE_GUID(DXVA_ModeH264_VLD_WithFMOASO_NoFGT, 0xd5f04ff9, 0x3418, 0x45d8, 0x95, 0x61, 0x32, 0xa7, 0x6a, 0xae, 0x2d, 0xdd); +DEFINE_GUID(DXVA_Intel_H264_NoFGT_ClearVideo, 0x604F8E68, 0x4951, 0x4c54, 0x88, 0xFE, 0xAB, 0xD2, 0x5C, 0x15, 0xB3, 0xD6); +DEFINE_GUID(DXVA_ModeH264_VLD_NoFGT_Flash, 0x4245F676, 0x2BBC, 0x4166, 0xa0, 0xBB, 0x54, 0xE7, 0xB8, 0x49, 0xC3, 0x80); + +DEFINE_GUID(DXVA2_ModeVC1_D, 0x1b81beA3, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5); +DEFINE_GUID(DXVA2_ModeVC1_D2010, 0x1b81beA4, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5); // August 2010 update + +DEFINE_GUID(DXVA2_ModeHEVC_VLD_Main, 0x5b11d51b, 0x2f4c, 0x4452, 0xbc, 0xc3, 0x09, 0xf2, 0xa1, 0x16, 0x0c, 0xc0); +DEFINE_GUID(DXVA2_ModeHEVC_VLD_Main10, 0x107af0e0, 0xef1a, 0x4d19, 0xab, 0xa8, 0x67, 0xa1, 0x63, 0x07, 0x3d, 0x13); + +DEFINE_GUID(DXVA2_ModeVP9_VLD_Profile0, 0x463707f8, 0xa1d0, 0x4585, 0x87, 0x6d, 0x83, 0xaa, 0x6d, 0x60, 0xb8, 0x9e); + +DEFINE_GUID(DXVA2_NoEncrypt, 0x1b81beD0, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5); + +static const int PROF_MPEG2_SIMPLE[] = {FF_PROFILE_MPEG2_SIMPLE, 0}; +static const int PROF_MPEG2_MAIN[] 
= {FF_PROFILE_MPEG2_SIMPLE, + FF_PROFILE_MPEG2_MAIN, 0}; +static const int PROF_H264_HIGH[] = {FF_PROFILE_H264_CONSTRAINED_BASELINE, + FF_PROFILE_H264_MAIN, + FF_PROFILE_H264_HIGH, 0}; +static const int PROF_HEVC_MAIN[] = {FF_PROFILE_HEVC_MAIN, 0}; +static const int PROF_HEVC_MAIN10[] = {FF_PROFILE_HEVC_MAIN, + FF_PROFILE_HEVC_MAIN_10, 0}; + +struct d3dva_mode { + const GUID *guid; + const char *name; + enum AVCodecID codec; + const int *profiles; // NULL or ends with 0 +}; + +#define MODE2(id) &MP_CONCAT(DXVA2_Mode, id), # id +#define MODE(id) &MP_CONCAT(DXVA_, id), # id +// Prefered modes must come first +static const struct d3dva_mode d3dva_modes[] = { + // MPEG-1/2 + {MODE2(MPEG2_VLD), AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_SIMPLE}, + {MODE2(MPEG2and1_VLD), AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_MAIN}, + {MODE2(MPEG2and1_VLD), AV_CODEC_ID_MPEG1VIDEO}, + + // H.264 + {MODE2(H264_F), AV_CODEC_ID_H264, PROF_H264_HIGH}, + {MODE (Intel_H264_NoFGT_ClearVideo), AV_CODEC_ID_H264, PROF_H264_HIGH}, + {MODE2(H264_E), AV_CODEC_ID_H264, PROF_H264_HIGH}, + {MODE (ModeH264_VLD_WithFMOASO_NoFGT), AV_CODEC_ID_H264, PROF_H264_HIGH}, + {MODE (ModeH264_VLD_NoFGT_Flash), AV_CODEC_ID_H264, PROF_H264_HIGH}, + + // VC-1 / WMV3 + {MODE2(VC1_D), AV_CODEC_ID_VC1}, + {MODE2(VC1_D), AV_CODEC_ID_WMV3}, + {MODE2(VC1_D2010), AV_CODEC_ID_VC1}, + {MODE2(VC1_D2010), AV_CODEC_ID_WMV3}, + + // HEVC + {MODE2(HEVC_VLD_Main), AV_CODEC_ID_HEVC, PROF_HEVC_MAIN}, + {MODE2(HEVC_VLD_Main10), AV_CODEC_ID_HEVC, PROF_HEVC_MAIN10}, + + // VP9 + {MODE2(VP9_VLD_Profile0), AV_CODEC_ID_VP9}, +}; +#undef MODE +#undef MODE2 + +int d3d_probe_codec(const char *decoder) +{ + enum AVCodecID codec = mp_codec_to_av_codec_id(decoder); + for (int i = 0; i < MP_ARRAY_SIZE(d3dva_modes); i++) { + const struct d3dva_mode *mode = &d3dva_modes[i]; + if (mode->codec == codec) + return 0; + } + return HWDEC_ERR_NO_CODEC; +} + +static bool profile_compatible(const struct d3dva_mode *mode, int profile) +{ + if (!mode->profiles) + return 
true; + + for (int i = 0; mode->profiles[i]; i++){ + if(mode->profiles[i] == profile) + return true; + } + return false; +} + +static bool mode_supported(const struct d3dva_mode *mode, + const GUID *device_modes, UINT n_modes) +{ + for (int i = 0; i < n_modes; i++) { + if (IsEqualGUID(mode->guid, &device_modes[i])) + return true; + } + return false; +} + +struct d3d_decoder_fmt d3d_select_decoder_mode( + struct lavc_ctx *s, const GUID *device_guids, UINT n_guids, + DWORD (*get_dxfmt_cb)(struct lavc_ctx *s, const GUID *guid, int depth)) +{ + struct d3d_decoder_fmt fmt = { + .guid = &GUID_NULL, + .mpfmt_decoded = IMGFMT_NONE, + .dxfmt_decoded = 0, + }; + + // this has the right bit-depth, but is unfortunately not the native format + int sw_img_fmt = pixfmt2imgfmt(s->avctx->sw_pix_fmt); + if (sw_img_fmt == IMGFMT_NONE) + return fmt; + + int depth = IMGFMT_RGB_DEPTH(sw_img_fmt); + int p010 = mp_imgfmt_find(1, 1, 2, 10, MP_IMGFLAG_YUV_NV); + int mpfmt_decoded = depth <= 8 ? IMGFMT_NV12 : p010; + + for (int i = 0; i < MP_ARRAY_SIZE(d3dva_modes); i++) { + const struct d3dva_mode *mode = &d3dva_modes[i]; + if (mode->codec == s->avctx->codec_id && + profile_compatible(mode, s->avctx->profile) && + mode_supported(mode, device_guids, n_guids)) { + + DWORD dxfmt_decoded = get_dxfmt_cb(s, mode->guid, depth); + if (dxfmt_decoded) { + fmt.guid = mode->guid; + fmt.mpfmt_decoded = mpfmt_decoded; + fmt.dxfmt_decoded = dxfmt_decoded; + return fmt; + } + } + } + return fmt; +} + +char *d3d_decoder_guid_to_desc_buf(char *buf, size_t buf_size, + const GUID *mode_guid) +{ + const char *name = ""; + for (int i = 0; i < MP_ARRAY_SIZE(d3dva_modes); i++) { + const struct d3dva_mode *mode = &d3dva_modes[i]; + if (IsEqualGUID(mode->guid, mode_guid)) { + name = mode->name; + break; + } + } + snprintf(buf, buf_size, "%s %s", mp_GUID_to_str(mode_guid), name); + return buf; +} + +void d3d_surface_align(struct lavc_ctx *s, int *w, int *h) +{ + int alignment = 16; + switch (s->avctx->codec_id) { + 
// decoding MPEG-2 requires additional alignment on some Intel GPUs, but it + // causes issues for H.264 on certain AMD GPUs..... + case AV_CODEC_ID_MPEG2VIDEO: + alignment = 32; + break; + // the HEVC DXVA2 spec asks for 128 pixel aligned surfaces to ensure + // all coding features have enough room to work with + case AV_CODEC_ID_HEVC: + alignment = 128; + break; + } + *w = FFALIGN(*w, alignment); + *h = FFALIGN(*h, alignment); +} + +unsigned d3d_decoder_config_score(struct lavc_ctx *s, + GUID *guidConfigBitstreamEncryption, + UINT ConfigBitstreamRaw) +{ + unsigned score = 0; + if (ConfigBitstreamRaw == 1) { + score = 1; + } else if (s->avctx->codec_id == AV_CODEC_ID_H264 + && ConfigBitstreamRaw == 2) { + score = 2; + } else { + return 0; + } + + if (IsEqualGUID(guidConfigBitstreamEncryption, &DXVA2_NoEncrypt)) + score += 16; + + return score; +} + +BOOL is_clearvideo(const GUID *mode_guid) +{ + return IsEqualGUID(mode_guid, &DXVA_Intel_H264_NoFGT_ClearVideo); +} + +void copy_nv12(struct mp_image *dest, uint8_t *src_bits, + unsigned src_pitch, unsigned surf_height) +{ + struct mp_image buf = {0}; + mp_image_setfmt(&buf, dest->imgfmt); + mp_image_set_size(&buf, dest->w, dest->h); + + buf.planes[0] = src_bits; + buf.stride[0] = src_pitch; + buf.planes[1] = src_bits + src_pitch * surf_height; + buf.stride[1] = src_pitch; + mp_image_copy_gpu(dest, &buf); +} diff --git a/video/decode/d3d.h b/video/decode/d3d.h new file mode 100644 index 0000000000..107a227579 --- /dev/null +++ b/video/decode/d3d.h @@ -0,0 +1,50 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPV_DECODE_D3D_H +#define MPV_DECODE_D3D_H + +#include +#include + +struct mp_image; +struct lavc_ctx; + +struct d3d_decoder_fmt { + const GUID *guid; + int mpfmt_decoded; + DWORD dxfmt_decoded; // D3DFORMAT or DXGI_FORMAT +}; + +int d3d_probe_codec(const char *decode); +struct d3d_decoder_fmt d3d_select_decoder_mode( + struct lavc_ctx *s, const GUID *device_guids, UINT n_guids, + DWORD (*get_dxfmt_cb)(struct lavc_ctx *s, const GUID *guid, int depth)); + +char *d3d_decoder_guid_to_desc_buf(char *buf, size_t buf_size, + const GUID *mode_guid); +#define d3d_decoder_guid_to_desc(guid) d3d_decoder_guid_to_desc_buf((char[256]){0}, 256, (guid)) + +void d3d_surface_align(struct lavc_ctx *s, int *w, int *h); +unsigned d3d_decoder_config_score(struct lavc_ctx *s, + GUID *guidConfigBitstreamEncryption, + UINT ConfigBitstreamRaw); +BOOL is_clearvideo(const GUID *mode_guid); +void copy_nv12(struct mp_image *dest, uint8_t *src_bits, + unsigned src_pitch, unsigned surf_height); + +#endif diff --git a/video/decode/d3d11va.c b/video/decode/d3d11va.c new file mode 100644 index 0000000000..3c0cce6e0d --- /dev/null +++ b/video/decode/d3d11va.c @@ -0,0 +1,498 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include + +#include "lavc.h" +#include "common/common.h" +#include "common/av_common.h" +#include "osdep/windows_utils.h" +#include "video/fmt-conversion.h" +#include "video/mp_image_pool.h" +#include "video/hwdec.h" + +#include "video/d3d11va.h" +#include "d3d.h" + +#define ADDITIONAL_SURFACES (4 + HWDEC_DELAY_QUEUE_COUNT) + +struct d3d11va_decoder { + ID3D11VideoDecoder *decoder; + struct mp_image_pool *pool; + ID3D11Texture2D *staging; + int mpfmt_decoded; +}; + +struct priv { + struct mp_log *log; + + HMODULE d3d11_dll; + ID3D11Device *device; + ID3D11DeviceContext *device_ctx; + ID3D11VideoDevice *video_dev; + ID3D11VideoContext *video_ctx; + + struct d3d11va_decoder *decoder; + struct mp_image_pool *sw_pool; +}; + +static struct mp_image *d3d11va_allocate_image(struct lavc_ctx *s, int w, int h) +{ + struct priv *p = s->hwdec_priv; + struct mp_image *img = mp_image_pool_get_no_alloc(p->decoder->pool, + IMGFMT_D3D11VA, w, h); + if (!img) + MP_ERR(p, "Failed to get free D3D11VA surface\n"); + return img; +} + +static struct mp_image *d3d11va_retrieve_image(struct lavc_ctx *s, + struct mp_image *img) +{ + HRESULT hr; + struct priv *p = s->hwdec_priv; + ID3D11Texture2D *staging = p->decoder->staging; + ID3D11Texture2D *texture = d3d11_texture_in_mp_image(img); + ID3D11VideoDecoderOutputView *surface = d3d11_surface_in_mp_image(img); + + if (!texture || !surface) { + MP_ERR(p, "Failed to get Direct3D texture and surface from mp_image\n"); + return img; + } + + D3D11_TEXTURE2D_DESC texture_desc; + ID3D11Texture2D_GetDesc(texture, &texture_desc); + if (texture_desc.Width < img->w || 
texture_desc.Height < img->h) { + MP_ERR(p, "Direct3D11 texture smaller than mp_image dimensions\n"); + return img; + } + + // copy to the staging texture + D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC surface_desc; + ID3D11VideoDecoderOutputView_GetDesc(surface, &surface_desc); + ID3D11DeviceContext_CopySubresourceRegion( + p->device_ctx, + (ID3D11Resource *)staging, 0, 0, 0, 0, + (ID3D11Resource *)texture, surface_desc.Texture2D.ArraySlice, NULL); + + struct mp_image *sw_img = mp_image_pool_get(p->sw_pool, + p->decoder->mpfmt_decoded, + texture_desc.Width, + texture_desc.Height); + if (!sw_img) { + MP_ERR(p, "Failed to get %s surface from CPU pool\n", + mp_imgfmt_to_name(p->decoder->mpfmt_decoded)); + return img; + } + + // copy staging texture to the cpu mp_image + D3D11_MAPPED_SUBRESOURCE lock; + hr = ID3D11DeviceContext_Map(p->device_ctx, (ID3D11Resource *)staging, + 0, D3D11_MAP_READ, 0, &lock); + if (FAILED(hr)) { + MP_ERR(p, "Failed to map D3D11 surface: %s\n", mp_HRESULT_to_str(hr)); + talloc_free(sw_img); + return img; + } + copy_nv12(sw_img, lock.pData, lock.RowPitch, texture_desc.Height); + ID3D11DeviceContext_Unmap(p->device_ctx, (ID3D11Resource *)staging, 0); + + mp_image_set_size(sw_img, img->w, img->h); + mp_image_copy_attributes(sw_img, img); + talloc_free(img); + return sw_img; +} + +struct d3d11_format { + DXGI_FORMAT format; + const char *name; + int depth; +}; + +#define DFMT(name) MP_CONCAT(DXGI_FORMAT_, name), # name +static const struct d3d11_format d3d11_formats[] = { + {DFMT(NV12), 8}, + {DFMT(P010), 10}, + {DFMT(P016), 16}, +}; +#undef DFMT + +static BOOL d3d11_format_supported(struct lavc_ctx *s, const GUID *guid, + const struct d3d11_format *format) +{ + struct priv *p = s->hwdec_priv; + BOOL is_supported = FALSE; + HRESULT hr = ID3D11VideoDevice_CheckVideoDecoderFormat( + p->video_dev, guid, format->format, &is_supported); + if (FAILED(hr)) { + MP_ERR(p, "Check decoder output format %s for decoder %s: %s\n", + format->name, 
d3d_decoder_guid_to_desc(guid), + mp_HRESULT_to_str(hr)); + } + return is_supported; +} + +static void dump_decoder_info(struct lavc_ctx *s, const GUID *guid) +{ + struct priv *p = s->hwdec_priv; + char fmts[256] = {0}; + for (int i = 0; i < MP_ARRAY_SIZE(d3d11_formats); i++) { + const struct d3d11_format *format = &d3d11_formats[i]; + if (d3d11_format_supported(s, guid, format)) + mp_snprintf_cat(fmts, sizeof(fmts), " %s", format->name); + } + MP_VERBOSE(p, "%s %s\n", d3d_decoder_guid_to_desc(guid), fmts); +} + +static DWORD get_dxfmt_cb(struct lavc_ctx *s, const GUID *guid, int depth) +{ + for (int i = 0; i < MP_ARRAY_SIZE(d3d11_formats); i++) { + const struct d3d11_format *format = &d3d11_formats[i]; + if (depth <= format->depth && + d3d11_format_supported(s, guid, format)) { + return format->format; + } + } + return 0; +} + +static void d3d11va_destroy_decoder(void *arg) +{ + struct d3d11va_decoder *decoder = arg; + + if (decoder->decoder) + ID3D11VideoDecoder_Release(decoder->decoder); + + if (decoder->staging) + ID3D11Texture2D_Release(decoder->staging); +} + +static int d3d11va_init_decoder(struct lavc_ctx *s, int w, int h) +{ + HRESULT hr; + int ret = -1; + struct priv *p = s->hwdec_priv; + TA_FREEP(&p->decoder); + + void *tmp = talloc_new(NULL); + + UINT n_guids = ID3D11VideoDevice_GetVideoDecoderProfileCount(p->video_dev); + GUID *device_guids = talloc_array(tmp, GUID, n_guids); + for (UINT i = 0; i < n_guids; i++) { + GUID *guid = &device_guids[i]; + hr = ID3D11VideoDevice_GetVideoDecoderProfile(p->video_dev, i, guid); + if (FAILED(hr)) { + MP_ERR(p, "Failed to get VideoDecoderProfile %d: %s\n", + i, mp_HRESULT_to_str(hr)); + goto done; + } + dump_decoder_info(s, guid); + } + + struct d3d_decoder_fmt fmt = + d3d_select_decoder_mode(s, device_guids, n_guids, get_dxfmt_cb); + if (fmt.mpfmt_decoded == IMGFMT_NONE) { + MP_ERR(p, "Failed to find a suitable decoder\n"); + goto done; + } + + struct d3d11va_decoder *decoder = talloc_zero(tmp, struct 
d3d11va_decoder); + talloc_set_destructor(decoder, d3d11va_destroy_decoder); + decoder->mpfmt_decoded = fmt.mpfmt_decoded; + + int n_surfaces = hwdec_get_max_refs(s) + ADDITIONAL_SURFACES; + int w_align = w, h_align = h; + d3d_surface_align(s, &w_align, &h_align); + + ID3D11Texture2D *texture = NULL; + D3D11_TEXTURE2D_DESC tex_desc = { + .Width = w_align, + .Height = h_align, + .MipLevels = 1, + .Format = fmt.dxfmt_decoded, + .SampleDesc.Count = 1, + .MiscFlags = 0, + .ArraySize = n_surfaces, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_DECODER, + .CPUAccessFlags = 0, + }; + hr = ID3D11Device_CreateTexture2D(p->device, &tex_desc, NULL, &texture); + if (FAILED(hr)) { + MP_ERR(p, "Failed to create Direct3D11 texture with %d surfaces: %s\n", + n_surfaces, mp_HRESULT_to_str(hr)); + goto done; + } + + if (s->hwdec->type == HWDEC_D3D11VA_COPY) { + // create staging texture shared with the CPU with mostly the same + // parameters as the above decoder-bound texture + ID3D11Texture2D_GetDesc(texture, &tex_desc); + tex_desc.MipLevels = 1; + tex_desc.MiscFlags = 0; + tex_desc.ArraySize = 1; + tex_desc.Usage = D3D11_USAGE_STAGING; + tex_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + tex_desc.BindFlags = 0; + hr = ID3D11Device_CreateTexture2D(p->device, &tex_desc, NULL, + &decoder->staging); + if (FAILED(hr)) { + MP_ERR(p, "Failed to create staging texture: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + } + + // pool to hold the mp_image wrapped surfaces + decoder->pool = talloc_steal(decoder, mp_image_pool_new(n_surfaces)); + // array of the same surfaces (needed by ffmpeg) + ID3D11VideoDecoderOutputView **surfaces = + talloc_array_ptrtype(decoder->pool, surfaces, n_surfaces); + + D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC view_desc = { + .DecodeProfile = *fmt.guid, + .ViewDimension = D3D11_VDOV_DIMENSION_TEXTURE2D, + }; + for (int i = 0; i < n_surfaces; i++) { + ID3D11VideoDecoderOutputView **surface = &surfaces[i]; + view_desc.Texture2D.ArraySlice = i; + hr = 
ID3D11VideoDevice_CreateVideoDecoderOutputView( + p->video_dev, (ID3D11Resource *)texture, &view_desc, surface); + if (FAILED(hr)) { + MP_ERR(p, "Failed getting decoder output view %d: %s\n", + i, mp_HRESULT_to_str(hr)); + goto done; + } + struct mp_image *img = d3d11va_new_ref(*surface, w, h); + ID3D11VideoDecoderOutputView_Release(*surface); // transferred to img + if (!img) { + MP_ERR(p, "Failed to create D3D11VA image %d\n", i); + goto done; + } + mp_image_pool_add(decoder->pool, img); // transferred to pool + } + + D3D11_VIDEO_DECODER_DESC decoder_desc = { + .Guid = *fmt.guid, + .SampleWidth = w, + .SampleHeight = h, + .OutputFormat = fmt.dxfmt_decoded, + }; + UINT n_cfg; + hr = ID3D11VideoDevice_GetVideoDecoderConfigCount(p->video_dev, + &decoder_desc, &n_cfg); + if (FAILED(hr)) { + MP_ERR(p, "Failed to get number of decoder configurations: %s)", + mp_HRESULT_to_str(hr)); + goto done; + } + + // pick the config with the highest score + D3D11_VIDEO_DECODER_CONFIG *decoder_config = + talloc_zero(decoder, D3D11_VIDEO_DECODER_CONFIG); + unsigned max_score = 0; + for (UINT i = 0; i < n_cfg; i++) { + D3D11_VIDEO_DECODER_CONFIG cfg; + hr = ID3D11VideoDevice_GetVideoDecoderConfig(p->video_dev, + &decoder_desc, + i, &cfg); + if (FAILED(hr)) { + MP_ERR(p, "Failed to get decoder config %d: %s\n", + i, mp_HRESULT_to_str(hr)); + goto done; + } + unsigned score = d3d_decoder_config_score( + s, &cfg.guidConfigBitstreamEncryption, cfg.ConfigBitstreamRaw); + if (score > max_score) { + max_score = score; + *decoder_config = cfg; + } + } + if (!max_score) { + MP_ERR(p, "Failed to find a suitable decoder configuration\n"); + goto done; + } + + hr = ID3D11VideoDevice_CreateVideoDecoder(p->video_dev, &decoder_desc, + decoder_config, + &decoder->decoder); + if (FAILED(hr)) { + MP_ERR(p, "Failed to create video decoder: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + struct AVD3D11VAContext *avd3d11va_ctx = s->avctx->hwaccel_context; + avd3d11va_ctx->decoder = 
decoder->decoder; + avd3d11va_ctx->video_context = p->video_ctx; + avd3d11va_ctx->cfg = decoder_config; + avd3d11va_ctx->surface_count = n_surfaces; + avd3d11va_ctx->surface = surfaces; + avd3d11va_ctx->workaround = is_clearvideo(fmt.guid) ? + FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO : 0; + + p->decoder = talloc_steal(NULL, decoder); + ret = 0; +done: + // still referenced by pool images / surfaces + if (texture) + ID3D11Texture2D_Release(texture); + + talloc_free(tmp); + return ret; +} + +static void destroy_device(struct lavc_ctx *s) +{ + struct priv *p = s->hwdec_priv; + + if (p->device) + ID3D11Device_Release(p->device); + + if (p->device_ctx) + ID3D11DeviceContext_Release(p->device_ctx); + + if (p->d3d11_dll) + FreeLibrary(p->d3d11_dll); +} + +static bool create_device(struct lavc_ctx *s, BOOL thread_safe) +{ + HRESULT hr; + struct priv *p = s->hwdec_priv; + + p->d3d11_dll = LoadLibrary(L"d3d11.dll"); + if (!p->d3d11_dll) { + MP_ERR(p, "Failed to load D3D11 library\n"); + return false; + } + + PFN_D3D11_CREATE_DEVICE CreateDevice = + (void *)GetProcAddress(p->d3d11_dll, "D3D11CreateDevice"); + if (!CreateDevice) { + MP_ERR(p, "Failed to get D3D11CreateDevice symbol from DLL: %s\n", + mp_LastError_to_str()); + return false; + } + + hr = CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, + D3D11_CREATE_DEVICE_VIDEO_SUPPORT, NULL, 0, + D3D11_SDK_VERSION, &p->device, NULL, &p->device_ctx); + if (FAILED(hr)) { + MP_ERR(p, "Failed to create D3D11 Device: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + ID3D10Multithread *multithread; + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread, + (void **)&multithread); + if (FAILED(hr)) { + MP_ERR(p, "Failed to get Multithread interface: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + ID3D10Multithread_SetMultithreadProtected(multithread, thread_safe); + ID3D10Multithread_Release(multithread); + return true; +} + +static void d3d11va_uninit(struct lavc_ctx *s) +{ + struct priv *p = s->hwdec_priv; 
+ if (!p) + return; + + talloc_free(p->decoder); + av_freep(&s->avctx->hwaccel_context); + + if (p->video_dev) + ID3D11VideoDevice_Release(p->video_dev); + + if (p->video_ctx) + ID3D11VideoContext_Release(p->video_ctx); + + destroy_device(s); + + TA_FREEP(&s->hwdec_priv); +} + +static int d3d11va_init(struct lavc_ctx *s) +{ + HRESULT hr; + struct priv *p = talloc_zero(NULL, struct priv); + if (!p) + return -1; + + s->hwdec_priv = p; + p->log = mp_log_new(s, s->log, "d3d11va"); + if (s->hwdec->type == HWDEC_D3D11VA_COPY) { + mp_check_gpu_memcpy(p->log, NULL); + p->sw_pool = talloc_steal(p, mp_image_pool_new(17)); + } + + if (!create_device(s, FALSE)) + goto fail; + + hr = ID3D11DeviceContext_QueryInterface(p->device_ctx, + &IID_ID3D11VideoContext, + (void **)&p->video_ctx); + if (FAILED(hr)) { + MP_ERR(p, "Failed to get VideoContext interface: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + hr = ID3D11Device_QueryInterface(p->device, + &IID_ID3D11VideoDevice, + (void **)&p->video_dev); + if (FAILED(hr)) { + MP_ERR(p, "Failed to get VideoDevice interface. 
%s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + s->avctx->hwaccel_context = av_d3d11va_alloc_context(); + if (!s->avctx->hwaccel_context) { + MP_ERR(p, "Failed to allocate hwaccel_context\n"); + goto fail; + } + + return 0; +fail: + d3d11va_uninit(s); + return -1; +} + +static int d3d11va_probe(struct vd_lavc_hwdec *hwdec, + struct mp_hwdec_info *info, + const char *decoder) +{ + hwdec_request_api(info, "d3d11va"); + return d3d_probe_codec(decoder); +} + +const struct vd_lavc_hwdec mp_vd_lavc_d3d11va_copy = { + .type = HWDEC_D3D11VA_COPY, + .image_format = IMGFMT_D3D11VA, + .probe = d3d11va_probe, + .init = d3d11va_init, + .uninit = d3d11va_uninit, + .init_decoder = d3d11va_init_decoder, + .allocate_image = d3d11va_allocate_image, + .process_image = d3d11va_retrieve_image, +}; diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c index 1cb5bc9897..478c6d56da 100644 --- a/video/decode/vd_lavc.c +++ b/video/decode/vd_lavc.c @@ -126,6 +126,7 @@ extern const struct vd_lavc_hwdec mp_vd_lavc_vaapi; extern const struct vd_lavc_hwdec mp_vd_lavc_vaapi_copy; extern const struct vd_lavc_hwdec mp_vd_lavc_dxva2; extern const struct vd_lavc_hwdec mp_vd_lavc_dxva2_copy; +extern const struct vd_lavc_hwdec mp_vd_lavc_d3d11va_copy; extern const struct vd_lavc_hwdec mp_vd_lavc_rpi; extern const struct vd_lavc_hwdec mp_vd_lavc_mediacodec; @@ -147,6 +148,9 @@ static const struct vd_lavc_hwdec *const hwdec_list[] = { &mp_vd_lavc_dxva2, &mp_vd_lavc_dxva2_copy, #endif +#if HAVE_D3D11VA_HWACCEL + &mp_vd_lavc_d3d11va_copy, +#endif #if HAVE_ANDROID &mp_vd_lavc_mediacodec, #endif diff --git a/video/fmt-conversion.c b/video/fmt-conversion.c index c23b7bb232..53342060b1 100644 --- a/video/fmt-conversion.c +++ b/video/fmt-conversion.c @@ -109,6 +109,9 @@ static const struct { #endif {IMGFMT_VAAPI, AV_PIX_FMT_VAAPI_VLD}, {IMGFMT_DXVA2, AV_PIX_FMT_DXVA2_VLD}, +#if HAVE_D3D11VA_HWACCEL + {IMGFMT_D3D11VA, AV_PIX_FMT_D3D11VA_VLD}, +#endif #if HAVE_AV_PIX_FMT_MMAL {IMGFMT_MMAL, 
AV_PIX_FMT_MMAL}, #endif diff --git a/video/hwdec.h b/video/hwdec.h index cfbb2eaf29..9585e32e25 100644 --- a/video/hwdec.h +++ b/video/hwdec.h @@ -15,8 +15,9 @@ enum hwdec_type { HWDEC_VAAPI_COPY = 5, HWDEC_DXVA2 = 6, HWDEC_DXVA2_COPY = 7, - HWDEC_RPI = 8, - HWDEC_MEDIACODEC = 9, + HWDEC_D3D11VA_COPY = 8, + HWDEC_RPI = 9, + HWDEC_MEDIACODEC = 10, }; // hwdec_type names (options.c) diff --git a/video/img_format.h b/video/img_format.h index a58e445ea2..605dc920bd 100644 --- a/video/img_format.h +++ b/video/img_format.h @@ -195,7 +195,8 @@ enum mp_imgfmt { IMGFMT_VDPAU, // VdpVideoSurface IMGFMT_VDPAU_OUTPUT, // VdpOutputSurface IMGFMT_VAAPI, - IMGFMT_DXVA2, // IDirect3DSurface9 (NV12) + IMGFMT_D3D11VA, // ID3D11VideoDecoderOutputView (NV12/P010/P016) + IMGFMT_DXVA2, // IDirect3DSurface9 (NV12/P010/P016) IMGFMT_MMAL, // MMAL_BUFFER_HEADER_T IMGFMT_VIDEOTOOLBOX, // CVPixelBufferRef diff --git a/wscript b/wscript index 47639d303e..f98656a0fa 100644 --- a/wscript +++ b/wscript @@ -845,10 +845,20 @@ hwaccel_features = [ 'desc': 'libavcodec DXVA2 hwaccel', 'deps': [ 'win32' ], 'func': check_headers('libavcodec/dxva2.h', use='libav'), + }, { + 'name': '--d3d11va-hwaccel', + 'desc': 'libavcodec D3D11VA hwaccel', + 'deps': [ 'win32' ], + 'func': check_headers('libavcodec/d3d11va.h', use='libav'), + }, { + 'name': 'd3d-hwaccel', + 'desc': 'Direct3D hwaccel', + 'deps_any': [ 'dxva2-hwaccel', 'd3d11va-hwaccel' ], + 'func': check_true }, { 'name': 'sse4-intrinsics', 'desc': 'GCC SSE4 intrinsics for GPU memcpy', - 'deps_any': [ 'dxva2-hwaccel', 'vaapi-hwaccel' ], + 'deps_any': [ 'd3d-hwaccel', 'vaapi-hwaccel' ], 'func': check_cc(fragment=load_fragment('sse.c')), } ] diff --git a/wscript_build.py b/wscript_build.py index 21a6a5d12d..f996a6e57b 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -286,11 +286,14 @@ def build(ctx): ( "video/mp_image_pool.c" ), ( "video/sws_utils.c" ), ( "video/dxva2.c", "dxva2-hwaccel" ), + ( "video/d3d11va.c", "d3d11va-hwaccel" ), ( 
"video/vaapi.c", "vaapi" ), ( "video/vdpau.c", "vdpau" ), ( "video/vdpau_mixer.c", "vdpau" ), ( "video/decode/dec_video.c"), ( "video/decode/dxva2.c", "dxva2-hwaccel" ), + ( "video/decode/d3d11va.c", "d3d11va-hwaccel" ), + ( "video/decode/d3d.c", "d3d-hwaccel" ), ( "video/decode/rpi.c", "rpi" ), ( "video/decode/vaapi.c", "vaapi-hwaccel" ), ( "video/decode/vd_lavc.c" ), -- cgit v1.2.3