diff options
-rw-r--r-- | DOCS/man/options.rst | 41 | ||||
-rw-r--r-- | options/options.c | 5 | ||||
-rw-r--r-- | options/options.h | 1 | ||||
-rw-r--r-- | video/out/d3d11/context.c | 235 | ||||
-rw-r--r-- | video/out/d3d11/hwdec_d3d11va.c | 195 | ||||
-rw-r--r-- | video/out/d3d11/ra_d3d11.c | 2235 | ||||
-rw-r--r-- | video/out/d3d11/ra_d3d11.h | 34 | ||||
-rw-r--r-- | video/out/gpu/context.c | 7 | ||||
-rw-r--r-- | video/out/gpu/d3d11_helpers.c (renamed from video/out/opengl/d3d11_helpers.c) | 26 | ||||
-rw-r--r-- | video/out/gpu/d3d11_helpers.h (renamed from video/out/opengl/d3d11_helpers.h) | 6 | ||||
-rw-r--r-- | video/out/gpu/hwdec.c | 4 | ||||
-rw-r--r-- | video/out/opengl/context_angle.c | 2 | ||||
-rw-r--r-- | wscript | 17 | ||||
-rw-r--r-- | wscript_build.py | 5 |
14 files changed, 2797 insertions, 16 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index a6c2136947..d6dfc48535 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -686,8 +686,8 @@ Video :dxva2: requires ``--vo=gpu`` with ``--gpu-context=angle`` or ``--gpu-context=dxinterop`` (Windows only) :dxva2-copy: copies video back to system RAM (Windows only) - :d3d11va: requires ``--vo=gpu`` with ``--gpu-context=angle`` - (Windows 8+ only) + :d3d11va: requires ``--vo=gpu`` with ``--gpu-context=d3d11`` or + ``--gpu-context=angle`` (Windows 8+ only) :d3d11va-copy: copies video back to system RAM (Windows 8+ only) :mediacodec: requires ``--vo=mediacodec_embed`` (Android only) :mediacodec-copy: copies video back to system RAM (Android only) @@ -775,10 +775,11 @@ Video BT.601 or BT.709, a forced, low-quality but correct RGB conversion is performed. Otherwise, the result will be totally incorrect. - ``d3d11va`` is usually safe (if used with ANGLE builds that support - ``EGL_KHR_stream path`` - otherwise, it converts to RGB), except that - 10 bit input (HEVC main 10 profiles) will be rounded down to 8 bits, - which results in reduced quality. + ``d3d11va`` is safe when used with the ``d3d11`` backend. If used with + ``angle`` is it usually safe, except that 10 bit input (HEVC main 10 + profiles) will be rounded down to 8 bits, which will result in reduced + quality. Also note that with very old ANGLE builds (without + ``EGL_KHR_stream path``,) all input will be converted to RGB. ``dxva2`` is not safe. It appears to always use BT.601 for forced RGB conversion, but actual behavior depends on the GPU drivers. Some drivers @@ -4272,6 +4273,30 @@ The following video options are currently all specific to ``--vo=gpu`` and as mpv's vulkan implementation currently does not try and protect textures against concurrent access. +``--d3d11-warp=<yes|no|auto>`` + Use WARP (Windows Advanced Rasterization Platform) with the D3D11 GPU + backend (default: auto). This is a high performance software renderer. 
By + default, it is only used when the system has no hardware adapters that + support D3D11. While the extended GPU features will work with WARP, they + can be very slow. + +``--d3d11-feature-level=<12_1|12_0|11_1|11_0|10_1|10_0|9_3|9_2|9_1>`` + Select a specific feature level when using the D3D11 GPU backend. By + default, the highest available feature level is used. This option can be + used to select a lower feature level, which is mainly useful for debugging. + Most extended GPU features will not work at 9_x feature levels. + +``--d3d11-flip=<yes|no>`` + Enable flip-model presentation, which avoids unnecessarily copying the + backbuffer by sharing surfaces with the DWM (default: yes). This may cause + performance issues with older drivers. If flip-model presentation is not + supported (for example, on Windows 7 without the platform update), mpv will + automatically fall back to the older bitblt presentation model. + +``--d3d11-sync-interval=<0..4>`` + Schedule each frame to be presented for this number of VBlank intervals. + (default: 1) Setting to 1 will enable VSync, setting to 0 will disable it. + ``--spirv-compiler=<compiler>`` Controls which compiler is used to translate GLSL to SPIR-V. This is (currently) only relevant for ``--gpu-api=vulkan``. The possible choices @@ -4694,6 +4719,8 @@ The following video options are currently all specific to ``--vo=gpu`` and Win32, using WGL for rendering and Direct3D 9Ex for presentation. Works on Nvidia and AMD. Newer Intel chips with the latest drivers may also work. + d3d11 + Win32, with native Direct3D 11 rendering. 
x11 X11/GLX x11vk @@ -4728,6 +4755,8 @@ The following video options are currently all specific to ``--vo=gpu`` and Allow only OpenGL (requires OpenGL 2.1+ or GLES 2.0+) vulkan Allow only Vulkan (requires a valid/working ``--spirv-compiler``) + d3d11 + Allow only ``--gpu-context=d3d11`` ``--opengl-es=<mode>`` Controls which type of OpenGL context will be accepted: diff --git a/options/options.c b/options/options.c index 22e448d22f..3bf4ee1108 100644 --- a/options/options.c +++ b/options/options.c @@ -90,6 +90,7 @@ extern const struct m_obj_list ao_obj_list; extern const struct m_sub_options opengl_conf; extern const struct m_sub_options vulkan_conf; extern const struct m_sub_options spirv_conf; +extern const struct m_sub_options d3d11_conf; extern const struct m_sub_options angle_conf; extern const struct m_sub_options cocoa_conf; @@ -699,6 +700,10 @@ const m_option_t mp_opts[] = { OPT_SUBSTRUCT("", vulkan_opts, vulkan_conf, 0), #endif +#if HAVE_D3D11 + OPT_SUBSTRUCT("", d3d11_opts, d3d11_conf, 0), +#endif + #if HAVE_EGL_ANGLE_WIN32 OPT_SUBSTRUCT("", angle_opts, angle_conf, 0), #endif diff --git a/options/options.h b/options/options.h index 0d697a717c..47a4622430 100644 --- a/options/options.h +++ b/options/options.h @@ -332,6 +332,7 @@ typedef struct MPOpts { struct opengl_opts *opengl_opts; struct vulkan_opts *vulkan_opts; struct spirv_opts *spirv_opts; + struct d3d11_opts *d3d11_opts; struct cocoa_opts *cocoa_opts; struct dvd_opts *dvd_opts; diff --git a/video/out/d3d11/context.c b/video/out/d3d11/context.c new file mode 100644 index 0000000000..018fd99934 --- /dev/null +++ b/video/out/d3d11/context.c @@ -0,0 +1,235 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "common/msg.h" +#include "options/m_config.h" +#include "osdep/windows_utils.h" + +#include "video/out/gpu/context.h" +#include "video/out/gpu/d3d11_helpers.h" +#include "video/out/gpu/spirv.h" +#include "video/out/w32_common.h" +#include "ra_d3d11.h" + +struct d3d11_opts { + int feature_level; + int warp; + int flip; + int sync_interval; +}; + +#define OPT_BASE_STRUCT struct d3d11_opts +const struct m_sub_options d3d11_conf = { + .opts = (const struct m_option[]) { + OPT_CHOICE("d3d11-warp", warp, 0, + ({"auto", -1}, + {"no", 0}, + {"yes", 1})), + OPT_CHOICE("d3d11-feature-level", feature_level, 0, + ({"12_1", D3D_FEATURE_LEVEL_12_1}, + {"12_0", D3D_FEATURE_LEVEL_12_0}, + {"11_1", D3D_FEATURE_LEVEL_11_1}, + {"11_0", D3D_FEATURE_LEVEL_11_0}, + {"10_1", D3D_FEATURE_LEVEL_10_1}, + {"10_0", D3D_FEATURE_LEVEL_10_0}, + {"9_3", D3D_FEATURE_LEVEL_9_3}, + {"9_2", D3D_FEATURE_LEVEL_9_2}, + {"9_1", D3D_FEATURE_LEVEL_9_1})), + OPT_FLAG("d3d11-flip", flip, 0), + OPT_INTRANGE("d3d11-sync-interval", sync_interval, 0, 0, 4), + {0} + }, + .defaults = &(const struct d3d11_opts) { + .feature_level = D3D_FEATURE_LEVEL_12_1, + .warp = -1, + .flip = 1, + .sync_interval = 1, + }, + .size = sizeof(struct d3d11_opts) +}; + +struct priv { + struct d3d11_opts *opts; + + struct ra_tex *backbuffer; + ID3D11Device *device; + IDXGISwapChain *swapchain; +}; + +static struct ra_tex *get_backbuffer(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ID3D11Texture2D *backbuffer = NULL; + struct ra_tex *tex = NULL; + HRESULT hr; + + hr = IDXGISwapChain_GetBuffer(p->swapchain, 0, 
&IID_ID3D11Texture2D, + (void**)&backbuffer); + if (FAILED(hr)) { + MP_ERR(ctx, "Couldn't get swapchain image\n"); + goto done; + } + + tex = ra_d3d11_wrap_tex(ctx->ra, (ID3D11Resource *)backbuffer); +done: + SAFE_RELEASE(backbuffer); + return tex; +} + +static bool resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + HRESULT hr; + + ra_tex_free(ctx->ra, &p->backbuffer); + + hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, ctx->vo->dwidth, + ctx->vo->dheight, DXGI_FORMAT_UNKNOWN, 0); + if (FAILED(hr)) { + MP_FATAL(ctx, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(hr)); + return false; + } + + p->backbuffer = get_backbuffer(ctx); + + return true; +} + +static bool d3d11_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + return resize(ctx); +} + +static int d3d11_color_depth(struct ra_swapchain *sw) +{ + return 8; +} + +static bool d3d11_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct priv *p = sw->priv; + *out_fbo = (struct ra_fbo) { + .tex = p->backbuffer, + .flip = false, + }; + return true; +} + +static bool d3d11_submit_frame(struct ra_swapchain *sw, + const struct vo_frame *frame) +{ + ra_d3d11_flush(sw->ctx->ra); + return true; +} + +static void d3d11_swap_buffers(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + IDXGISwapChain_Present(p->swapchain, p->opts->sync_interval, 0); +} + +static int d3d11_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + return ret; +} + +static void d3d11_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_tex_free(ctx->ra, &p->backbuffer); + SAFE_RELEASE(p->swapchain); + vo_w32_uninit(ctx->vo); + SAFE_RELEASE(p->device); + + // Destory the RA last to prevent objects we hold from showing up in D3D's + // leak checker + ctx->ra->fns->destroy(ctx->ra); +} + +static const struct ra_swapchain_fns 
d3d11_swapchain = { + .color_depth = d3d11_color_depth, + .start_frame = d3d11_start_frame, + .submit_frame = d3d11_submit_frame, + .swap_buffers = d3d11_swap_buffers, +}; + +static bool d3d11_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + p->opts = mp_get_config_group(ctx, ctx->global, &d3d11_conf); + + struct ra_swapchain *sw = ctx->swapchain = talloc_zero(ctx, struct ra_swapchain); + sw->priv = p; + sw->ctx = ctx; + sw->fns = &d3d11_swapchain; + + struct d3d11_device_opts dopts = { + .debug = ctx->opts.debug, + .allow_warp = p->opts->warp != 0, + .force_warp = p->opts->warp == 1, + .max_feature_level = p->opts->feature_level, + .max_frame_latency = ctx->opts.swapchain_depth, + }; + if (!mp_d3d11_create_present_device(ctx->log, &dopts, &p->device)) + goto error; + + if (!spirv_compiler_init(ctx)) + goto error; + ctx->ra = ra_d3d11_create(p->device, ctx->log, ctx->spirv); + if (!ctx->ra) + goto error; + + if (!vo_w32_init(ctx->vo)) + goto error; + + struct d3d11_swapchain_opts scopts = { + .window = vo_w32_hwnd(ctx->vo), + .width = ctx->vo->dwidth, + .height = ctx->vo->dheight, + .flip = p->opts->flip, + // Add one frame for the backbuffer and one frame of "slack" to reduce + // contention with the window manager when acquiring the backbuffer + .length = ctx->opts.swapchain_depth + 2, + .usage = DXGI_USAGE_RENDER_TARGET_OUTPUT, + }; + if (!mp_d3d11_create_swapchain(p->device, ctx->log, &scopts, &p->swapchain)) + goto error; + + p->backbuffer = get_backbuffer(ctx); + + return true; + +error: + d3d11_uninit(ctx); + return false; +} + +const struct ra_ctx_fns ra_ctx_d3d11 = { + .type = "d3d11", + .name = "d3d11", + .reconfig = d3d11_reconfig, + .control = d3d11_control, + .init = d3d11_init, + .uninit = d3d11_uninit, +}; diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c new file mode 100644 index 0000000000..f179298ac1 --- /dev/null +++ b/video/out/d3d11/hwdec_d3d11va.c @@ -0,0 +1,195 @@ +/* + 
* This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <windows.h> +#include <d3d11.h> +#include <d3d11_1.h> + +#include "config.h" + +#include "common/common.h" +#include "osdep/windows_utils.h" +#include "video/hwdec.h" +#include "video/decode/d3d.h" +#include "video/out/d3d11/ra_d3d11.h" +#include "video/out/gpu/hwdec.h" + +struct priv_owner { + struct mp_hwdec_ctx hwctx; + ID3D11Device *device; + ID3D11Device1 *device1; +}; + +struct priv { + ID3D11DeviceContext1 *ctx; + ID3D11Texture2D *copy_tex; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + if (p->hwctx.ctx) + hwdec_devices_remove(hw->devs, &p->hwctx); + SAFE_RELEASE(p->device); + SAFE_RELEASE(p->device1); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + HRESULT hr; + + if (!ra_is_d3d11(hw->ra)) + return -1; + p->device = ra_d3d11_get_device(hw->ra); + if (!p->device) + return -1; + + // D3D11VA requires Direct3D 11.1, so this should always succeed + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1, + (void**)&p->device1); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get D3D11.1 interface: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + ID3D10Multithread *multithread; + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread, + (void **)&multithread); + if (FAILED(hr)) { + 
MP_ERR(hw, "Failed to get Multithread interface: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); + ID3D10Multithread_Release(multithread); + + p->hwctx = (struct mp_hwdec_ctx){ + .type = HWDEC_D3D11VA, + .driver_name = hw->driver->name, + .ctx = p->device, + .av_device_ref = d3d11_wrap_device_ref(p->device), + }; + hwdec_devices_add(hw->devs, &p->hwctx); + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + for (int i = 0; i < 4; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); + SAFE_RELEASE(p->copy_tex); + SAFE_RELEASE(p->ctx); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + HRESULT hr; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + struct ra_imgfmt_desc desc = {0}; + struct mp_image layout = {0}; + + if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) + return -1; + + mp_image_set_params(&layout, &mapper->dst_params); + + DXGI_FORMAT copy_fmt; + switch (mapper->dst_params.imgfmt) { + case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break; + case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break; + default: return -1; + } + + // We copy decoder images to an intermediate texture. This is slower than + // the zero-copy path, but according to MSDN, decoder textures should not + // be bound to SRVs, so it is technically correct, and it works around some + // driver "bugs" that can happen with the zero-copy path. It also allows + // samplers to work correctly when the decoder image includes padding. 
+ D3D11_TEXTURE2D_DESC copy_desc = { + .Width = mapper->dst_params.w, + .Height = mapper->dst_params.h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = copy_fmt, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL, &p->copy_tex); + if (FAILED(hr)) { + MP_FATAL(mapper, "Could not create shader resource texture\n"); + return -1; + } + + for (int i = 0; i < desc.num_planes; i++) { + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex, + mp_image_plane_w(&layout, i), + mp_image_plane_h(&layout, i), + desc.planes[i]); + if (!mapper->tex[i]) { + MP_FATAL(mapper, "Could not create RA texture view\n"); + return -1; + } + } + + ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx); + + return 0; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + ID3D11Texture2D *tex = (void *)mapper->src->planes[0]; + int subresource = (intptr_t)mapper->src->planes[1]; + + ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx, + (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0, + (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) { + .left = 0, + .top = 0, + .front = 0, + .right = mapper->dst_params.w, + .bottom = mapper->dst_params.h, + .back = 1, + }), D3D11_COPY_DISCARD); + + return 0; +} + +const struct ra_hwdec_driver ra_hwdec_d3d11va = { + .name = "d3d11va", + .priv_size = sizeof(struct priv_owner), + .api = HWDEC_D3D11VA, + .imgfmts = {IMGFMT_D3D11VA, IMGFMT_D3D11NV12, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + }, +}; diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c new file mode 100644 index 0000000000..372b65d49f --- /dev/null +++ b/video/out/d3d11/ra_d3d11.c @@ -0,0 +1,2235 @@ +#include <windows.h> +#include <versionhelpers.h> +#include <d3d11_1.h> +#include <d3d11sdklayers.h> 
+#include <dxgi1_2.h> +#include <d3dcompiler.h> +#include <crossc.h> + +#include "common/msg.h" +#include "osdep/io.h" +#include "osdep/subprocess.h" +#include "osdep/windows_utils.h" +#include "video/out/gpu/spirv.h" +#include "video/out/gpu/utils.h" + +#include "ra_d3d11.h" + +#ifndef D3D11_1_UAV_SLOT_COUNT +#define D3D11_1_UAV_SLOT_COUNT (64) +#endif + +struct ra_d3d11 { + struct spirv_compiler *spirv; + + ID3D11Device *dev; + ID3D11Device1 *dev1; + ID3D11DeviceContext *ctx; + ID3D11DeviceContext1 *ctx1; + pD3DCompile D3DCompile; + + // Debug interfaces (--gpu-debug) + ID3D11Debug *debug; + ID3D11InfoQueue *iqueue; + + // Device capabilities + D3D_FEATURE_LEVEL fl; + bool has_clear_view; + int max_uavs; + + // Streaming dynamic vertex buffer, which is used for all renderpasses + ID3D11Buffer *vbuf; + size_t vbuf_size; + size_t vbuf_used; + + // clear() renderpass resources (only used when has_clear_view is false) + ID3D11PixelShader *clear_ps; + ID3D11VertexShader *clear_vs; + ID3D11InputLayout *clear_layout; + ID3D11Buffer *clear_vbuf; + ID3D11Buffer *clear_cbuf; + + // blit() renderpass resources + ID3D11PixelShader *blit_float_ps; + ID3D11VertexShader *blit_vs; + ID3D11InputLayout *blit_layout; + ID3D11Buffer *blit_vbuf; + ID3D11SamplerState *blit_sampler; +}; + +struct d3d_tex { + // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not + // hold an additional reference to the texture object. 
+ ID3D11Resource *res; + + ID3D11Texture1D *tex1d; + ID3D11Texture2D *tex2d; + ID3D11Texture3D *tex3d; + + ID3D11ShaderResourceView *srv; + ID3D11RenderTargetView *rtv; + ID3D11UnorderedAccessView *uav; + ID3D11SamplerState *sampler; +}; + +struct d3d_buf { + ID3D11Buffer *buf; + ID3D11Buffer *staging; + ID3D11UnorderedAccessView *uav; + void *data; // Data for mapped staging texture +}; + +struct d3d_rpass { + ID3D11PixelShader *ps; + ID3D11VertexShader *vs; + ID3D11ComputeShader *cs; + ID3D11InputLayout *layout; + ID3D11BlendState *bstate; +}; + +struct d3d_timer { + ID3D11Query *ts_start; + ID3D11Query *ts_end; + ID3D11Query *disjoint; + uint64_t result; // Latches the result from the previous use of the timer +}; + +struct d3d_fmt { + const char *name; + int components; + int bytes; + int bits[4]; + DXGI_FORMAT fmt; + enum ra_ctype ctype; + bool unordered; +}; + +static const char clear_vs[] = "\ +float4 main(float2 pos : POSITION) : SV_Position\n\ +{\n\ + return float4(pos, 0.0, 1.0);\n\ +}\n\ +"; + +static const char clear_ps[] = "\ +cbuffer ps_cbuf : register(b0) {\n\ + float4 color : packoffset(c0);\n\ +}\n\ +\n\ +float4 main(float4 pos : SV_Position) : SV_Target\n\ +{\n\ + return color;\n\ +}\n\ +"; + +struct blit_vert { + float x, y, u, v; +}; + +static const char blit_vs[] = "\ +void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\ + out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\ +{\n\ + out_pos = float4(pos, 0.0, 1.0);\n\ + out_coord = coord;\n\ +}\n\ +"; + +static const char blit_float_ps[] = "\ +Texture2D<float4> tex : register(t0);\n\ +SamplerState samp : register(s0);\n\ +\n\ +float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n\ +{\n\ + return tex.Sample(samp, coord);\n\ +}\n\ +"; + +#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t +static struct d3d_fmt formats[] = { + { "r8", 1, 1, { 8}, DXFMT(R8, UNORM) }, + { "rg8", 2, 2, { 8, 8}, DXFMT(R8G8, UNORM) }, + { "rgba8", 
4, 4, { 8, 8, 8, 8}, DXFMT(R8G8B8A8, UNORM) }, + { "r16", 1, 2, {16}, DXFMT(R16, UNORM) }, + { "rg16", 2, 4, {16, 16}, DXFMT(R16G16, UNORM) }, + { "rgba16", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) }, + + { "r32ui", 1, 4, {32}, DXFMT(R32, UINT) }, + { "rg32ui", 2, 8, {32, 32}, DXFMT(R32G32, UINT) }, + { "rgb32ui", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, UINT) }, + { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT) }, + + { "r16hf", 1, 2, {16}, DXFMT(R16, FLOAT) }, + { "rg16hf", 2, 4, {16, 16}, DXFMT(R16G16, FLOAT) }, + { "rgba16hf", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) }, + { "r32f", 1, 4, {32}, DXFMT(R32, FLOAT) }, + { "rg32f", 2, 8, {32, 32}, DXFMT(R32G32, FLOAT) }, + { "rgb32f", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, FLOAT) }, + { "rgba32f", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) }, + + { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) }, + { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = false }, +}; + +static DXGI_FORMAT fmt_to_dxgi(const struct ra_format *fmt) +{ + struct d3d_fmt *d3d = fmt->priv; + return d3d->fmt; +} + +static void setup_formats(struct ra *ra) +{ + // All formats must be usable as a 2D texture + static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D; + // SHADER_SAMPLE indicates support for linear sampling, point always works + static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE; + // RA requires renderable surfaces to be blendable as well + static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET | + D3D11_FORMAT_SUPPORT_BLENDABLE; + + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) { + struct d3d_fmt *d3dfmt = &formats[i]; + UINT support = 0; + hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support); + if (FAILED(hr)) + continue; + if ((support & sup_basic) != sup_basic) + continue; + + struct ra_format *fmt = talloc_zero(ra, struct ra_format); + *fmt = (struct 
ra_format) { + .name = d3dfmt->name, + .priv = d3dfmt, + .ctype = d3dfmt->ctype, + .ordered = !d3dfmt->unordered, + .num_components = d3dfmt->components, + .pixel_size = d3dfmt->bytes, + .linear_filter = (support & sup_filter) == sup_filter, + .renderable = (support & sup_render) == sup_render, + }; + + if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D) + ra->caps |= RA_CAP_TEX_1D; + + for (int j = 0; j < d3dfmt->components; j++) + fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j]; + + fmt->glsl_format = ra_fmt_glsl_format(fmt); + + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt); + } +} + +static bool tex_init(struct ra *ra, struct ra_tex *tex) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + HRESULT hr; + + // A SRV is required for renderpasses and blitting, since blitting can use + // a renderpass internally + if (params->render_src || params->blit_src) { + // Always specify the SRV format for simplicity. This will match the + // texture format for textures created with tex_create, but it can be + // different for wrapped planar video textures. 
+ D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = { + .Format = fmt_to_dxgi(params->format), + }; + switch (params->dimensions) { + case 1: + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + srvdesc.Texture1D.MipLevels = 1; + break; + case 2: + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvdesc.Texture2D.MipLevels = 1; + break; + case 3: + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + srvdesc.Texture3D.MipLevels = 1; + break; + } + hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc, + &tex_p->srv); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + // Samplers are required for renderpasses, but not blitting, since the blit + // code uses its own point sampler + if (params->render_src) { + D3D11_SAMPLER_DESC sdesc = { + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .ComparisonFunc = D3D11_COMPARISON_NEVER, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX, + .MaxAnisotropy = 1, + }; + if (params->src_linear) + sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + if (params->src_repeat) { + sdesc.AddressU = sdesc.AddressV = sdesc.AddressW = + D3D11_TEXTURE_ADDRESS_WRAP; + } + // The runtime pools sampler state objects internally, so we don't have + // to worry about resource usage when creating one for every ra_tex + hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + // Like SRVs, an RTV is required for renderpass output and blitting + if (params->render_dst || params->blit_dst) { + hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL, + &tex_p->rtv); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) { + hr = 
ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL, + &tex_p->uav); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + return true; +error: + return false; +} + +static void tex_destroy(struct ra *ra, struct ra_tex *tex) +{ + if (!tex) + return; + struct d3d_tex *tex_p = tex->priv; + + SAFE_RELEASE(tex_p->srv); + SAFE_RELEASE(tex_p->rtv); + SAFE_RELEASE(tex_p->uav); + SAFE_RELEASE(tex_p->sampler); + SAFE_RELEASE(tex_p->res); + talloc_free(tex); +} + +static struct ra_tex *tex_create(struct ra *ra, + const struct ra_tex_params *params) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + tex->params = *params; + tex->params.initial_data = NULL; + + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + DXGI_FORMAT fmt = fmt_to_dxgi(params->format); + + D3D11_SUBRESOURCE_DATA *pdata = NULL; + if (params->initial_data) { + pdata = &(D3D11_SUBRESOURCE_DATA) { + .pSysMem = params->initial_data, + .SysMemPitch = params->w * params->format->pixel_size, + }; + if (params->dimensions >= 3) + pdata->SysMemSlicePitch = pdata->SysMemPitch * params->h; + } + + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + + if (params->render_src || params->blit_src) + bind_flags |= D3D11_BIND_SHADER_RESOURCE; + if (params->render_dst || params->blit_dst) + bind_flags |= D3D11_BIND_RENDER_TARGET; + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) + bind_flags |= D3D11_BIND_UNORDERED_ACCESS; + + // Apparently IMMUTABLE textures are efficient, so try to infer whether we + // can use one + if (params->initial_data && !params->render_dst && !params->storage_dst && + |