summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
authorJames Ross-Gowan <rossy@jrg.systems>2017-09-07 20:18:06 +1000
committerJames Ross-Gowan <rossy@jrg.systems>2017-11-07 20:27:13 +1100
commit68eac1a1e7ac931576a8b59dd159a7961189ca48 (patch)
treeef25b97dd381bc5429138f256ae113339a9f353a /video
parent8020a62953926cd6d672e9151290bd4d65e8ee08 (diff)
downloadmpv-68eac1a1e7ac931576a8b59dd159a7961189ca48.tar.bz2
mpv-68eac1a1e7ac931576a8b59dd159a7961189ca48.tar.xz
vo_gpu: d3d11: initial implementation
This is a new RA/vo_gpu backend that uses Direct3D 11. The GLSL generated by vo_gpu is cross-compiled to HLSL with SPIRV-Cross. What works: - All of mpv's internal shaders should work, including compute shaders. - Some external shaders have been tested and work, including RAVU and adaptive-sharpen. - Non-dumb mode works, even on very old hardware. Most features work at feature level 9_3 and all features work at feature level 10_0. Some features also work at feature level 9_1 and 9_2, but without high-bit- depth FBOs, it's not very useful. (Hardware this old is probably not fast enough for advanced features anyway.) Note: This is more compatible than ANGLE, which requires 9_3 to work at all (GLES 2.0,) and 10_1 for non-dumb-mode (GLES 3.0.) - Hardware decoding with D3D11VA, including decoding of 10-bit formats without truncation to 8-bit. What doesn't work / can be improved: - PBO upload and direct rendering does not work yet. Direct rendering requires persistent-mapped PBOs because the decoder needs to be able to read data from images that have already been decoded and uploaded. Unfortunately, it seems like persistent-mapped PBOs are fundamentally incompatible with D3D11, which requires all resources to use driver- managed memory and requires memory to be unmapped (and hence pointers to be invalidated) when a resource is used in a draw or copy operation. However it might be possible to use D3D11's limited multithreading capabilities to emulate some features of PBOs, like asynchronous texture uploading. - The blit() and clear() operations don't have equivalents in the D3D11 API that handle all cases, so in most cases, they have to be emulated with a shader. This is currently done inside ra_d3d11, but ideally it would be done in generic code, so it can take advantage of mpv's shader generation utilities. - SPIRV-Cross is used through a NIH C-compatible wrapper library, since it does not expose a C interface itself. The library is available here: https://github.com/rossy/crossc - The D3D11 context could be made to support more modern DXGI features in future. For example, it should be possible to add support for high-bit-depth and HDR output with DXGI 1.5/1.6.
Diffstat (limited to 'video')
-rw-r--r--video/out/d3d11/context.c235
-rw-r--r--video/out/d3d11/hwdec_d3d11va.c195
-rw-r--r--video/out/d3d11/ra_d3d11.c2235
-rw-r--r--video/out/d3d11/ra_d3d11.h34
-rw-r--r--video/out/gpu/context.c7
-rw-r--r--video/out/gpu/d3d11_helpers.c (renamed from video/out/opengl/d3d11_helpers.c)26
-rw-r--r--video/out/gpu/d3d11_helpers.h (renamed from video/out/opengl/d3d11_helpers.h)6
-rw-r--r--video/out/gpu/hwdec.c4
-rw-r--r--video/out/opengl/context_angle.c2
9 files changed, 2739 insertions, 5 deletions
diff --git a/video/out/d3d11/context.c b/video/out/d3d11/context.c
new file mode 100644
index 0000000000..018fd99934
--- /dev/null
+++ b/video/out/d3d11/context.c
@@ -0,0 +1,235 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "common/msg.h"
+#include "options/m_config.h"
+#include "osdep/windows_utils.h"
+
+#include "video/out/gpu/context.h"
+#include "video/out/gpu/d3d11_helpers.h"
+#include "video/out/gpu/spirv.h"
+#include "video/out/w32_common.h"
+#include "ra_d3d11.h"
+
+struct d3d11_opts {
+ int feature_level;
+ int warp;
+ int flip;
+ int sync_interval;
+};
+
+#define OPT_BASE_STRUCT struct d3d11_opts
+const struct m_sub_options d3d11_conf = {
+ .opts = (const struct m_option[]) {
+ OPT_CHOICE("d3d11-warp", warp, 0,
+ ({"auto", -1},
+ {"no", 0},
+ {"yes", 1})),
+ OPT_CHOICE("d3d11-feature-level", feature_level, 0,
+ ({"12_1", D3D_FEATURE_LEVEL_12_1},
+ {"12_0", D3D_FEATURE_LEVEL_12_0},
+ {"11_1", D3D_FEATURE_LEVEL_11_1},
+ {"11_0", D3D_FEATURE_LEVEL_11_0},
+ {"10_1", D3D_FEATURE_LEVEL_10_1},
+ {"10_0", D3D_FEATURE_LEVEL_10_0},
+ {"9_3", D3D_FEATURE_LEVEL_9_3},
+ {"9_2", D3D_FEATURE_LEVEL_9_2},
+ {"9_1", D3D_FEATURE_LEVEL_9_1})),
+ OPT_FLAG("d3d11-flip", flip, 0),
+ OPT_INTRANGE("d3d11-sync-interval", sync_interval, 0, 0, 4),
+ {0}
+ },
+ .defaults = &(const struct d3d11_opts) {
+ .feature_level = D3D_FEATURE_LEVEL_12_1,
+ .warp = -1,
+ .flip = 1,
+ .sync_interval = 1,
+ },
+ .size = sizeof(struct d3d11_opts)
+};
+
+struct priv {
+ struct d3d11_opts *opts;
+
+ struct ra_tex *backbuffer;
+ ID3D11Device *device;
+ IDXGISwapChain *swapchain;
+};
+
+static struct ra_tex *get_backbuffer(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+ ID3D11Texture2D *backbuffer = NULL;
+ struct ra_tex *tex = NULL;
+ HRESULT hr;
+
+ hr = IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D,
+ (void**)&backbuffer);
+ if (FAILED(hr)) {
+ MP_ERR(ctx, "Couldn't get swapchain image\n");
+ goto done;
+ }
+
+ tex = ra_d3d11_wrap_tex(ctx->ra, (ID3D11Resource *)backbuffer);
+done:
+ SAFE_RELEASE(backbuffer);
+ return tex;
+}
+
+static bool resize(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+ HRESULT hr;
+
+ ra_tex_free(ctx->ra, &p->backbuffer);
+
+ hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, ctx->vo->dwidth,
+ ctx->vo->dheight, DXGI_FORMAT_UNKNOWN, 0);
+ if (FAILED(hr)) {
+ MP_FATAL(ctx, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(hr));
+ return false;
+ }
+
+ p->backbuffer = get_backbuffer(ctx);
+
+ return true;
+}
+
+static bool d3d11_reconfig(struct ra_ctx *ctx)
+{
+ vo_w32_config(ctx->vo);
+ return resize(ctx);
+}
+
+static int d3d11_color_depth(struct ra_swapchain *sw)
+{
+ return 8;
+}
+
+static bool d3d11_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+ struct priv *p = sw->priv;
+ *out_fbo = (struct ra_fbo) {
+ .tex = p->backbuffer,
+ .flip = false,
+ };
+ return true;
+}
+
+static bool d3d11_submit_frame(struct ra_swapchain *sw,
+ const struct vo_frame *frame)
+{
+ ra_d3d11_flush(sw->ctx->ra);
+ return true;
+}
+
+static void d3d11_swap_buffers(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+ IDXGISwapChain_Present(p->swapchain, p->opts->sync_interval, 0);
+}
+
+static int d3d11_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+ int ret = vo_w32_control(ctx->vo, events, request, arg);
+ if (*events & VO_EVENT_RESIZE) {
+ if (!resize(ctx))
+ return VO_ERROR;
+ }
+ return ret;
+}
+
+static void d3d11_uninit(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+
+ ra_tex_free(ctx->ra, &p->backbuffer);
+ SAFE_RELEASE(p->swapchain);
+ vo_w32_uninit(ctx->vo);
+ SAFE_RELEASE(p->device);
+
+ // Destory the RA last to prevent objects we hold from showing up in D3D's
+ // leak checker
+ ctx->ra->fns->destroy(ctx->ra);
+}
+
+static const struct ra_swapchain_fns d3d11_swapchain = {
+ .color_depth = d3d11_color_depth,
+ .start_frame = d3d11_start_frame,
+ .submit_frame = d3d11_submit_frame,
+ .swap_buffers = d3d11_swap_buffers,
+};
+
+static bool d3d11_init(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+ p->opts = mp_get_config_group(ctx, ctx->global, &d3d11_conf);
+
+ struct ra_swapchain *sw = ctx->swapchain = talloc_zero(ctx, struct ra_swapchain);
+ sw->priv = p;
+ sw->ctx = ctx;
+ sw->fns = &d3d11_swapchain;
+
+ struct d3d11_device_opts dopts = {
+ .debug = ctx->opts.debug,
+ .allow_warp = p->opts->warp != 0,
+ .force_warp = p->opts->warp == 1,
+ .max_feature_level = p->opts->feature_level,
+ .max_frame_latency = ctx->opts.swapchain_depth,
+ };
+ if (!mp_d3d11_create_present_device(ctx->log, &dopts, &p->device))
+ goto error;
+
+ if (!spirv_compiler_init(ctx))
+ goto error;
+ ctx->ra = ra_d3d11_create(p->device, ctx->log, ctx->spirv);
+ if (!ctx->ra)
+ goto error;
+
+ if (!vo_w32_init(ctx->vo))
+ goto error;
+
+ struct d3d11_swapchain_opts scopts = {
+ .window = vo_w32_hwnd(ctx->vo),
+ .width = ctx->vo->dwidth,
+ .height = ctx->vo->dheight,
+ .flip = p->opts->flip,
+ // Add one frame for the backbuffer and one frame of "slack" to reduce
+ // contention with the window manager when acquiring the backbuffer
+ .length = ctx->opts.swapchain_depth + 2,
+ .usage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
+ };
+ if (!mp_d3d11_create_swapchain(p->device, ctx->log, &scopts, &p->swapchain))
+ goto error;
+
+ p->backbuffer = get_backbuffer(ctx);
+
+ return true;
+
+error:
+ d3d11_uninit(ctx);
+ return false;
+}
+
+const struct ra_ctx_fns ra_ctx_d3d11 = {
+ .type = "d3d11",
+ .name = "d3d11",
+ .reconfig = d3d11_reconfig,
+ .control = d3d11_control,
+ .init = d3d11_init,
+ .uninit = d3d11_uninit,
+};
diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c
new file mode 100644
index 0000000000..f179298ac1
--- /dev/null
+++ b/video/out/d3d11/hwdec_d3d11va.c
@@ -0,0 +1,195 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <d3d11.h>
+#include <d3d11_1.h>
+
+#include "config.h"
+
+#include "common/common.h"
+#include "osdep/windows_utils.h"
+#include "video/hwdec.h"
+#include "video/decode/d3d.h"
+#include "video/out/d3d11/ra_d3d11.h"
+#include "video/out/gpu/hwdec.h"
+
+struct priv_owner {
+ struct mp_hwdec_ctx hwctx;
+ ID3D11Device *device;
+ ID3D11Device1 *device1;
+};
+
+struct priv {
+ ID3D11DeviceContext1 *ctx;
+ ID3D11Texture2D *copy_tex;
+};
+
+static void uninit(struct ra_hwdec *hw)
+{
+ struct priv_owner *p = hw->priv;
+ if (p->hwctx.ctx)
+ hwdec_devices_remove(hw->devs, &p->hwctx);
+ SAFE_RELEASE(p->device);
+ SAFE_RELEASE(p->device1);
+}
+
+static int init(struct ra_hwdec *hw)
+{
+ struct priv_owner *p = hw->priv;
+ HRESULT hr;
+
+ if (!ra_is_d3d11(hw->ra))
+ return -1;
+ p->device = ra_d3d11_get_device(hw->ra);
+ if (!p->device)
+ return -1;
+
+ // D3D11VA requires Direct3D 11.1, so this should always succeed
+ hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1,
+ (void**)&p->device1);
+ if (FAILED(hr)) {
+ MP_ERR(hw, "Failed to get D3D11.1 interface: %s\n",
+ mp_HRESULT_to_str(hr));
+ return -1;
+ }
+
+ ID3D10Multithread *multithread;
+ hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread,
+ (void **)&multithread);
+ if (FAILED(hr)) {
+ MP_ERR(hw, "Failed to get Multithread interface: %s\n",
+ mp_HRESULT_to_str(hr));
+ return -1;
+ }
+ ID3D10Multithread_SetMultithreadProtected(multithread, TRUE);
+ ID3D10Multithread_Release(multithread);
+
+ p->hwctx = (struct mp_hwdec_ctx){
+ .type = HWDEC_D3D11VA,
+ .driver_name = hw->driver->name,
+ .ctx = p->device,
+ .av_device_ref = d3d11_wrap_device_ref(p->device),
+ };
+ hwdec_devices_add(hw->devs, &p->hwctx);
+ return 0;
+}
+
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+ for (int i = 0; i < 4; i++)
+ ra_tex_free(mapper->ra, &mapper->tex[i]);
+ SAFE_RELEASE(p->copy_tex);
+ SAFE_RELEASE(p->ctx);
+}
+
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+ struct priv_owner *o = mapper->owner->priv;
+ struct priv *p = mapper->priv;
+ HRESULT hr;
+
+ mapper->dst_params = mapper->src_params;
+ mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+ mapper->dst_params.hw_subfmt = 0;
+
+ struct ra_imgfmt_desc desc = {0};
+ struct mp_image layout = {0};
+
+ if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
+ return -1;
+
+ mp_image_set_params(&layout, &mapper->dst_params);
+
+ DXGI_FORMAT copy_fmt;
+ switch (mapper->dst_params.imgfmt) {
+ case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break;
+ case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break;
+ default: return -1;
+ }
+
+ // We copy decoder images to an intermediate texture. This is slower than
+ // the zero-copy path, but according to MSDN, decoder textures should not
+ // be bound to SRVs, so it is technically correct, and it works around some
+ // driver "bugs" that can happen with the zero-copy path. It also allows
+ // samplers to work correctly when the decoder image includes padding.
+ D3D11_TEXTURE2D_DESC copy_desc = {
+ .Width = mapper->dst_params.w,
+ .Height = mapper->dst_params.h,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .SampleDesc.Count = 1,
+ .Format = copy_fmt,
+ .BindFlags = D3D11_BIND_SHADER_RESOURCE,
+ };
+ hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL, &p->copy_tex);
+ if (FAILED(hr)) {
+ MP_FATAL(mapper, "Could not create shader resource texture\n");
+ return -1;
+ }
+
+ for (int i = 0; i < desc.num_planes; i++) {
+ mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex,
+ mp_image_plane_w(&layout, i),
+ mp_image_plane_h(&layout, i),
+ desc.planes[i]);
+ if (!mapper->tex[i]) {
+ MP_FATAL(mapper, "Could not create RA texture view\n");
+ return -1;
+ }
+ }
+
+ ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx);
+
+ return 0;
+}
+
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+ struct priv *p = mapper->priv;
+ ID3D11Texture2D *tex = (void *)mapper->src->planes[0];
+ int subresource = (intptr_t)mapper->src->planes[1];
+
+ ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx,
+ (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0,
+ (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) {
+ .left = 0,
+ .top = 0,
+ .front = 0,
+ .right = mapper->dst_params.w,
+ .bottom = mapper->dst_params.h,
+ .back = 1,
+ }), D3D11_COPY_DISCARD);
+
+ return 0;
+}
+
+const struct ra_hwdec_driver ra_hwdec_d3d11va = {
+ .name = "d3d11va",
+ .priv_size = sizeof(struct priv_owner),
+ .api = HWDEC_D3D11VA,
+ .imgfmts = {IMGFMT_D3D11VA, IMGFMT_D3D11NV12, 0},
+ .init = init,
+ .uninit = uninit,
+ .mapper = &(const struct ra_hwdec_mapper_driver){
+ .priv_size = sizeof(struct priv),
+ .init = mapper_init,
+ .uninit = mapper_uninit,
+ .map = mapper_map,
+ },
+};
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c
new file mode 100644
index 0000000000..372b65d49f
--- /dev/null
+++ b/video/out/d3d11/ra_d3d11.c
@@ -0,0 +1,2235 @@
+#include <windows.h>
+#include <versionhelpers.h>
+#include <d3d11_1.h>
+#include <d3d11sdklayers.h>
+#include <dxgi1_2.h>
+#include <d3dcompiler.h>
+#include <crossc.h>
+
+#include "common/msg.h"
+#include "osdep/io.h"
+#include "osdep/subprocess.h"
+#include "osdep/windows_utils.h"
+#include "video/out/gpu/spirv.h"
+#include "video/out/gpu/utils.h"
+
+#include "ra_d3d11.h"
+
+#ifndef D3D11_1_UAV_SLOT_COUNT
+#define D3D11_1_UAV_SLOT_COUNT (64)
+#endif
+
+struct ra_d3d11 {
+ struct spirv_compiler *spirv;
+
+ ID3D11Device *dev;
+ ID3D11Device1 *dev1;
+ ID3D11DeviceContext *ctx;
+ ID3D11DeviceContext1 *ctx1;
+ pD3DCompile D3DCompile;
+
+ // Debug interfaces (--gpu-debug)
+ ID3D11Debug *debug;
+ ID3D11InfoQueue *iqueue;
+
+ // Device capabilities
+ D3D_FEATURE_LEVEL fl;
+ bool has_clear_view;
+ int max_uavs;
+
+ // Streaming dynamic vertex buffer, which is used for all renderpasses
+ ID3D11Buffer *vbuf;
+ size_t vbuf_size;
+ size_t vbuf_used;
+
+ // clear() renderpass resources (only used when has_clear_view is false)
+ ID3D11PixelShader *clear_ps;
+ ID3D11VertexShader *clear_vs;
+ ID3D11InputLayout *clear_layout;
+ ID3D11Buffer *clear_vbuf;
+ ID3D11Buffer *clear_cbuf;
+
+ // blit() renderpass resources
+ ID3D11PixelShader *blit_float_ps;
+ ID3D11VertexShader *blit_vs;
+ ID3D11InputLayout *blit_layout;
+ ID3D11Buffer *blit_vbuf;
+ ID3D11SamplerState *blit_sampler;
+};
+
+struct d3d_tex {
+ // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not
+ // hold an additional reference to the texture object.
+ ID3D11Resource *res;
+
+ ID3D11Texture1D *tex1d;
+ ID3D11Texture2D *tex2d;
+ ID3D11Texture3D *tex3d;
+
+ ID3D11ShaderResourceView *srv;
+ ID3D11RenderTargetView *rtv;
+ ID3D11UnorderedAccessView *uav;
+ ID3D11SamplerState *sampler;
+};
+
+struct d3d_buf {
+ ID3D11Buffer *buf;
+ ID3D11Buffer *staging;
+ ID3D11UnorderedAccessView *uav;
+ void *data; // Data for mapped staging texture
+};
+
+struct d3d_rpass {
+ ID3D11PixelShader *ps;
+ ID3D11VertexShader *vs;
+ ID3D11ComputeShader *cs;
+ ID3D11InputLayout *layout;
+ ID3D11BlendState *bstate;
+};
+
+struct d3d_timer {
+ ID3D11Query *ts_start;
+ ID3D11Query *ts_end;
+ ID3D11Query *disjoint;
+ uint64_t result; // Latches the result from the previous use of the timer
+};
+
+struct d3d_fmt {
+ const char *name;
+ int components;
+ int bytes;
+ int bits[4];
+ DXGI_FORMAT fmt;
+ enum ra_ctype ctype;
+ bool unordered;
+};
+
+static const char clear_vs[] = "\
+float4 main(float2 pos : POSITION) : SV_Position\n\
+{\n\
+ return float4(pos, 0.0, 1.0);\n\
+}\n\
+";
+
+static const char clear_ps[] = "\
+cbuffer ps_cbuf : register(b0) {\n\
+ float4 color : packoffset(c0);\n\
+}\n\
+\n\
+float4 main(float4 pos : SV_Position) : SV_Target\n\
+{\n\
+ return color;\n\
+}\n\
+";
+
+struct blit_vert {
+ float x, y, u, v;
+};
+
+static const char blit_vs[] = "\
+void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\
+ out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\
+{\n\
+ out_pos = float4(pos, 0.0, 1.0);\n\
+ out_coord = coord;\n\
+}\n\
+";
+
+static const char blit_float_ps[] = "\
+Texture2D<float4> tex : register(t0);\n\
+SamplerState samp : register(s0);\n\
+\n\
+float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n\
+{\n\
+ return tex.Sample(samp, coord);\n\
+}\n\
+";
+
+#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t
+static struct d3d_fmt formats[] = {
+ { "r8", 1, 1, { 8}, DXFMT(R8, UNORM) },
+ { "rg8", 2, 2, { 8, 8}, DXFMT(R8G8, UNORM) },
+ { "rgba8", 4, 4, { 8, 8, 8, 8}, DXFMT(R8G8B8A8, UNORM) },
+ { "r16", 1, 2, {16}, DXFMT(R16, UNORM) },
+ { "rg16", 2, 4, {16, 16}, DXFMT(R16G16, UNORM) },
+ { "rgba16", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) },
+
+ { "r32ui", 1, 4, {32}, DXFMT(R32, UINT) },
+ { "rg32ui", 2, 8, {32, 32}, DXFMT(R32G32, UINT) },
+ { "rgb32ui", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, UINT) },
+ { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT) },
+
+ { "r16hf", 1, 2, {16}, DXFMT(R16, FLOAT) },
+ { "rg16hf", 2, 4, {16, 16}, DXFMT(R16G16, FLOAT) },
+ { "rgba16hf", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) },
+ { "r32f", 1, 4, {32}, DXFMT(R32, FLOAT) },
+ { "rg32f", 2, 8, {32, 32}, DXFMT(R32G32, FLOAT) },
+ { "rgb32f", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, FLOAT) },
+ { "rgba32f", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) },
+
+ { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) },
+ { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = false },
+};
+
+static DXGI_FORMAT fmt_to_dxgi(const struct ra_format *fmt)
+{
+ struct d3d_fmt *d3d = fmt->priv;
+ return d3d->fmt;
+}
+
+static void setup_formats(struct ra *ra)
+{
+ // All formats must be usable as a 2D texture
+ static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D;
+ // SHADER_SAMPLE indicates support for linear sampling, point always works
+ static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE;
+ // RA requires renderable surfaces to be blendable as well
+ static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET |
+ D3D11_FORMAT_SUPPORT_BLENDABLE;
+
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) {
+ struct d3d_fmt *d3dfmt = &formats[i];
+ UINT support = 0;
+ hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support);
+ if (FAILED(hr))
+ continue;
+ if ((support & sup_basic) != sup_basic)
+ continue;
+
+ struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+ *fmt = (struct ra_format) {
+ .name = d3dfmt->name,
+ .priv = d3dfmt,
+ .ctype = d3dfmt->ctype,
+ .ordered = !d3dfmt->unordered,
+ .num_components = d3dfmt->components,
+ .pixel_size = d3dfmt->bytes,
+ .linear_filter = (support & sup_filter) == sup_filter,
+ .renderable = (support & sup_render) == sup_render,
+ };
+
+ if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D)
+ ra->caps |= RA_CAP_TEX_1D;
+
+ for (int j = 0; j < d3dfmt->components; j++)
+ fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j];
+
+ fmt->glsl_format = ra_fmt_glsl_format(fmt);
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+ }
+}
+
+static bool tex_init(struct ra *ra, struct ra_tex *tex)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *tex_p = tex->priv;
+ struct ra_tex_params *params = &tex->params;
+ HRESULT hr;
+
+ // A SRV is required for renderpasses and blitting, since blitting can use
+ // a renderpass internally
+ if (params->render_src || params->blit_src) {
+ // Always specify the SRV format for simplicity. This will match the
+ // texture format for textures created with tex_create, but it can be
+ // different for wrapped planar video textures.
+ D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = {
+ .Format = fmt_to_dxgi(params->format),
+ };
+ switch (params->dimensions) {
+ case 1:
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
+ srvdesc.Texture1D.MipLevels = 1;
+ break;
+ case 2:
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+ srvdesc.Texture2D.MipLevels = 1;
+ break;
+ case 3:
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
+ srvdesc.Texture3D.MipLevels = 1;
+ break;
+ }
+ hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc,
+ &tex_p->srv);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ // Samplers are required for renderpasses, but not blitting, since the blit
+ // code uses its own point sampler
+ if (params->render_src) {
+ D3D11_SAMPLER_DESC sdesc = {
+ .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .ComparisonFunc = D3D11_COMPARISON_NEVER,
+ .MinLOD = 0,
+ .MaxLOD = D3D11_FLOAT32_MAX,
+ .MaxAnisotropy = 1,
+ };
+ if (params->src_linear)
+ sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
+ if (params->src_repeat) {
+ sdesc.AddressU = sdesc.AddressV = sdesc.AddressW =
+ D3D11_TEXTURE_ADDRESS_WRAP;
+ }
+ // The runtime pools sampler state objects internally, so we don't have
+ // to worry about resource usage when creating one for every ra_tex
+ hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ // Like SRVs, an RTV is required for renderpass output and blitting
+ if (params->render_dst || params->blit_dst) {
+ hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL,
+ &tex_p->rtv);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) {
+ hr = ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL,
+ &tex_p->uav);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ return true;
+error:
+ return false;
+}
+
+static void tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+ struct d3d_tex *tex_p = tex->priv;
+
+ SAFE_RELEASE(tex_p->srv);
+ SAFE_RELEASE(tex_p->rtv);
+ SAFE_RELEASE(tex_p->uav);
+ SAFE_RELEASE(tex_p->sampler);
+ SAFE_RELEASE(tex_p->res);
+ talloc_free(tex);
+}
+
+static struct ra_tex *tex_create(struct ra *ra,
+ const struct ra_tex_params *params)
+{
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = *params;
+ tex->params.initial_data = NULL;
+
+ struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+ DXGI_FORMAT fmt = fmt_to_dxgi(params->format);
+
+ D3D11_SUBRESOURCE_DATA *pdata = NULL;
+ if (params->initial_data) {
+ pdata = &(D3D11_SUBRESOURCE_DATA) {
+ .pSysMem = params->initial_data,
+ .SysMemPitch = params->w * params->format->pixel_size,
+ };
+ if (params->dimensions >= 3)
+ pdata->SysMemSlicePitch = pdata->SysMemPitch * params->h;
+ }
+
+ D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+ D3D11_BIND_FLAG bind_flags = 0;
+
+ if (params->render_src || params->blit_src)
+ bind_flags |= D3D11_BIND_SHADER_RESOURCE;
+ if (params->render_dst || params->blit_dst)
+ bind_flags |= D3D11_BIND_RENDER_TARGET;
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst)
+ bind_flags |= D3D11_BIND_UNORDERED_ACCESS;
+
+ // Apparently IMMUTABLE textures are efficient, so try to infer whether we
+ // can use one
+ if (params->initial_data && !params->render_dst && !params->storage_dst &&
+ !params->blit_dst && !params->host_mutable)
+ usage = D3D11_USAGE_IMMUTABLE;
+
+ switch (params->dimensions) {
+ case 1:;
+ D3D11_TEXTURE1D_DESC desc1d = {
+ .Width = params->w,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture1D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex1d;
+ break;
+ case 2:;
+ D3D11_TEXTURE2D_DESC desc2d = {
+ .Width = params->w,
+ .Height = params->h,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .SampleDesc.Count = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture2D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+ break;
+ case 3:;
+ D3D11_TEXTURE3D_DESC desc3d = {
+ .Width = params->w,
+ .Height = params->h,
+ .Depth = params->d,
+ .MipLevels = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture3D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex3d;
+ break;
+ default:
+ abort();
+ }
+
+ if (!tex_init(ra, tex))
+ goto error;
+
+ return tex;
+
+error:
+ tex_destroy(ra, tex);
+ return NULL;
+}
+
+struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res)
+{
+ HRESULT hr;
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ struct ra_tex_params *params = &tex->params;
+ struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+
+ DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN;
+ D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+ D3D11_BIND_FLAG bind_flags = 0;
+
+ D3D11_RESOURCE_DIMENSION type;
+ ID3D11Resource_GetType(res, &type);
+ switch (type) {
+ case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
+ hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D,
+ (void**)&tex_p->tex2d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Resource is not a ID3D11Texture2D\n");
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+
+ D3D11_TEXTURE2D_DESC desc2d;
+ ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d);
+ if (desc2d.MipLevels != 1 || desc2d.ArraySize != 1)
+ goto error;
+ if (desc2d.SampleDesc.Count != 1)
+ goto error;
+
+ params->dimensions = 2;
+ params->w = desc2d.Width;
+ params->h = desc2d.Height;
+ params->d = 1;
+ usage = desc2d.Usage;
+ bind_flags = desc2d.BindFlags;
+ fmt = desc2d.Format;
+ break;
+ default:
+ // We could wrap Texture1D/3D as well, but keep it simple, since this
+ // function is only used for swapchain backbuffers at the moment
+ MP_ERR(ra, "Resource is not suitable to wrap\n");
+ goto error;
+ }
+
+ for (int i = 0; i < ra->num_formats; i++) {
+ DXGI_FORMAT target_fmt = fmt_to_dxgi(ra->formats[i]);
+ if (fmt == target_fmt) {
+ params->format = ra->formats[i];
+ break;
+ }
+ }
+ if (!params->format) {
+ MP_