summaryrefslogtreecommitdiffstats
path: root/video/out/d3d11/ra_d3d11.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/d3d11/ra_d3d11.c')
-rw-r--r--video/out/d3d11/ra_d3d11.c2235
1 files changed, 2235 insertions, 0 deletions
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c
new file mode 100644
index 0000000000..372b65d49f
--- /dev/null
+++ b/video/out/d3d11/ra_d3d11.c
@@ -0,0 +1,2235 @@
+#include <windows.h>
+#include <versionhelpers.h>
+#include <d3d11_1.h>
+#include <d3d11sdklayers.h>
+#include <dxgi1_2.h>
+#include <d3dcompiler.h>
+#include <crossc.h>
+
+#include "common/msg.h"
+#include "osdep/io.h"
+#include "osdep/subprocess.h"
+#include "osdep/windows_utils.h"
+#include "video/out/gpu/spirv.h"
+#include "video/out/gpu/utils.h"
+
+#include "ra_d3d11.h"
+
+#ifndef D3D11_1_UAV_SLOT_COUNT
+#define D3D11_1_UAV_SLOT_COUNT (64)
+#endif
+
+struct ra_d3d11 {
+ struct spirv_compiler *spirv;
+
+ ID3D11Device *dev;
+ ID3D11Device1 *dev1;
+ ID3D11DeviceContext *ctx;
+ ID3D11DeviceContext1 *ctx1;
+ pD3DCompile D3DCompile;
+
+ // Debug interfaces (--gpu-debug)
+ ID3D11Debug *debug;
+ ID3D11InfoQueue *iqueue;
+
+ // Device capabilities
+ D3D_FEATURE_LEVEL fl;
+ bool has_clear_view;
+ int max_uavs;
+
+ // Streaming dynamic vertex buffer, which is used for all renderpasses
+ ID3D11Buffer *vbuf;
+ size_t vbuf_size;
+ size_t vbuf_used;
+
+ // clear() renderpass resources (only used when has_clear_view is false)
+ ID3D11PixelShader *clear_ps;
+ ID3D11VertexShader *clear_vs;
+ ID3D11InputLayout *clear_layout;
+ ID3D11Buffer *clear_vbuf;
+ ID3D11Buffer *clear_cbuf;
+
+ // blit() renderpass resources
+ ID3D11PixelShader *blit_float_ps;
+ ID3D11VertexShader *blit_vs;
+ ID3D11InputLayout *blit_layout;
+ ID3D11Buffer *blit_vbuf;
+ ID3D11SamplerState *blit_sampler;
+};
+
+struct d3d_tex {
+ // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not
+ // hold an additional reference to the texture object.
+ ID3D11Resource *res;
+
+ ID3D11Texture1D *tex1d;
+ ID3D11Texture2D *tex2d;
+ ID3D11Texture3D *tex3d;
+
+ ID3D11ShaderResourceView *srv;
+ ID3D11RenderTargetView *rtv;
+ ID3D11UnorderedAccessView *uav;
+ ID3D11SamplerState *sampler;
+};
+
+struct d3d_buf {
+ ID3D11Buffer *buf;
+ ID3D11Buffer *staging;
+ ID3D11UnorderedAccessView *uav;
+ void *data; // Data for mapped staging texture
+};
+
+struct d3d_rpass {
+ ID3D11PixelShader *ps;
+ ID3D11VertexShader *vs;
+ ID3D11ComputeShader *cs;
+ ID3D11InputLayout *layout;
+ ID3D11BlendState *bstate;
+};
+
+struct d3d_timer {
+ ID3D11Query *ts_start;
+ ID3D11Query *ts_end;
+ ID3D11Query *disjoint;
+ uint64_t result; // Latches the result from the previous use of the timer
+};
+
+struct d3d_fmt {
+ const char *name;
+ int components;
+ int bytes;
+ int bits[4];
+ DXGI_FORMAT fmt;
+ enum ra_ctype ctype;
+ bool unordered;
+};
+
+static const char clear_vs[] = "\
+float4 main(float2 pos : POSITION) : SV_Position\n\
+{\n\
+ return float4(pos, 0.0, 1.0);\n\
+}\n\
+";
+
+static const char clear_ps[] = "\
+cbuffer ps_cbuf : register(b0) {\n\
+ float4 color : packoffset(c0);\n\
+}\n\
+\n\
+float4 main(float4 pos : SV_Position) : SV_Target\n\
+{\n\
+ return color;\n\
+}\n\
+";
+
+struct blit_vert {
+ float x, y, u, v;
+};
+
+static const char blit_vs[] = "\
+void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\
+ out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\
+{\n\
+ out_pos = float4(pos, 0.0, 1.0);\n\
+ out_coord = coord;\n\
+}\n\
+";
+
+static const char blit_float_ps[] = "\
+Texture2D<float4> tex : register(t0);\n\
+SamplerState samp : register(s0);\n\
+\n\
+float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n\
+{\n\
+ return tex.Sample(samp, coord);\n\
+}\n\
+";
+
+#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t
+static struct d3d_fmt formats[] = {
+ { "r8", 1, 1, { 8}, DXFMT(R8, UNORM) },
+ { "rg8", 2, 2, { 8, 8}, DXFMT(R8G8, UNORM) },
+ { "rgba8", 4, 4, { 8, 8, 8, 8}, DXFMT(R8G8B8A8, UNORM) },
+ { "r16", 1, 2, {16}, DXFMT(R16, UNORM) },
+ { "rg16", 2, 4, {16, 16}, DXFMT(R16G16, UNORM) },
+ { "rgba16", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) },
+
+ { "r32ui", 1, 4, {32}, DXFMT(R32, UINT) },
+ { "rg32ui", 2, 8, {32, 32}, DXFMT(R32G32, UINT) },
+ { "rgb32ui", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, UINT) },
+ { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT) },
+
+ { "r16hf", 1, 2, {16}, DXFMT(R16, FLOAT) },
+ { "rg16hf", 2, 4, {16, 16}, DXFMT(R16G16, FLOAT) },
+ { "rgba16hf", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) },
+ { "r32f", 1, 4, {32}, DXFMT(R32, FLOAT) },
+ { "rg32f", 2, 8, {32, 32}, DXFMT(R32G32, FLOAT) },
+ { "rgb32f", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, FLOAT) },
+ { "rgba32f", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) },
+
+ { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) },
+ { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = false },
+};
+
+static DXGI_FORMAT fmt_to_dxgi(const struct ra_format *fmt)
+{
+ struct d3d_fmt *d3d = fmt->priv;
+ return d3d->fmt;
+}
+
+static void setup_formats(struct ra *ra)
+{
+ // All formats must be usable as a 2D texture
+ static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D;
+ // SHADER_SAMPLE indicates support for linear sampling, point always works
+ static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE;
+ // RA requires renderable surfaces to be blendable as well
+ static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET |
+ D3D11_FORMAT_SUPPORT_BLENDABLE;
+
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) {
+ struct d3d_fmt *d3dfmt = &formats[i];
+ UINT support = 0;
+ hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support);
+ if (FAILED(hr))
+ continue;
+ if ((support & sup_basic) != sup_basic)
+ continue;
+
+ struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+ *fmt = (struct ra_format) {
+ .name = d3dfmt->name,
+ .priv = d3dfmt,
+ .ctype = d3dfmt->ctype,
+ .ordered = !d3dfmt->unordered,
+ .num_components = d3dfmt->components,
+ .pixel_size = d3dfmt->bytes,
+ .linear_filter = (support & sup_filter) == sup_filter,
+ .renderable = (support & sup_render) == sup_render,
+ };
+
+ if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D)
+ ra->caps |= RA_CAP_TEX_1D;
+
+ for (int j = 0; j < d3dfmt->components; j++)
+ fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j];
+
+ fmt->glsl_format = ra_fmt_glsl_format(fmt);
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+ }
+}
+
+static bool tex_init(struct ra *ra, struct ra_tex *tex)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *tex_p = tex->priv;
+ struct ra_tex_params *params = &tex->params;
+ HRESULT hr;
+
+ // A SRV is required for renderpasses and blitting, since blitting can use
+ // a renderpass internally
+ if (params->render_src || params->blit_src) {
+ // Always specify the SRV format for simplicity. This will match the
+ // texture format for textures created with tex_create, but it can be
+ // different for wrapped planar video textures.
+ D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = {
+ .Format = fmt_to_dxgi(params->format),
+ };
+ switch (params->dimensions) {
+ case 1:
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
+ srvdesc.Texture1D.MipLevels = 1;
+ break;
+ case 2:
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+ srvdesc.Texture2D.MipLevels = 1;
+ break;
+ case 3:
+ srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
+ srvdesc.Texture3D.MipLevels = 1;
+ break;
+ }
+ hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc,
+ &tex_p->srv);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ // Samplers are required for renderpasses, but not blitting, since the blit
+ // code uses its own point sampler
+ if (params->render_src) {
+ D3D11_SAMPLER_DESC sdesc = {
+ .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
+ .ComparisonFunc = D3D11_COMPARISON_NEVER,
+ .MinLOD = 0,
+ .MaxLOD = D3D11_FLOAT32_MAX,
+ .MaxAnisotropy = 1,
+ };
+ if (params->src_linear)
+ sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
+ if (params->src_repeat) {
+ sdesc.AddressU = sdesc.AddressV = sdesc.AddressW =
+ D3D11_TEXTURE_ADDRESS_WRAP;
+ }
+ // The runtime pools sampler state objects internally, so we don't have
+ // to worry about resource usage when creating one for every ra_tex
+ hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ // Like SRVs, an RTV is required for renderpass output and blitting
+ if (params->render_dst || params->blit_dst) {
+ hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL,
+ &tex_p->rtv);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) {
+ hr = ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL,
+ &tex_p->uav);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ return true;
+error:
+ return false;
+}
+
+static void tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+ struct d3d_tex *tex_p = tex->priv;
+
+ SAFE_RELEASE(tex_p->srv);
+ SAFE_RELEASE(tex_p->rtv);
+ SAFE_RELEASE(tex_p->uav);
+ SAFE_RELEASE(tex_p->sampler);
+ SAFE_RELEASE(tex_p->res);
+ talloc_free(tex);
+}
+
+static struct ra_tex *tex_create(struct ra *ra,
+ const struct ra_tex_params *params)
+{
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = *params;
+ tex->params.initial_data = NULL;
+
+ struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+ DXGI_FORMAT fmt = fmt_to_dxgi(params->format);
+
+ D3D11_SUBRESOURCE_DATA *pdata = NULL;
+ if (params->initial_data) {
+ pdata = &(D3D11_SUBRESOURCE_DATA) {
+ .pSysMem = params->initial_data,
+ .SysMemPitch = params->w * params->format->pixel_size,
+ };
+ if (params->dimensions >= 3)
+ pdata->SysMemSlicePitch = pdata->SysMemPitch * params->h;
+ }
+
+ D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+ D3D11_BIND_FLAG bind_flags = 0;
+
+ if (params->render_src || params->blit_src)
+ bind_flags |= D3D11_BIND_SHADER_RESOURCE;
+ if (params->render_dst || params->blit_dst)
+ bind_flags |= D3D11_BIND_RENDER_TARGET;
+ if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst)
+ bind_flags |= D3D11_BIND_UNORDERED_ACCESS;
+
+ // Apparently IMMUTABLE textures are efficient, so try to infer whether we
+ // can use one
+ if (params->initial_data && !params->render_dst && !params->storage_dst &&
+ !params->blit_dst && !params->host_mutable)
+ usage = D3D11_USAGE_IMMUTABLE;
+
+ switch (params->dimensions) {
+ case 1:;
+ D3D11_TEXTURE1D_DESC desc1d = {
+ .Width = params->w,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture1D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex1d;
+ break;
+ case 2:;
+ D3D11_TEXTURE2D_DESC desc2d = {
+ .Width = params->w,
+ .Height = params->h,
+ .MipLevels = 1,
+ .ArraySize = 1,
+ .SampleDesc.Count = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture2D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+ break;
+ case 3:;
+ D3D11_TEXTURE3D_DESC desc3d = {
+ .Width = params->w,
+ .Height = params->h,
+ .Depth = params->d,
+ .MipLevels = 1,
+ .Format = fmt,
+ .Usage = usage,
+ .BindFlags = bind_flags,
+ };
+ hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create Texture3D: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex3d;
+ break;
+ default:
+ abort();
+ }
+
+ if (!tex_init(ra, tex))
+ goto error;
+
+ return tex;
+
+error:
+ tex_destroy(ra, tex);
+ return NULL;
+}
+
+struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res)
+{
+ HRESULT hr;
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ struct ra_tex_params *params = &tex->params;
+ struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+
+ DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN;
+ D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+ D3D11_BIND_FLAG bind_flags = 0;
+
+ D3D11_RESOURCE_DIMENSION type;
+ ID3D11Resource_GetType(res, &type);
+ switch (type) {
+ case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
+ hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D,
+ (void**)&tex_p->tex2d);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Resource is not a ID3D11Texture2D\n");
+ goto error;
+ }
+ tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+
+ D3D11_TEXTURE2D_DESC desc2d;
+ ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d);
+ if (desc2d.MipLevels != 1 || desc2d.ArraySize != 1)
+ goto error;
+ if (desc2d.SampleDesc.Count != 1)
+ goto error;
+
+ params->dimensions = 2;
+ params->w = desc2d.Width;
+ params->h = desc2d.Height;
+ params->d = 1;
+ usage = desc2d.Usage;
+ bind_flags = desc2d.BindFlags;
+ fmt = desc2d.Format;
+ break;
+ default:
+ // We could wrap Texture1D/3D as well, but keep it simple, since this
+ // function is only used for swapchain backbuffers at the moment
+ MP_ERR(ra, "Resource is not suitable to wrap\n");
+ goto error;
+ }
+
+ for (int i = 0; i < ra->num_formats; i++) {
+ DXGI_FORMAT target_fmt = fmt_to_dxgi(ra->formats[i]);
+ if (fmt == target_fmt) {
+ params->format = ra->formats[i];
+ break;
+ }
+ }
+ if (!params->format) {
+ MP_ERR(ra, "Could not find a suitable RA format for wrapped resource\n");
+ goto error;
+ }
+
+ if (bind_flags & D3D11_BIND_SHADER_RESOURCE)
+ params->render_src = params->blit_src = true;
+ if (bind_flags & D3D11_BIND_RENDER_TARGET)
+ params->render_dst = params->blit_dst = true;
+ if (bind_flags & D3D11_BIND_UNORDERED_ACCESS)
+ params->storage_dst = true;
+
+ if (usage != D3D11_USAGE_DEFAULT) {
+ MP_ERR(ra, "Resource is not D3D11_USAGE_DEFAULT\n");
+ goto error;
+ }
+
+ if (!tex_init(ra, tex))
+ goto error;
+
+ return tex;
+error:
+ tex_destroy(ra, tex);
+ return NULL;
+}
+
+struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
+ int w, int h,
+ const struct ra_format *fmt)
+{
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ struct ra_tex_params *params = &tex->params;
+ struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+
+ tex_p->tex2d = res;
+ tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+ ID3D11Texture2D_AddRef(res);
+
+ D3D11_TEXTURE2D_DESC desc2d;
+ ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d);
+ if (!(desc2d.BindFlags & D3D11_BIND_SHADER_RESOURCE)) {
+ MP_ERR(ra, "Video resource is not bindable\n");
+ goto error;
+ }
+
+ params->dimensions = 2;
+ params->w = w;
+ params->h = h;
+ params->d = 1;
+ params->render_src = true;
+ params->src_linear = true;
+ // fmt can be different to the texture format for planar video textures
+ params->format = fmt;
+
+ if (!tex_init(ra, tex))
+ goto error;
+
+ return tex;
+error:
+ tex_destroy(ra, tex);
+ return NULL;
+}
+
+static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct ra_tex *tex = params->tex;
+ struct d3d_tex *tex_p = tex->priv;
+
+ if (!params->src) {
+ MP_ERR(ra, "Pixel buffers are not supported\n");
+ return false;
+ }
+
+ const char *src = params->src;
+ ptrdiff_t stride = tex->params.dimensions >= 2 ? tex->params.w : 0;
+ ptrdiff_t pitch = tex->params.dimensions >= 3 ? stride * tex->params.h : 0;
+ bool invalidate = true;
+ D3D11_BOX *rc = NULL;
+
+ if (tex->params.dimensions == 2) {
+ stride = params->stride;
+
+ // stride can be negative, but vo_gpu expects the RA backend to ignore
+ // the negative stride and upload the image "upside-down" for now
+ if (stride < 0) {
+ int h = params->rc ? mp_rect_h(*params->rc) : tex->params.h;
+ src += (h - 1) * stride;
+ stride = -stride;
+ }
+
+ if (params->rc && (params->rc->x0 != 0 || params->rc->y0 != 0 ||
+ params->rc->x1 != tex->params.w || params->rc->y1 != tex->params.h))
+ {
+ rc = &(D3D11_BOX) {
+ .left = params->rc->x0,
+ .top = params->rc->y0,
+ .front = 0,
+ .right = params->rc->x1,
+ .bottom = params->rc->y1,
+ .back = 1,
+ };
+ invalidate = params->invalidate;
+ }
+ }
+
+ if (p->ctx1) {
+ ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, 0, rc,
+ src, stride, pitch, invalidate ? D3D11_COPY_DISCARD : 0);
+ } else {
+ ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, 0, rc,
+ src, stride, pitch);
+ }
+
+ return true;
+}
+
+static void buf_destroy(struct ra *ra, struct ra_buf *buf)
+{
+ if (!buf)
+ return;
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_buf *buf_p = buf->priv;
+
+ if (buf_p->data)
+ ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)buf_p->staging, 0);
+ SAFE_RELEASE(buf_p->buf);
+ SAFE_RELEASE(buf_p->staging);
+ SAFE_RELEASE(buf_p->uav);
+ talloc_free(buf);
+}
+
+static struct ra_buf *buf_create(struct ra *ra,
+ const struct ra_buf_params *params)
+{
+ // D3D11 does not support permanent mapping or pixel buffers
+ if (params->host_mapped || params->type == RA_BUF_TYPE_TEX_UPLOAD)
+ return NULL;
+
+ struct ra_d3d11 *p = ra->priv;
+ HRESULT hr;
+
+ struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
+ buf->params = *params;
+ buf->params.initial_data = NULL;
+
+ struct d3d_buf *buf_p = buf->priv = talloc_zero(buf, struct d3d_buf);
+
+ D3D11_SUBRESOURCE_DATA *pdata = NULL;
+ if (params->initial_data)
+ pdata = &(D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data };
+
+ D3D11_BUFFER_DESC desc = { .ByteWidth = params->size };
+ switch (params->type) {
+ case RA_BUF_TYPE_SHADER_STORAGE:
+ desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
+ desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float));
+ desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
+ break;
+ case RA_BUF_TYPE_UNIFORM:
+ desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+ desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float[4]));
+ break;
+ }
+
+ hr = ID3D11Device_CreateBuffer(p->dev, &desc, pdata, &buf_p->buf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create buffer: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ if (params->host_mutable) {
+ // D3D11 doesn't allow constant buffer updates that aren't aligned to a
+ // full constant boundary (vec4,) and some drivers don't allow partial
+ // constant buffer updates at all, but the RA consumer is allowed to
+ // partially update an ra_buf. The best way to handle partial updates
+ // without causing a pipeline stall is probably to keep a copy of the
+ // data in a staging buffer.
+
+ desc.Usage = D3D11_USAGE_STAGING;
+ desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+ desc.BindFlags = 0;
+ hr = ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &buf_p->staging);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create staging buffer: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ if (params->type == RA_BUF_TYPE_SHADER_STORAGE) {
+ D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = {
+ .Format = DXGI_FORMAT_R32_TYPELESS,
+ .ViewDimension = D3D11_UAV_DIMENSION_BUFFER,
+ .Buffer = {
+ .NumElements = desc.ByteWidth / sizeof(float),
+ .Flags = D3D11_BUFFER_UAV_FLAG_RAW,
+ },
+ };
+ hr = ID3D11Device_CreateUnorderedAccessView(p->dev,
+ (ID3D11Resource *)buf_p->buf, &udesc, &buf_p->uav);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
+ goto error;
+ }
+ }
+
+ return buf;
+error:
+ buf_destroy(ra, buf);
+ return NULL;
+}
+
+static void buf_resolve(struct ra *ra, struct ra_buf *buf)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_buf *buf_p = buf->priv;
+
+ assert(buf->params.host_mutable);
+ if (!buf_p->data)
+ return;
+
+ ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)buf_p->staging, 0);
+ buf_p->data = NULL;
+
+ // Synchronize the GPU buffer with the staging buffer
+ ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource *)buf_p->buf,
+ (ID3D11Resource *)buf_p->staging);
+}
+
+static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+ const void *data, size_t size)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_buf *buf_p = buf->priv;
+ HRESULT hr;
+
+ if (!buf_p->data) {
+ // If this is the first update after the buffer was created or after it
+ // has been used in a renderpass, it will be unmapped, so map it
+ D3D11_MAPPED_SUBRESOURCE map = {0};
+ hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)buf_p->staging,
+ 0, D3D11_MAP_WRITE, 0, &map);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to map resource\n");
+ return;
+ }
+ buf_p->data = map.pData;
+ }
+
+ char *cdata = buf_p->data;
+ memcpy(cdata + offset, data, size);
+}
+
+static const char *get_shader_target(struct ra *ra, enum glsl_shader type)
+{
+ struct ra_d3d11 *p = ra->priv;
+ switch (p->fl) {
+ default:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_5_0";
+ case GLSL_SHADER_FRAGMENT: return "ps_5_0";
+ case GLSL_SHADER_COMPUTE: return "cs_5_0";
+ }
+ break;
+ case D3D_FEATURE_LEVEL_10_1:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_4_1";
+ case GLSL_SHADER_FRAGMENT: return "ps_4_1";
+ case GLSL_SHADER_COMPUTE: return "cs_4_1";
+ }
+ break;
+ case D3D_FEATURE_LEVEL_10_0:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_4_0";
+ case GLSL_SHADER_FRAGMENT: return "ps_4_0";
+ case GLSL_SHADER_COMPUTE: return "cs_4_0";
+ }
+ break;
+ case D3D_FEATURE_LEVEL_9_3:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3";
+ case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3";
+ }
+ break;
+ case D3D_FEATURE_LEVEL_9_2:
+ case D3D_FEATURE_LEVEL_9_1:
+ switch (type) {
+ case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1";
+ case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1";
+ }
+ break;
+ }
+ return NULL;
+}
+
+static bool setup_clear_rpass(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ ID3DBlob *vs_blob = NULL;
+ ID3DBlob *ps_blob = NULL;
+ HRESULT hr;
+
+ hr = p->D3DCompile(clear_vs, sizeof(clear_vs), NULL, NULL, NULL, "main",
+ get_shader_target(ra, GLSL_SHADER_VERTEX),
+ D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to compile clear() vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreateVertexShader(p->dev,
+ ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
+ NULL, &p->clear_vs);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = p->D3DCompile(clear_ps, sizeof(clear_ps), NULL, NULL, NULL, "main",
+ get_shader_target(ra, GLSL_SHADER_FRAGMENT),
+ D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &ps_blob, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to compile clear() pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreatePixelShader(p->dev,
+ ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob),
+ NULL, &p->clear_ps);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_INPUT_ELEMENT_DESC in_descs[] = {
+ { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
+ };
+ hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
+ MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
+ ID3D10Blob_GetBufferSize(vs_blob), &p->clear_layout);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() IA layout: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ // clear() always draws to a quad covering the whole viewport
+ static const float verts[] = {
+ -1, -1,
+ 1, -1,
+ 1, 1,
+ -1, 1,
+ -1, -1,
+ 1, 1,
+ };
+ D3D11_BUFFER_DESC vdesc = {
+ .ByteWidth = sizeof(verts),
+ .Usage = D3D11_USAGE_IMMUTABLE,
+ .BindFlags = D3D11_BIND_VERTEX_BUFFER,
+ };
+ D3D11_SUBRESOURCE_DATA vdata = {
+ .pSysMem = verts,
+ };
+ hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, &vdata, &p->clear_vbuf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() vertex buffer: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_BUFFER_DESC cdesc = {
+ .ByteWidth = sizeof(float[4]),
+ .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
+ };
+ hr = ID3D11Device_CreateBuffer(p->dev, &cdesc, NULL, &p->clear_cbuf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create clear() constant buffer: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(ps_blob);
+ return true;
+error:
+ SAFE_RELEASE(vs_blob);
+ SAFE_RELEASE(ps_blob);
+ return false;
+}
+
+static void clear_rpass(struct ra *ra, struct ra_tex *tex, float color[4],
+ struct mp_rect *rc)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *tex_p = tex->priv;
+ struct ra_tex_params *params = &tex->params;
+
+ ID3D11DeviceContext_UpdateSubresource(p->ctx,
+ (ID3D11Resource *)p->clear_cbuf, 0, NULL, color, 0, 0);
+
+ ID3D11DeviceContext_IASetInputLayout(p->ctx, p->clear_layout);
+ ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->clear_vbuf,
+ &(UINT) { sizeof(float[2]) }, &(UINT) { 0 });
+ ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
+ D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+ ID3D11DeviceContext_VSSetShader(p->ctx, p->clear_vs, NULL, 0);
+
+ ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
+ .Width = params->w,
+ .Height = params->h,
+ .MinDepth = 0,
+ .MaxDepth = 1,
+ }));
+ ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
+ .left = rc->x0,
+ .top = rc->y0,
+ .right = rc->x1,
+ .bottom = rc->y1,
+ }));
+ ID3D11DeviceContext_PSSetShader(p->ctx, p->clear_ps, NULL, 0);
+ ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, &p->clear_cbuf);
+
+ ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &tex_p->rtv, NULL);
+ ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
+ D3D11_DEFAULT_SAMPLE_MASK);
+
+ ID3D11DeviceContext_Draw(p->ctx, 6, 0);
+
+ ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1,
+ &(ID3D11Buffer *){ NULL });
+ ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
+}
+
+static void clear(struct ra *ra, struct ra_tex *tex, float color[4],
+ struct mp_rect *rc)
+{
+ struct ra_d3d11 *p = ra->priv;
+ struct d3d_tex *tex_p = tex->priv;
+ struct ra_tex_params *params = &tex->params;
+
+ if (!tex_p->rtv)
+ return;
+
+ if (rc->x0 || rc->y0 || rc->x1 != params->w || rc->y1 != params->h) {
+ if (p->has_clear_view) {
+ ID3D11DeviceContext1_ClearView(p->ctx1, (ID3D11View *)tex_p->rtv,
+ color, (&(D3D11_RECT) {
+ .left = rc->x0,
+ .top = rc->y0,
+ .right = rc->x1,
+ .bottom = rc->y1,
+ }), 1);
+ } else {
+ clear_rpass(ra, tex, color, rc);
+ }
+ } else {
+ ID3D11DeviceContext_ClearRenderTargetView(p->ctx, tex_p->rtv, color);
+ }
+}
+
+static bool setup_blit_rpass(struct ra *ra)
+{
+ struct ra_d3d11 *p = ra->priv;
+ ID3DBlob *vs_blob = NULL;
+ ID3DBlob *float_ps_blob = NULL;
+ HRESULT hr;
+
+ hr = p->D3DCompile(blit_vs, sizeof(blit_vs), NULL, NULL, NULL, "main",
+ get_shader_target(ra, GLSL_SHADER_VERTEX),
+ D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to compile blit() vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreateVertexShader(p->dev,
+ ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
+ NULL, &p->blit_vs);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() vertex shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = p->D3DCompile(blit_float_ps, sizeof(blit_float_ps), NULL, NULL, NULL,
+ "main", get_shader_target(ra, GLSL_SHADER_FRAGMENT),
+ D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &float_ps_blob, NULL);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to compile blit() pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ hr = ID3D11Device_CreatePixelShader(p->dev,
+ ID3D10Blob_GetBufferPointer(float_ps_blob),
+ ID3D10Blob_GetBufferSize(float_ps_blob),
+ NULL, &p->blit_float_ps);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() pixel shader: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_INPUT_ELEMENT_DESC in_descs[] = {
+ { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
+ { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8 },
+ };
+ hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
+ MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
+ ID3D10Blob_GetBufferSize(vs_blob), &p->blit_layout);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() IA layout: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ D3D11_BUFFER_DESC vdesc = {
+ .ByteWidth = sizeof(struct blit_vert[6]),
+ .Usage = D3D11_USAGE_DEFAULT,
+ .BindFlags = D3D11_BIND_VERTEX_BUFFER,
+ };
+ hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, NULL, &p->blit_vbuf);
+ if (FAILED(hr)) {
+ MP_ERR(ra, "Failed to create blit() vertex buffer: %s\n",
+ mp_HRESULT_to_str(hr));
+ goto error;
+ }
+
+ // Blit always uses point sampling, regardless of the source texture
+ D3D11_SAMPLER_DESC sdesc = {