author    Niklas Haas <git@haasn.xyz>    2018-11-10 12:53:33 +0100
committer Jan Ekström <jeebjp@gmail.com> 2019-04-21 23:55:22 +0300
commit    7006d6752d7da21870dfdb2b0d7640a3734f748c (patch)
tree      035ca58d22de438e834d212e97b73d03a4248d98
parent    9f7dcc0726ab635fb34fb7310e54b1aec9467f14 (diff)
vo_gpu: vulkan: use libplacebo instead
This commit rips out the entire mpv vulkan implementation in favor of exposing lightweight wrappers on top of libplacebo instead, which provides much of the same except in a more up-to-date and polished form.

This (finally) unifies the code base between mpv and libplacebo, which is something I've been hoping to do for a long time.

Note: The ra_pl wrappers are abstract enough from the actual libplacebo device type that we can in theory re-use them for other devices like d3d11 or even opengl in the future, so I moved them to a separate directory for the time being. However, the rest of the code is still vulkan-specific, so I've kept the "vulkan" naming and file paths, rather than introducing a new `--gpu-api` type. (Which would have ended up with significantly more code duplication.)

Plus, the code and functionality are similar enough that for most users this should just be a straight-up drop-in replacement.

Note: This commit excludes some changes; specifically, the updates to context_win and hwdec_cuda are deferred to separate commits for authorship reasons.
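[Editorial note, not part of the commit:] To make the wrapper boundary described above concrete: after this change, all a backend has to supply is a pl_gpu, and ra_create_pl() turns it into a complete ra. A minimal sketch using only the two entry points declared in video/out/placebo/ra_pl.h below; the helper function itself is hypothetical, and the d3d11/opengl reuse is, as the message says, only theoretical:

    #include "video/out/placebo/ra_pl.h"

    // Hypothetical backend glue, sketching the reuse pattern described
    // above: any code that can produce a pl_gpu gets a full mpv ra.
    static struct ra *wrap_pl_gpu(const struct pl_gpu *gpu, struct mp_log *log)
    {
        // ra_create_pl() maps every ra operation (textures, buffers,
        // renderpasses) onto the corresponding pl_gpu call.
        struct ra *ra = ra_create_pl(gpu, log);

        // External images (e.g. swapchain frames) can then be exposed as
        // ra_tex wrappers on the fly via mppl_wrap_tex(), as context.c does.
        return ra;
    }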
-rw-r--r--common/msg.c8
-rw-r--r--common/msg.h7
-rw-r--r--video/out/gpu/context.c4
-rw-r--r--video/out/opengl/hwdec_cuda.c11
-rw-r--r--video/out/placebo/ra_pl.c628
-rw-r--r--video/out/placebo/ra_pl.h10
-rw-r--r--video/out/placebo/utils.c62
-rw-r--r--video/out/placebo/utils.h18
-rw-r--r--video/out/vulkan/common.h58
-rw-r--r--video/out/vulkan/context.c482
-rw-r--r--video/out/vulkan/context.h4
-rw-r--r--video/out/vulkan/context_wayland.c18
-rw-r--r--video/out/vulkan/context_win.c105
-rw-r--r--video/out/vulkan/context_xlib.c11
-rw-r--r--video/out/vulkan/formats.c55
-rw-r--r--video/out/vulkan/formats.h16
-rw-r--r--video/out/vulkan/malloc.c471
-rw-r--r--video/out/vulkan/malloc.h37
-rw-r--r--video/out/vulkan/ra_vk.c1982
-rw-r--r--video/out/vulkan/ra_vk.h51
-rw-r--r--video/out/vulkan/utils.c990
-rw-r--r--video/out/vulkan/utils.h190
-rw-r--r--wscript11
-rw-r--r--wscript_build.py7
24 files changed, 868 insertions, 4368 deletions
diff --git a/common/msg.c b/common/msg.c
index cb41ea4168..e35b953f7d 100644
--- a/common/msg.c
+++ b/common/msg.c
@@ -127,19 +127,19 @@ static void update_loglevel(struct mp_log *log)
pthread_mutex_unlock(&mp_msg_lock);
}
-// Return whether the message at this verbosity level would be actually printed.
+// Get the current effective msg level.
// Thread-safety: see mp_msg().
-bool mp_msg_test(struct mp_log *log, int lev)
+int mp_msg_level(struct mp_log *log)
{
struct mp_log_root *root = log->root;
if (!root)
- return false;
+ return -1;
if (atomic_load_explicit(&log->reload_counter, memory_order_relaxed) !=
atomic_load_explicit(&root->reload_counter, memory_order_relaxed))
{
update_loglevel(log);
}
- return lev <= log->level;
+ return log->level;
}
// Reposition cursor and clear lines for outputting the status line. In certain
diff --git a/common/msg.h b/common/msg.h
index 21228870f4..635a85191f 100644
--- a/common/msg.h
+++ b/common/msg.h
@@ -52,7 +52,12 @@ void mp_msg(struct mp_log *log, int lev, const char *format, ...)
PRINTF_ATTRIBUTE(3, 4);
void mp_msg_va(struct mp_log *log, int lev, const char *format, va_list va);
-bool mp_msg_test(struct mp_log *log, int lev);
+int mp_msg_level(struct mp_log *log);
+
+static inline bool mp_msg_test(struct mp_log *log, int lev)
+{
+ return lev <= mp_msg_level(log);
+}
// Convenience macros.
#define mp_fatal(log, ...) mp_msg(log, MSGL_FATAL, __VA_ARGS__)
diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c
index 85f1aa7667..0a46936708 100644
--- a/video/out/gpu/context.c
+++ b/video/out/gpu/context.c
@@ -50,7 +50,7 @@ extern const struct ra_ctx_fns ra_ctx_vdpauglx;
/* Vulkan */
extern const struct ra_ctx_fns ra_ctx_vulkan_wayland;
-extern const struct ra_ctx_fns ra_ctx_vulkan_win;
+//extern const struct ra_ctx_fns ra_ctx_vulkan_win;
extern const struct ra_ctx_fns ra_ctx_vulkan_xlib;
/* Direct3D 11 */
@@ -105,9 +105,11 @@ static const struct ra_ctx_fns *contexts[] = {
// Vulkan contexts:
#if HAVE_VULKAN
+/*
#if HAVE_WIN32_DESKTOP
&ra_ctx_vulkan_win,
#endif
+*/
#if HAVE_WAYLAND
&ra_ctx_vulkan_wayland,
#endif
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c
index fee1f83f98..2e87593c78 100644
--- a/video/out/opengl/hwdec_cuda.c
+++ b/video/out/opengl/hwdec_cuda.c
@@ -39,9 +39,11 @@
#include "ra_gl.h"
#endif
#if HAVE_VULKAN
+/*
#include "video/out/vulkan/formats.h"
#include "video/out/vulkan/ra_vk.h"
#include "video/out/vulkan/utils.h"
+*/
#endif
#if HAVE_WIN32_DESKTOP
@@ -125,6 +127,8 @@ static int cuda_init(struct ra_hwdec *hw)
#endif
#if HAVE_VULKAN
+ return -1; // TODO: reimplement
+ /*
p->is_vk = ra_vk_get(hw->ra) != NULL;
if (p->is_vk) {
if (!ra_vk_get(hw->ra)->has_ext_external_memory_export) {
@@ -133,6 +137,7 @@ static int cuda_init(struct ra_hwdec *hw)
return -1;
}
}
+ */
#endif
if (!p->is_gl && !p->is_vk) {
@@ -197,6 +202,7 @@ static int cuda_init(struct ra_hwdec *hw)
}
} else if (p->is_vk) {
#if HAVE_VULKAN
+ /*
uint8_t vk_uuid[VK_UUID_SIZE];
struct mpvk_ctx *vk = ra_vk_get(hw->ra);
@@ -236,6 +242,7 @@ static int cuda_init(struct ra_hwdec *hw)
return -1;
p->decode_ctx = p->display_ctx;
+ */
#endif
}
@@ -293,6 +300,7 @@ static void cuda_uninit(struct ra_hwdec *hw)
#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
#if HAVE_VULKAN
+/*
static struct ra_buf *cuda_buf_pool_get(struct ra_hwdec_mapper *mapper, int n)
{
struct priv_owner *p_owner = mapper->owner->priv;
@@ -390,6 +398,7 @@ static void cuda_buf_pool_uninit(struct ra_hwdec_mapper *mapper, int n)
}
ra_buf_pool_uninit(mapper->ra, pool);
}
+*/
#endif // HAVE_VULKAN
static int mapper_init(struct ra_hwdec_mapper *mapper)
@@ -497,7 +506,7 @@ static void mapper_uninit(struct ra_hwdec_mapper *mapper)
ra_tex_free(mapper->ra, &mapper->tex[n]);
#if HAVE_VULKAN
- cuda_buf_pool_uninit(mapper, n);
+ //cuda_buf_pool_uninit(mapper, n);
#endif
}
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c
new file mode 100644
index 0000000000..334f2f135f
--- /dev/null
+++ b/video/out/placebo/ra_pl.c
@@ -0,0 +1,628 @@
+#include "common/common.h"
+#include "common/msg.h"
+
+#include "ra_pl.h"
+#include "utils.h"
+
+struct ra_pl {
+ const struct pl_gpu *gpu;
+};
+
+static inline const struct pl_gpu *get_gpu(struct ra *ra)
+{
+ struct ra_pl *p = ra->priv;
+ return p->gpu;
+}
+
+static struct ra_fns ra_fns_pl;
+
+struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
+{
+ assert(gpu);
+
+ struct ra *ra = talloc_zero(NULL, struct ra);
+ ra->log = log;
+ ra->fns = &ra_fns_pl;
+
+ struct ra_pl *p = ra->priv = talloc_zero(ra, struct ra_pl);
+ p->gpu = gpu;
+
+ ra->glsl_version = gpu->glsl.version;
+ ra->glsl_vulkan = gpu->glsl.vulkan;
+ ra->glsl_es = gpu->glsl.gles;
+
+ ra->caps = RA_CAP_DIRECT_UPLOAD | RA_CAP_NESTED_ARRAY | RA_CAP_FRAGCOORD;
+
+ if (gpu->caps & PL_GPU_CAP_COMPUTE)
+ ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS;
+ if (gpu->caps & PL_GPU_CAP_PARALLEL_COMPUTE)
+ ra->caps |= RA_CAP_PARALLEL_COMPUTE;
+ if (gpu->caps & PL_GPU_CAP_INPUT_VARIABLES)
+ ra->caps |= RA_CAP_GLOBAL_UNIFORM;
+
+ if (gpu->limits.max_tex_1d_dim)
+ ra->caps |= RA_CAP_TEX_1D;
+ if (gpu->limits.max_tex_3d_dim)
+ ra->caps |= RA_CAP_TEX_3D;
+ if (gpu->limits.max_ubo_size)
+ ra->caps |= RA_CAP_BUF_RO;
+ if (gpu->limits.max_ssbo_size)
+ ra->caps |= RA_CAP_BUF_RW;
+ if (gpu->limits.min_gather_offset && gpu->limits.max_gather_offset)
+ ra->caps |= RA_CAP_GATHER;
+
+ // Semi-hack: assume all textures are blittable if r8 is
+ const struct pl_fmt *r8 = pl_find_named_fmt(gpu, "r8");
+ if (r8->caps & PL_FMT_CAP_BLITTABLE)
+ ra->caps |= RA_CAP_BLIT;
+
+ ra->max_texture_wh = gpu->limits.max_tex_2d_dim;
+ ra->max_shmem = gpu->limits.max_shmem_size;
+ ra->max_pushc_size = gpu->limits.max_pushc_size;
+
+ // Set up format wrappers
+ for (int i = 0; i < gpu->num_formats; i++) {
+ const struct pl_fmt *plfmt = gpu->formats[i];
+ static const enum ra_ctype fmt_type_map[PL_FMT_TYPE_COUNT] = {
+ [PL_FMT_UNORM] = RA_CTYPE_UNORM,
+ [PL_FMT_UINT] = RA_CTYPE_UINT,
+ [PL_FMT_FLOAT] = RA_CTYPE_FLOAT,
+ };
+
+ enum ra_ctype type = fmt_type_map[plfmt->type];
+ if (!type || !(plfmt->caps & PL_FMT_CAP_SAMPLEABLE))
+ continue;
+
+ struct ra_format *rafmt = talloc_zero(ra, struct ra_format);
+ *rafmt = (struct ra_format) {
+ .name = plfmt->name,
+ .priv = (void *) plfmt,
+ .ctype = type,
+ .ordered = pl_fmt_is_ordered(plfmt),
+ .num_components = plfmt->num_components,
+ .pixel_size = plfmt->texel_size,
+ .linear_filter = plfmt->caps & PL_FMT_CAP_LINEAR,
+ .renderable = plfmt->caps & PL_FMT_CAP_RENDERABLE,
+ .glsl_format = plfmt->glsl_format,
+ };
+
+ for (int c = 0; c < plfmt->num_components; c++) {
+ rafmt->component_size[c] = plfmt->host_bits[c];
+ rafmt->component_depth[c] = plfmt->component_depth[c];
+ }
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, rafmt);
+ }
+
+ return ra;
+}
+
+static void destroy_ra_pl(struct ra *ra)
+{
+ talloc_free(ra);
+}
+
+static struct ra_format *map_fmt(struct ra *ra, const struct pl_fmt *plfmt)
+{
+ for (int i = 0; i < ra->num_formats; i++) {
+ if (ra->formats[i]->priv == plfmt)
+ return ra->formats[i];
+ }
+
+ MP_ERR(ra, "Failed mapping pl_fmt '%s' to ra_fmt?\n", plfmt->name);
+ return NULL;
+}
+
+bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex,
+ struct ra_tex *out_tex)
+{
+ if (!pltex)
+ return false;
+
+ *out_tex = (struct ra_tex) {
+ .params = {
+ .dimensions = pl_tex_params_dimension(pltex->params),
+ .w = pltex->params.w,
+ .h = pltex->params.h,
+ .d = pltex->params.d,
+ .format = map_fmt(ra, pltex->params.format),
+ .render_src = pltex->params.sampleable,
+ .render_dst = pltex->params.renderable,
+ .storage_dst = pltex->params.storable,
+ .blit_src = pltex->params.blit_src,
+ .blit_dst = pltex->params.blit_dst,
+ .host_mutable = pltex->params.host_writable,
+ .downloadable = pltex->params.host_readable,
+ .src_linear = pltex->params.sample_mode == PL_TEX_SAMPLE_LINEAR,
+ .src_repeat = pltex->params.address_mode == PL_TEX_ADDRESS_REPEAT,
+ },
+ .priv = (void *) pltex,
+ };
+
+ return !!out_tex->params.format;
+}
+
+static struct ra_tex *tex_create_pl(struct ra *ra,
+ const struct ra_tex_params *params)
+{
+ const struct pl_gpu *gpu = get_gpu(ra);
+
+ // Check size limits
+ bool ok = false;
+ switch (params->dimensions) {
+ case 1:
+ ok = params->w <= gpu->limits.max_tex_1d_dim;
+ break;
+
+ case 2:
+ ok = params->w <= gpu->limits.max_tex_2d_dim &&
+ params->h <= gpu->limits.max_tex_2d_dim;
+ break;
+
+ case 3:
+ ok = params->w <= gpu->limits.max_tex_2d_dim &&
+ params->h <= gpu->limits.max_tex_2d_dim &&
+ params->d <= gpu->limits.max_tex_2d_dim;
+ break;
+ };
+
+ if (!ok) {
+ MP_ERR(ra, "Texture size %dx%dx%d exceeds dimension limits!\n",
+ params->w, params->h, params->d);
+ return NULL;
+ }
+
+ const struct pl_tex *pltex = pl_tex_create(gpu, &(struct pl_tex_params) {
+ .w = params->w,
+ .h = params->dimensions >= 2 ? params->h : 0,
+ .d = params->dimensions >= 3 ? params->d : 0,
+ .format = params->format->priv,
+ .sampleable = params->render_src,
+ .renderable = params->render_dst,
+ .storable = params->storage_dst,
+ .blit_src = params->blit_src,
+ .blit_dst = params->blit_dst || params->render_dst,
+ .host_writable = params->host_mutable,
+ .host_readable = params->downloadable,
+ .sample_mode = params->src_linear ? PL_TEX_SAMPLE_LINEAR
+ : PL_TEX_SAMPLE_NEAREST,
+ .address_mode = params->src_repeat ? PL_TEX_ADDRESS_REPEAT
+ : PL_TEX_ADDRESS_CLAMP,
+ .initial_data = params->initial_data,
+ });
+
+ struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+ if (!mppl_wrap_tex(ra, pltex, ratex)) {
+ pl_tex_destroy(gpu, &pltex);
+ talloc_free(ratex);
+ return NULL;
+ }
+
+ return ratex;
+}
+
+static void tex_destroy_pl(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+
+ pl_tex_destroy(get_gpu(ra), (const struct pl_tex **) &tex->priv);
+ talloc_free(tex);
+}
+
+static int texel_stride_w(size_t stride, const struct pl_tex *tex)
+{
+ size_t texel_size = tex->params.format->texel_size;
+ int texels = stride / texel_size;
+ assert(texels * texel_size == stride);
+ return texels;
+}
+
+static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *params)
+{
+ const struct pl_tex *tex = params->tex->priv;
+ struct pl_tex_transfer_params pl_params = {
+ .tex = tex,
+ .buf = params->buf ? params->buf->priv : NULL,
+ .buf_offset = params->buf_offset,
+ .ptr = (void *) params->src,
+ };
+
+ if (params->tex->params.dimensions == 2) {
+ pl_params.stride_w = texel_stride_w(params->stride, tex);
+ if (params->rc) {
+ pl_params.rc = (struct pl_rect3d) {
+ .x0 = params->rc->x0, .x1 = params->rc->x1,
+ .y0 = params->rc->y0, .y1 = params->rc->y1,
+ };
+ }
+ }
+
+ return pl_tex_upload(get_gpu(ra), &pl_params);
+}
+
+static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params)
+{
+ const struct pl_tex *tex = params->tex->priv;
+ struct pl_tex_transfer_params pl_params = {
+ .tex = tex,
+ .ptr = params->dst,
+ .stride_w = texel_stride_w(params->stride, tex),
+ };
+
+ return pl_tex_download(get_gpu(ra), &pl_params);
+}
+
+static struct ra_buf *buf_create_pl(struct ra *ra,
+ const struct ra_buf_params *params)
+{
+ static const enum pl_buf_type buf_type[] = {
+ [RA_BUF_TYPE_TEX_UPLOAD] = PL_BUF_TEX_TRANSFER,
+ [RA_BUF_TYPE_SHADER_STORAGE] = PL_BUF_STORAGE,
+ [RA_BUF_TYPE_UNIFORM] = PL_BUF_UNIFORM,
+ [RA_BUF_TYPE_SHARED_MEMORY] = 0,
+ };
+
+ const struct pl_gpu *gpu = get_gpu(ra);
+ size_t max_size[] = {
+ [PL_BUF_TEX_TRANSFER] = gpu->limits.max_xfer_size,
+ [PL_BUF_UNIFORM] = gpu->limits.max_ubo_size,
+ [PL_BUF_STORAGE] = gpu->limits.max_ssbo_size,
+ };
+
+ if (params->size > max_size[buf_type[params->type]]) {
+ MP_ERR(ra, "Buffer size %zu exceeds size limits!\n", params->size);
+ return NULL;
+ }
+
+ const struct pl_buf *plbuf = pl_buf_create(gpu, &(struct pl_buf_params) {
+ .type = buf_type[params->type],
+ .size = params->size,
+ .host_mapped = params->host_mapped,
+ .host_writable = params->host_mutable,
+ .initial_data = params->initial_data,
+ });
+
+ if (!plbuf)
+ return NULL;
+
+ struct ra_buf *rabuf = talloc_ptrtype(NULL, rabuf);
+ *rabuf = (struct ra_buf) {
+ .params = *params,
+ .data = plbuf->data,
+ .priv = (void *) plbuf,
+ };
+
+ rabuf->params.initial_data = NULL;
+ return rabuf;
+}
+
+static void buf_destroy_pl(struct ra *ra, struct ra_buf *buf)
+{
+ if (!buf)
+ return;
+
+ pl_buf_destroy(get_gpu(ra), (const struct pl_buf **) &buf->priv);
+ talloc_free(buf);
+}
+
+static void buf_update_pl(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+ const void *data, size_t size)
+{
+ pl_buf_write(get_gpu(ra), buf->priv, offset, data, size);
+}
+
+static bool buf_poll_pl(struct ra *ra, struct ra_buf *buf)
+{
+ return !pl_buf_poll(get_gpu(ra), buf->priv, 0);
+}
+
+static void clear_pl(struct ra *ra, struct ra_tex *dst, float color[4],
+ struct mp_rect *scissor)
+{
+ // TODO: implement scissor clearing by blitting a 1x1 tex instead
+ pl_tex_clear(get_gpu(ra), dst->priv, color);
+}
+
+static void blit_pl(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+ struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+ struct pl_rect3d plsrc = {0}, pldst = {0};
+ if (src_rc) {
+ plsrc.x0 = MPMIN(MPMAX(src_rc->x0, 0), src->params.w);
+ plsrc.y0 = MPMIN(MPMAX(src_rc->y0, 0), src->params.h);
+ plsrc.x1 = MPMIN(MPMAX(src_rc->x1, 0), src->params.w);
+ plsrc.y1 = MPMIN(MPMAX(src_rc->y1, 0), src->params.h);
+ }
+
+ if (dst_rc) {
+ pldst.x0 = MPMIN(MPMAX(dst_rc->x0, 0), dst->params.w);
+ pldst.y0 = MPMIN(MPMAX(dst_rc->y0, 0), dst->params.h);
+ pldst.x1 = MPMIN(MPMAX(dst_rc->x1, 0), dst->params.w);
+ pldst.y1 = MPMIN(MPMAX(dst_rc->y1, 0), dst->params.h);
+ }
+
+ pl_tex_blit(get_gpu(ra), dst->priv, src->priv, pldst, plsrc);
+}
+
+static const enum pl_var_type var_type[RA_VARTYPE_COUNT] = {
+ [RA_VARTYPE_INT] = PL_VAR_SINT,
+ [RA_VARTYPE_FLOAT] = PL_VAR_FLOAT,
+};
+
+static const enum pl_desc_type desc_type[RA_VARTYPE_COUNT] = {
+ [RA_VARTYPE_TEX] = PL_DESC_SAMPLED_TEX,
+ [RA_VARTYPE_IMG_W] = PL_DESC_STORAGE_IMG,
+ [RA_VARTYPE_BUF_RO] = PL_DESC_BUF_UNIFORM,
+ [RA_VARTYPE_BUF_RW] = PL_DESC_BUF_STORAGE,
+};
+
+static const enum pl_fmt_type fmt_type[RA_VARTYPE_COUNT] = {
+ [RA_VARTYPE_INT] = PL_FMT_SINT,
+ [RA_VARTYPE_FLOAT] = PL_FMT_FLOAT,
+ [RA_VARTYPE_BYTE_UNORM] = PL_FMT_UNORM,
+};
+
+static const size_t var_size[RA_VARTYPE_COUNT] = {
+ [RA_VARTYPE_INT] = sizeof(int),
+ [RA_VARTYPE_FLOAT] = sizeof(float),
+ [RA_VARTYPE_BYTE_UNORM] = sizeof(uint8_t),
+};
+
+static struct ra_layout uniform_layout_pl(struct ra_renderpass_input *inp)
+{
+ // To get the alignment requirements, we try laying this out with
+ // an offset of 1 and then see where it ends up. This will always be
+ // the minimum alignment requirement.
+ struct pl_var_layout layout = pl_buf_uniform_layout(1, &(struct pl_var) {
+ .name = inp->name,
+ .type = var_type[inp->type],
+ .dim_v = inp->dim_v,
+ .dim_m = inp->dim_m,
+ .dim_a = 1,
+ });
+
+ return (struct ra_layout) {
+ .align = layout.offset,
+ .stride = layout.stride,
+ .size = layout.size,
+ };
+}
+
+static struct ra_layout push_constant_layout_pl(struct ra_renderpass_input *inp)
+{
+ struct pl_var_layout layout = pl_push_constant_layout(1, &(struct pl_var) {
+ .name = inp->name,
+ .type = var_type[inp->type],
+ .dim_v = inp->dim_v,
+ .dim_m = inp->dim_m,
+ .dim_a = 1,
+ });
+
+ return (struct ra_layout) {
+ .align = layout.offset,
+ .stride = layout.stride,
+ .size = layout.size,
+ };
+}
+
+static int desc_namespace_pl(struct ra *ra, enum ra_vartype type)
+{
+ return pl_desc_namespace(get_gpu(ra), desc_type[type]);
+}
+
+struct pass_priv {
+ const struct pl_pass *pl_pass;
+ uint16_t *inp_index; // index translation map
+ // Space to hold the descriptor bindings and variable updates
+ struct pl_desc_binding *binds;
+ struct pl_var_update *varups;
+ int num_varups;
+};
+
+static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
+ const struct ra_renderpass_params *params)
+{
+ void *tmp = talloc_new(NULL);
+ const struct pl_gpu *gpu = get_gpu(ra);
+ struct ra_renderpass *pass = NULL;
+
+ static const enum pl_pass_type pass_type[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = PL_PASS_RASTER,
+ [RA_RENDERPASS_TYPE_COMPUTE] = PL_PASS_COMPUTE,
+ };
+
+ struct pl_var *vars = NULL;
+ struct pl_desc *descs = NULL;
+ int num_vars = 0, num_descs = 0;
+
+ struct pass_priv *priv = talloc_ptrtype(tmp, priv);
+ priv->inp_index = talloc_zero_array(priv, uint16_t, params->num_inputs);
+
+ for (int i = 0; i < params->num_inputs; i++) {
+ const struct ra_renderpass_input *inp = &params->inputs[i];
+ if (var_type[inp->type]) {
+ priv->inp_index[i] = num_vars;
+ MP_TARRAY_APPEND(tmp, vars, num_vars, (struct pl_var) {
+ .name = inp->name,
+ .type = var_type[inp->type],
+ .dim_v = inp->dim_v,
+ .dim_m = inp->dim_m,
+ .dim_a = 1,
+ });
+ } else if (desc_type[inp->type]) {
+ priv->inp_index[i] = num_descs;
+ MP_TARRAY_APPEND(tmp, descs, num_descs, (struct pl_desc) {
+ .name = inp->name,
+ .type = desc_type[inp->type],
+ .binding = inp->binding,
+ .access = inp->type == RA_VARTYPE_IMG_W ? PL_DESC_ACCESS_WRITEONLY
+ : inp->type == RA_VARTYPE_BUF_RW ? PL_DESC_ACCESS_READWRITE
+ : PL_DESC_ACCESS_READONLY,
+ });
+ }
+ }
+
+ // Allocate space to store the bindings map persistently
+ priv->binds = talloc_zero_array(priv, struct pl_desc_binding, num_descs);
+
+ struct pl_pass_params pl_params = {
+ .type = pass_type[params->type],
+ .variables = vars,
+ .num_variables = num_vars,
+ .descriptors = descs,
+ .num_descriptors = num_descs,
+ .push_constants_size = params->push_constants_size,
+ .glsl_shader = params->type == RA_RENDERPASS_TYPE_COMPUTE
+ ? params->compute_shader
+ : params->frag_shader,
+ .cached_program = params->cached_program.start,
+ .cached_program_len = params->cached_program.len,
+ };
+
+ struct pl_blend_params blend_params;
+
+ if (params->type == RA_RENDERPASS_TYPE_RASTER) {
+ pl_params.vertex_shader = params->vertex_shader;
+ pl_params.vertex_type = PL_PRIM_TRIANGLE_LIST;
+ pl_params.vertex_stride = params->vertex_stride;
+ pl_params.target_dummy.params.format = params->target_format->priv;
+ pl_params.load_target = !params->invalidate_target;
+
+ if (params->enable_blend) {
+ pl_params.blend_params = &blend_params;
+ blend_params = (struct pl_blend_params) {
+ // Same enum order as ra_blend
+ .src_rgb = (enum ra_blend) params->blend_src_rgb,
+ .dst_rgb = (enum ra_blend) params->blend_dst_rgb,
+ .src_alpha = (enum ra_blend) params->blend_src_alpha,
+ .dst_alpha = (enum ra_blend) params->blend_dst_alpha,
+ };
+ }
+
+ for (int i = 0; i < params->num_vertex_attribs; i++) {
+ const struct ra_renderpass_input *inp = &params->vertex_attribs[i];
+ struct pl_vertex_attrib attrib = {
+ .name = inp->name,
+ .offset = inp->offset,
+ .location = i,
+ .fmt = pl_find_fmt(gpu, fmt_type[inp->type], inp->dim_v, 0,
+ var_size[inp->type] * 8, PL_FMT_CAP_VERTEX),
+ };
+
+ if (!attrib.fmt) {
+ MP_ERR(ra, "Failed mapping vertex attrib '%s' to pl_fmt?\n",
+ inp->name);
+ goto error;
+ }
+
+ MP_TARRAY_APPEND(tmp, pl_params.vertex_attribs,
+ pl_params.num_vertex_attribs, attrib);
+ }
+ }
+
+ priv->pl_pass = pl_pass_create(gpu, &pl_params);
+ if (!priv->pl_pass)
+ goto error;
+
+ pass = talloc_ptrtype(NULL, pass);
+ *pass = (struct ra_renderpass) {
+ .params = *ra_renderpass_params_copy(pass, params),
+ .priv = talloc_steal(pass, priv),
+ };
+
+ pass->params.cached_program = (struct bstr) {
+ .start = (void *) priv->pl_pass->params.cached_program,
+ .len = priv->pl_pass->params.cached_program_len,
+ };
+
+ // fall through
+error:
+ talloc_free(tmp);
+ return pass;
+}
+
+static void renderpass_destroy_pl(struct ra *ra, struct ra_renderpass *pass)
+{
+ if (!pass)
+ return;
+
+ struct pass_priv *priv = pass->priv;
+ pl_pass_destroy(get_gpu(ra), (const struct pl_pass **) &priv->pl_pass);
+ talloc_free(pass);
+}
+
+static void renderpass_run_pl(struct ra *ra,
+ const struct ra_renderpass_run_params *params)
+{
+ struct pass_priv *p = params->pass->priv;
+ p->num_varups = 0;
+
+ for (int i = 0; i < params->num_values; i++) {
+ const struct ra_renderpass_input_val *val = &params->values[i];
+ const struct ra_renderpass_input *inp = &params->pass->params.inputs[i];
+ if (var_type[inp->type]) {
+ MP_TARRAY_APPEND(p, p->varups, p->num_varups, (struct pl_var_update) {
+ .index = p->inp_index[val->index],
+ .data = val->data,
+ });
+ } else {
+ struct pl_desc_binding bind;
+ switch (inp->type) {
+ case RA_VARTYPE_TEX:
+ case RA_VARTYPE_IMG_W:
+ bind.object = (* (struct ra_tex **) val->data)->priv;
+ break;
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW:
+ bind.object = (* (struct ra_buf **) val->data)->priv;
+ break;
+ default: abort();
+ };
+
+ p->binds[p->inp_index[val->index]] = bind;
+ };
+ }
+
+ struct pl_pass_run_params pl_params = {
+ .pass = p->pl_pass,
+ .var_updates = p->varups,
+ .num_var_updates = p->num_varups,
+ .desc_bindings = p->binds,
+ .push_constants = params->push_constants,
+ };
+
+ if (p->pl_pass->params.type == PL_PASS_RASTER) {
+ pl_params.target = params->target->priv;
+ pl_params.viewport = mp_rect2d_to_pl(params->viewport);
+ pl_params.scissors = mp_rect2d_to_pl(params->scissors);
+ pl_params.vertex_data = params->vertex_data;
+ pl_params.vertex_count = params->vertex_count;
+ } else {
+ for (int i = 0; i < MP_ARRAY_SIZE(pl_params.compute_groups); i++)
+ pl_params.compute_groups[i] = params->compute_groups[i];
+ }
+
+ pl_pass_run(get_gpu(ra), &pl_params);
+}
+
+static struct ra_fns ra_fns_pl = {
+ .destroy = destroy_ra_pl,
+ .tex_create = tex_create_pl,
+ .tex_destroy = tex_destroy_pl,
+ .tex_upload = tex_upload_pl,
+ .tex_download = tex_download_pl,
+ .buf_create = buf_create_pl,
+ .buf_destroy = buf_destroy_pl,
+ .buf_update = buf_update_pl,
+ .buf_poll = buf_poll_pl,
+ .clear = clear_pl,
+ .blit = blit_pl,
+ .uniform_layout = uniform_layout_pl,
+ .push_constant_layout = push_constant_layout_pl,
+ .desc_namespace = desc_namespace_pl,
+ .renderpass_create = renderpass_create_pl,
+ .renderpass_destroy = renderpass_destroy_pl,
+ .renderpass_run = renderpass_run_pl,
+};
+
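[Editorial note, not part of the commit:] A worked example of the alignment probe used by uniform_layout_pl() above. Laying a variable out at a base offset of 1 and reading back the resulting offset yields the minimum alignment, because rounding 1 up to any alignment `a` gives exactly `a`. Here align_up is a stand-in for the rounding pl_buf_uniform_layout() performs internally:

    #include <assert.h>
    #include <stddef.h>

    // Stand-in for the offset rounding pl_buf_uniform_layout() performs.
    static size_t align_up(size_t offset, size_t align)
    {
        return ((offset + align - 1) / align) * align;
    }

    int main(void)
    {
        // Probing at offset 1: the rounded offset equals the alignment.
        assert(align_up(1, 4)  == 4);   // e.g. a lone float (std140: align 4)
        assert(align_up(1, 16) == 16);  // e.g. a vec4 (std140: align 16)
        // Probing at offset 0 would be useless: 0 is a multiple of everything.
        assert(align_up(0, 16) == 0);
        return 0;
    }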
diff --git a/video/out/placebo/ra_pl.h b/video/out/placebo/ra_pl.h
new file mode 100644
index 0000000000..a342892963
--- /dev/null
+++ b/video/out/placebo/ra_pl.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+#include <libplacebo/gpu.h>
+
+struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log);
+
+// Wrap a pl_tex into a ra_tex struct, returns whether it succeeded
+bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex,
+ struct ra_tex *out_tex);
diff --git a/video/out/placebo/utils.c b/video/out/placebo/utils.c
new file mode 100644
index 0000000000..79f313872c
--- /dev/null
+++ b/video/out/placebo/utils.c
@@ -0,0 +1,62 @@
+#include "common/common.h"
+#include "utils.h"
+
+static const int pl_log_to_msg_lev[PL_LOG_ALL+1] = {
+ [PL_LOG_FATAL] = MSGL_FATAL,
+ [PL_LOG_ERR] = MSGL_ERR,
+ [PL_LOG_WARN] = MSGL_WARN,
+ [PL_LOG_INFO] = MSGL_V,
+ [PL_LOG_DEBUG] = MSGL_DEBUG,
+ [PL_LOG_TRACE] = MSGL_TRACE,
+};
+
+static const enum pl_log_level msg_lev_to_pl_log[MSGL_MAX+1] = {
+ [MSGL_FATAL] = PL_LOG_FATAL,
+ [MSGL_ERR] = PL_LOG_ERR,
+ [MSGL_WARN] = PL_LOG_WARN,
+ [MSGL_INFO] = PL_LOG_WARN,
+ [MSGL_STATUS] = PL_LOG_WARN,
+ [MSGL_V] = PL_LOG_INFO,
+ [MSGL_DEBUG] = PL_LOG_DEBUG,
+ [MSGL_TRACE] = PL_LOG_TRACE,
+ [MSGL_MAX] = PL_LOG_ALL,
+};
+
+// Translate log levels while probing, so expected failures stay quiet
+static enum pl_log_level probing_map(enum pl_log_level level)
+{
+ switch (level) {
+ case PL_LOG_FATAL:
+ return PL_LOG_ERR;
+
+ case PL_LOG_ERR:
+ case PL_LOG_WARN:
+ return PL_LOG_INFO;
+
+ default:
+ return level;
+ }
+}
+
+static void log_cb(void *priv, enum pl_log_level level, const char *msg)
+{
+ struct mp_log *log = priv;
+ mp_msg(log, pl_log_to_msg_lev[level], "%s\n", msg);
+}
+
+static void log_cb_probing(void *priv, enum pl_log_level level, const char *msg)
+{
+ struct mp_log *log = priv;
+ mp_msg(log, pl_log_to_msg_lev[probing_map(level)], "%s\n", msg);
+}
+
+void mppl_ctx_set_log(struct pl_context *ctx, struct mp_log *log, bool probing)
+{
+ assert(log);
+
+ pl_context_update(ctx, &(struct pl_context_params) {
+ .log_cb = probing ? log_cb_probing : log_cb,
+ .log_level = msg_lev_to_pl_log[mp_msg_level(log)],
+ .log_priv = log,
+ });
+}
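[Editorial note, not part of the commit:] A hedged usage sketch for the helpers above. This exact call site is not part of the visible diff, and pl_context_create() is assumed here to be the usual libplacebo entry point; passing probing == true demotes fatal/error messages via probing_map() so that expected probe failures don't spam the log:

    #include <libplacebo/context.h>
    #include "video/out/placebo/utils.h"

    // Hypothetical init helper: hook libplacebo's logging into mpv's.
    static struct pl_context *create_pl_ctx(struct mp_log *log, bool probing)
    {
        struct pl_context *ctx = pl_context_create(PL_API_VER, NULL);
        if (ctx)
            mppl_ctx_set_log(ctx, log, probing);
        return ctx;
    }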
diff --git a/video/out/placebo/utils.h b/video/out/placebo/utils.h
new file mode 100644
index 0000000000..03bcb0f252
--- /dev/null
+++ b/video/out/placebo/utils.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "common/common.h"
+#include "common/msg.h"
+
+#include <libplacebo/common.h>
+
+void mppl_ctx_set_log(struct pl_context *ctx, struct mp_log *log, bool probing);
+
+static inline struct pl_rect2d mp_rect2d_to_pl(struct mp_rect rc)
+{
+ return (struct pl_rect2d) {
+ .x0 = rc.x0,
+ .y0 = rc.y0,
+ .x1 = rc.x1,
+ .y1 = rc.y1,
+ };
+}
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
index a4284f9055..b085fb462c 100644
--- a/video/out/vulkan/common.h
+++ b/video/out/vulkan/common.h
@@ -23,58 +23,14 @@
#define VK_USE_PLATFORM_WIN32_KHR
#endif
-#include <vulkan/vulkan.h>
-
-// Vulkan allows the optional use of a custom allocator. We don't need one but
-// mark this parameter with a better name in case we ever decide to change this
-// in the future. (And to make the code more readable)
-#define MPVK_ALLOCATOR NULL
-
-// A lot of things depend on streaming resources across frames. Depending on
-// how many frames we render ahead of time, we need to pick enough to avoid
-// any conflicts, so make all of these tunable relative to this constant in
-// order to centralize them.
-#define MPVK_MAX_STREAMING_DEPTH 8
+#include <libplacebo/vulkan.h>
// Shared struct used to hold vulkan context information
struct mpvk_ctx {
- struct mp_log *log;
- VkInstance inst;
- VkPhysicalDevice physd;
- VkDebugReportCallbackEXT dbg;
- VkDevice dev;
-
- // Surface, must be initialized fter the context itself
- VkSurfaceKHR surf;
- VkSurfaceFormatKHR surf_format; // picked at surface initialization time
-
- struct vk_malloc *alloc; // memory allocator for this device
- struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler
- struct vk_cmdpool **pools; // command pools (one per queue family)
- int num_pools;
- struct vk_cmd *last_cmd; // most recently submitted command
-
- // Queued/pending commands. These are shared for the entire mpvk_ctx to
- // ensure submission and callbacks are FIFO
- struct vk_cmd **cmds_queued; // recorded but not yet submitted
- struct vk_cmd **cmds_pending; // submitted but not completed
- int num_cmds_queued;
- int num_cmds_pending;
-
- // Pointers into *pools
- struct vk_cmdpool *pool_graphics; // required
- struct vk_cmdpool *pool_compute; // optional
- struct vk_cmdpool *pool_transfer; // optional
-
- // Common pool of signals, to avoid having to re-create these objects often
- struct vk_signal **signals;
- int num_signals;
-
- // Cached capabilities
- VkPhysicalDeviceLimits limits;
- VkPhysicalDeviceFeatures features;
-
- // Extension availability
- bool has_ext_external_memory;
- bool has_ext_external_memory_export;
+ struct mp_log *pl_log;
+ struct pl_context *ctx;
+ const struct pl_vk_inst *vkinst;
+ const struct pl_vulkan *vulkan;
+ const struct pl_gpu *gpu; // points to vulkan->gpu for convenience
+ VkSurfaceKHR surface;
};
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
index 29a2c9b727..c05a5ac209 100644
--- a/video/out/vulkan/context.c
+++ b/video/out/vulkan/context.c
@@ -16,25 +16,17 @@
*/
#include "options/m_config.h"
-#include "video/out/gpu/spirv.h"
+#include "video/out/placebo/ra_pl.h"
#include "context.h"
-#include "ra_vk.h"
#include "utils.h"
-enum {
- SWAP_AUTO = 0,
- SWAP_FIFO,
- SWAP_FIFO_RELAXED,
- SWAP_MAILBOX,
- SWAP_IMMEDIATE,
- SWAP_COUNT,
-};
-
struct vulkan_opts {
- struct mpvk_device_opts dev_opts; // logical device options
char *device; // force a specific GPU
int swap_mode;
+ int queue_count;
+ int async_transfer;
+ int async_compute;
};
static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
@@ -52,7 +44,7 @@ static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
VkPhysicalDevice *devices = NULL;
uint32_t num = 0;
- res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst);
+ res = vkCreateInstance(&info, NULL, &inst);
if (res != VK_SUCCESS)
goto done;
@@ -97,45 +89,30 @@ const struct m_sub_options vulkan_conf = {
.opts = (const struct m_option[]) {
OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev),
OPT_CHOICE("vulkan-swap-mode", swap_mode, 0,
- ({"auto", SWAP_AUTO},
- {"fifo", SWAP_FIFO},
- {"fifo-relaxed", SWAP_FIFO_RELAXED},
- {"mailbox", SWAP_MAILBOX},
- {"immediate", SWAP_IMMEDIATE})),
- OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1, 8,
- OPTDEF_INT(1)),
- OPT_FLAG("vulkan-async-transfer", dev_opts.async_transfer, 0),
- OPT_FLAG("vulkan-async-compute", dev_opts.async_compute, 0),
+ ({"auto", -1},
+ {"fifo", VK_PRESENT_MODE_FIFO_KHR},
+ {"fifo-relaxed", VK_PRESENT_MODE_FIFO_RELAXED_KHR},
+ {"mailbox", VK_PRESENT_MODE_MAILBOX_KHR},
+ {"immediate", VK_PRESENT_MODE_IMMEDIATE_KHR})),
+ OPT_INTRANGE("vulkan-queue-count", queue_count, 0, 1, 8),
+ OPT_FLAG("vulkan-async-transfer", async_transfer, 0),
+ OPT_FLAG("vulkan-async-compute", async_compute, 0),
{0}
},
.size = sizeof(struct vulkan_opts),
.defaults = &(struct vulkan_opts) {
- .dev_opts = {
- .async_transfer = 1,
- },
+ .swap_mode = -1,
+ .queue_count = 1,
+ .async_transfer = true,
+ .async_compute = true,
},
};
struct priv {
struct mpvk_ctx *vk;
struct vulkan_opts *opts;
- // Swapchain metadata:
- int w, h; // current size
- VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
- VkSwapchainKHR swapchain;
- VkSwapchainKHR old_swapchain;
- int frames_in_flight;
- // state of the images:
- struct ra_tex **images; // ra_tex wrappers for the vkimages
- int num_images; // size of images
- VkSemaphore *sems_in; // pool of semaphores used to synchronize images
- VkSemaphore *sems_out; // outgoing semaphores (rendering complete)
- int num_sems;
- int idx_sems; // index of next free semaphore pair
- int last_imgidx; // the image index last acquired (for submit)
-
- // This is used to pre-fetch the next frame at the end of swap_buffers
- struct ra_fbo queued_fbo;
+ const struct pl_swapchain *swapchain;
+ struct ra_tex proxy_tex;
};
static const struct ra_swapchain_fns vulkan_swapchain;
@@ -149,133 +126,26 @@ struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx)
return p->vk;
}
-static bool update_swapchain_info(struct priv *p,
- VkSwapchainCreateInfoKHR *info)
-{
- struct mpvk_ctx *vk = p->vk;
-
- // Query the supported capabilities and update this struct as needed
- VkSurfaceCapabilitiesKHR caps;
- VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps));
-
- // Sorted by preference
- static const VkCompositeAlphaFlagsKHR alphaModes[] = {
- VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
- VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
- };
-
- for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) {
- if (caps.supportedCompositeAlpha & alphaModes[i]) {
- info->compositeAlpha = alphaModes[i];
- break;
- }
- }
-
- if (!info->compositeAlpha) {
- MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n",
- caps.supportedCompositeAlpha);
- goto error;
- }
-
- static const VkSurfaceTransformFlagsKHR rotModes[] = {
- VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
- VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
- };
-
- for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) {
- if (caps.supportedTransforms & rotModes[i]) {
- info->preTransform = rotModes[i];
- break;
- }
- }
-
- if (!info->preTransform) {
- MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n",
- caps.supportedTransforms);
- goto error;
- }
-
- // Image count as required
- MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n",
- (int)info->minImageCount, (int)caps.minImageCount,
- (int)caps.maxImageCount);
-
- info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount);
- if (caps.maxImageCount)
- info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount);
-
- // Check the extent against the allowed parameters
- if (caps.currentExtent.width != info->imageExtent.width &&
- caps.currentExtent.width != 0xFFFFFFFF)
- {
- MP_WARN(vk, "Requested width %d does not match current width %d\n",
- (int)info->imageExtent.width, (int)caps.currentExtent.width);
- info->imageExtent.width = caps.currentExtent.width;
- }
-
- if (caps.currentExtent.height != info->imageExtent.height &&
- caps.currentExtent.height != 0xFFFFFFFF)
- {
- MP_WARN(vk, "Requested height %d does not match current height %d\n",
- (int)info->imageExtent.height, (int)caps.currentExtent.height);
- info->imageExtent.height = caps.currentExtent.height;
- }
-
- if (caps.minImageExtent.width > info->imageExtent.width ||
- caps.minImageExtent.height > info->imageExtent.height)
- {
- MP_ERR(vk, "Requested size %dx%d smaller than device minimum %d%d\n",
- (int)info->imageExtent.width, (int)info->imageExtent.height,
- (int)caps.minImageExtent.width, (int)caps.minImageExtent.height);
- goto error;
- }
-
- if (caps.maxImageExtent.width < info->imageExtent.width ||
- caps.maxImageExtent.height < info->imageExtent.height)
- {
- MP_ERR(vk, "Requested size %dx%d larger than device maximum %d%d\n",
- (int)info->imageExtent.width, (int)info->imageExtent.height,
- (int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height);
- goto error;
- }
-
- // We just request whatever usage we can, and let the ra_vk decide what
- // ra_tex_params that translates to. This makes the images as flexible
- // as possible.
- info->imageUsage = caps.supportedUsageFlags;
- return true;
-
-error:
- return false;
-}
-
void ra_vk_ctx_uninit(struct ra_ctx *ctx)
{
- if (ctx->ra) {
- struct priv *p = ctx->swapchain->priv;
- struct mpvk_ctx *vk = p->vk;
-
- mpvk_flush_commands(vk);
- mpvk_poll_commands(vk, UINT64_MAX);
+ if (!ctx->swapchain)
+ return;
- for (int i = 0; i < p->num_images; i++)
- ra_tex_free(ctx->ra, &p->images[i]);
- for (int i = 0; i < p->num_sems; i++) {
- vkDestroySemaphore(vk->dev, p->sems_in[i], MPVK_ALLOCATOR);
- vkDestroySemaphore(vk->dev, p->sems_out[i], MPVK_ALLOCATOR);
- }
+ struct priv *p = ctx->swapchain->priv;
+ struct mpvk_ctx *vk = p->vk;
- vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+ if (ctx->ra) {
+ pl_gpu_finish(vk->gpu);
+ pl_swapchain_destroy(&p->swapchain);
ctx->ra->fns->destroy(ctx->ra);
ctx->ra = NULL;
}
- talloc_free(ctx->swapchain);
- ctx->swapchain = NULL;
+ vk->gpu = NULL;
+ pl_vulkan_destroy(&vk->vulkan);
+ TA_FREEP(&ctx->swapchain);
}
-static const struct ra_swapchain_fns vulkan_swapchain;
-
bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
VkPresentModeKHR preferred_mode)
{
@@ -287,56 +157,36 @@ bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
p->vk = vk;
p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf);
- if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw))
- goto error;
- if (!spirv_compiler_init(ctx))
- goto error;
- vk->spirv = ctx->spirv;
- if (!mpvk_pick_surface_format(vk))
- goto error;
- if (!mpvk_device_init(vk, p->opts->dev_opts))
+ assert(vk->ctx);
+ assert(vk->vkinst);
+ vk->vulkan = pl_vulkan_create(vk->ctx, &(struct pl_vulkan_params) {
+ .instance = vk->vkinst->instance,
+ .surface = vk->surface,
+ .async_transfer = p->opts->async_transfer,
+ .async_compute = p->opts->async_compute,
+ .queue_count = p->opts->queue_count,
+ });
+ if (!vk->vulkan)
goto error;
- ctx->ra = ra_create_vk(vk, ctx->log);
+ vk->gpu = vk->vulkan->gpu;
+ ctx->ra = ra_create_pl(vk->gpu, ctx->log);
if (!ctx->ra)
goto error;
- static const VkPresentModeKHR present_modes[SWAP_COUNT] = {
- [SWAP_FIFO] = VK_PRESENT_MODE_FIFO_KHR,
- [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
- [SWAP_MAILBOX] = VK_PRESENT_MODE_MAILBOX_KHR,
- [SWAP_IMMEDIATE] = VK_PRESENT_MODE_IMMEDIATE_KHR,
+ // Create the swapchain
+ struct pl_vulkan_swapchain_params params = {
+ .surface = vk->surface,
+ .present_mode = preferred_mode,
+ .swapchain_depth = ctx->opts.swapchain_depth,
};
- p->protoInfo = (VkSwapchainCreateInfoKHR) {
- .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
- .surface = vk->surf,
- .imageFormat = vk->surf_format.format,
- .imageColorSpace = vk->surf_format.colorSpace,
- .imageArrayLayers = 1, // non-stereoscopic
- .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB
- .presentMode = p->opts->swap_mode ? present_modes[p->opts->swap_mode]
- : preferred_mode,
- .clipped = true,
- };
+ if (p->opts->swap_mode >= 0) // user override
+ params.present_mode = p->opts->swap_mode;
- // Make sure the swapchain present mode is supported
- int num_modes;
- VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
- &num_modes, NULL));
- VkPresentModeKHR *modes = talloc_array(NULL, VkPresentModeKHR, num_modes);
- VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
- &num_modes, modes));
- bool supported = false;
- for (int i = 0; i < num_modes; i++)
- supported |= (modes[i] == p->protoInfo.presentMode);
- talloc_free(modes);
-
- if (!supported) {
- MP_ERR(ctx, "Requested swap mode unsupported by this device!\n");
+ p->swapchain = pl_vulkan_create_swapchain(vk->vulkan, &params);
+ if (!p->swapchain)
goto error;
- }
return true;
@@ -345,245 +195,49 @@ error:
return false;
}
-static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p)
+bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height)
{
- assert(p->old_swapchain);
- vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR);
- p->old_swapchain = NULL;
-}
-
-bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
-{
- struct priv *p = sw->priv;
- if (w == p->w && h == p->h)
- return true;
-
- struct ra *ra = sw->ctx->ra;
- struct mpvk_ctx *vk = p->vk;
- VkImage *vkimages = NULL;
-
- // It's invalid to trigger another swapchain recreation while there's
- // more than one swapchain already active, so we need to flush any pending
- // asynchronous swapchain release operations that may be ongoing.
- while (p->old_swapchain)
- mpvk_poll_commands(vk, 100000); // 100μs
-
- VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
- sinfo.imageExtent = (VkExtent2D){ w, h };
- sinfo.oldSwapchain = p->swapchain;
-
- if (!update_swapchain_info(p, &sinfo))
- goto error;
-
- VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain));
- p->w = w;
- p->h = h;
-
- // Freeing the old swapchain while it's still in use is an error, so do
- // it asynchronously once the device is idle.
- if (sinfo.oldSwapchain) {
- p->old_swapchain = sinfo.oldSwapchain;
- vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p);
- }
-
- // Get the new swapchain images
- int num;
- VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL));
- vkimages = talloc_array(NULL, VkImage, num);
- VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages));
-
- // If needed, allocate some more semaphores
- while (num > p->num_sems) {
- VkSemaphore sem_in, sem_out;
- static const VkSemaphoreCreateInfo seminfo = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- };
- VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_in));
- VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_out));
-
- int idx = p->num_sems++;
- MP_TARRAY_GROW(p, p->sems_in, idx);
- MP_TARRAY_GROW(p, p->sems_out, idx);
- p->sems_in[idx] = sem_in;
- p->sems_out[idx] = sem_out;
- }
-
- // Invalidate the queued texture
- p->queued_fbo = (struct ra_fbo) {0};
-
- // Recreate the ra_tex wrappers
- for (int i = 0; i < p->num_images; i++)
- ra_tex_free(ra, &p->images[i]);
-
- p->num_images = num;
- MP_TARRAY_GROW(p, p->images, p->num_images);
- for (int i = 0; i < num; i++) {
- p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo);
- if (!p->images[i])
- goto error;
- }
+ struct priv *p = ctx->swapchain->priv;
- talloc_free(vkimages);
- return true;
+ bool ok = pl_swapchain_resize(p->swapchain, &width, &height);
+ ctx->vo->dwidth = width;
+ ctx->vo->dheight = height;
-error:
- talloc_free(vkimages);
- vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
- p->swapchain = NULL;
- return false;
+ return ok;
}
static int color_depth(struct ra_swapchain *sw)
{
- struct priv *p = sw->priv;
- int bits = 0;
-
- if (!p->num_images)
- return bits;
-
- // The channel with the most bits is probably the most authoritative about
- // the actual color information (consider e.g. a2bgr10). Slight downside
- // in that it results in rounding r/b for e.g. rgb565, but we don't pick
- // surfaces with fewer than 8 bits anyway.
- const struct ra_format *fmt = p->images[0]->params.format;
- for (int i = 0; i < fmt->num_components; i++) {
- int depth = fmt->component_depth[i];
- bits = MPMAX(bits, depth ? depth : fmt->component_size[i]);
- }
-
- return bits;
+ return 0; // TODO: implement this somehow?
}
static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
{
struct priv *p = sw->priv;
- struct mpvk_ctx *vk = p->vk;
- if (!p->swapchain)
+ struct pl_swapchain_frame frame;
+ if (!pl_swapchain_start_frame(p->swapchain, &frame))
+ return false;
+ if (!mppl_wrap_tex(sw->ctx->ra, frame.fbo, &p->proxy_tex))
return false;
- if (p->queued_fbo.tex) {
- assert(out_fbo != &p->queued_fbo);
- *out_fbo = p->queued_fbo;
- p->queued_fbo = (struct ra_fbo) {0};
- return true;
- }
-
- VkSemaphore sem_in = p->sems_in[p->idx_sems];
- MP_TRACE(vk, "vkAcquireNextImageKHR signals %p\n", (void *)sem_in);
-
- for (int attempts = 0; attempts < 2; attempts++) {
- uint32_t imgidx = 0;
- VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX,
- sem_in, NULL, &imgidx);
-
- switch (res) {
- case VK_SUCCESS:
- p->last_imgidx = imgidx;
- *out_fbo = (struct ra_fbo) {
- .tex = p->images[imgidx],
- .flip = false,
- };
- ra_tex_vk_external_dep(sw->ctx->ra, out_fbo->tex, sem_in);
- return true;
-
- case VK_ERROR_OUT_OF_DATE_KHR: {
- // In these cases try recreating the swapchain
- int w = p->w, h = p->h;
- p->w = p->h = 0; // invalidate the current state
- if (!ra_vk_ctx_resize(sw, w, h))
- return false;
- continue;
- }
-
- default:
- MP_ERR(vk, "Failed acquiring swapchain image: %s\n", vk_err(res));
- return false;
- }
- }
-
- // If we've exhausted the number of attempts to recreate the swapchain,
- // just give up silently.
- return false;
-}
+ *out_fbo = (struct ra_fbo) {
+ .tex = &p->proxy_tex,
+ .flip = frame.flipped,
+ };
-static void present_cb(struct priv *p, void *arg)
-{
- p->frames_in_flight--;
+ return true;
}
static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
{
struct priv *p = sw->priv;
- struct ra *ra = sw->ctx->ra;
- struct mpvk_ctx *vk = p->vk;
- if (!p->swapchain)
- return false;
-
- struct vk_cmd *cmd = ra_vk_submit(ra, p->images[p->last_imgidx]);
- if (!cmd)
- return false;
-
- VkSemaphore sem_out = p->sems_out[p->idx_sems++];
- p->idx_sems %= p->num_sems;
- vk_cmd_sig(cmd, sem_out);
-
- p->frames_in_flight++;
- vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL);
-
- vk_cmd_queue(vk, cmd);
- if (!mpvk_flush_commands(vk))
- return false;
-
- // Submit to the same queue that we were currently rendering to
- struct vk_cmdpool *pool_gfx = vk->pool_graphics;
- VkQueue queue = pool_gfx->queues[pool_gfx->idx_queues];
-
- // Rotate the queues to ensure good parallelism across frames
- for (int i = 0; i < vk->num_pools; i++) {
- struct vk_cmdpool *pool = vk->pools[i];
- pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
- }
-
- VkPresentInfoKHR pinfo = {
- .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
- .waitSemaphoreCount = 1,
- .pWaitSemaphores = &sem_out,
- .swapchainCount = 1,
- .pSwapchains = &p->swapchain,
- .pImageIndices = &p->last_imgidx,
- };
-
- MP_TRACE(vk, "vkQueuePresentKHR waits on %p\n", (void *)sem_out);
- VkResult res = vkQueuePresentKHR(queue, &pinfo);
- switch (res) {
- case VK_SUCCESS:
- case VK_SUBOPTIMAL_KHR:
- return true;
-
- case VK_ERROR_OUT_OF_DATE_KHR:
- // We can silently ignore this error, since the next start_frame will
- // recreate the swapchain automatically.
- return true;
-
- default:
- MP_ERR(vk, "Failed presenting to queue %p: %s\n", (void *)queue,
- vk_err(res));
- return false;
- }
+ return pl_swapchain_submit_frame(p->swapchain);
}
static void swap_buffers(struct ra_swapchain *sw)
{
struct priv *p = sw->priv;
-
- while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
- mpvk_poll_commands(p->vk, 100000); // 100μs
-
- // Also try and block until the next hardware buffer swap early. this
- // prevents start_frame from blocking later, thus slightly improving the
- // frame timing stats. (since mpv assumes most blocking will happen in
- // swap_buffers)
- start_frame(sw, &p->queued_fbo);
+ pl_swapchain_swap_buffers(p->swapchain);
}
static const struct ra_swapchain_fns vulkan_swapchain = {
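[Editorial note, not part of the commit:] The net effect of the context.c rewrite above is that the manual swapchain dance (semaphore pools, image acquisition, present queues) collapses into a handful of libplacebo calls. A condensed sketch of the resulting per-frame flow, reusing the names from the diff (struct priv is the file-local type above; the actual render step and error paths are simplified):

    // Condensed per-frame flow after this commit.
    static bool draw_frame(struct ra_swapchain *sw)
    {
        struct priv *p = sw->priv;

        // start_frame: acquire the next swapchain image...
        struct pl_swapchain_frame frame;
        if (!pl_swapchain_start_frame(p->swapchain, &frame))
            return false;
        // ...and expose it to the rest of mpv as an ordinary ra_tex.
        if (!mppl_wrap_tex(sw->ctx->ra, frame.fbo, &p->proxy_tex))
            return false;

        // (rendering into p->proxy_tex via the ra_pl renderpass path goes here)

        // submit_frame: queue the rendered image for presentation.
        if (!pl_swapchain_submit_frame(p->swapchain))
            return false;

        // swap_buffers: where mpv expects most blocking to happen.
        pl_swapchain_swap_buffers(p->swapchain);
        return true;
    }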
diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h
index a64d39f125..30c97cfb4f 100644
--- a/video/out/vulkan/context.h
+++ b/video/out/vulkan/context.h
@@ -7,7 +7,9 @@
void ra_vk_ctx_uninit(struct ra_ctx *ctx);
bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
VkPresentModeKHR preferred_mode);
-bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h);
+
+// Handles a resize request, and updates ctx->vo->dwidth/dheight
+bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height);
// May be called on a ra_ctx of any type.
struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx);
diff --git a/video/out/vulkan/context_wayland.c b/video/out/vulkan/context_wayland.c
index 7276775242..f1091a534f 100644
--- a/video/out/vulkan/context_wayland.c
+++ b/video/out/vulkan/context_wayland.c
@@ -41,8 +41,7 @@ static bool wayland_vk_init(struct ra_ctx *ctx)
struct mpvk_ctx *vk = &p->vk;
int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
- if (!mpvk_instance_init(vk, ctx->log, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
- ctx->opts.debug))
+ if (!mpvk_init(vk, ctx, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME))
goto error;
if (!vo_wayland_init(ctx->vo))
@@ -54,10 +53,10 @@ static bool wayland_vk_init(struct ra_ctx *ctx)
.surface = ctx->vo->wl->surface,
};
- VkResult res = vkCreateWaylandSurfaceKHR(vk->inst, &wlinfo, MPVK_ALLOCATOR,
- &vk->surf);
+ VkInstance inst = vk->vkinst->instance;
+ VkResult res = vkCreateWaylandSurfaceKHR(inst, &wlinfo, NULL, &vk->surface);
if (res != VK_SUCCESS) {
- MP_MSG(ctx, msgl, "Failed creating Wayland surface: %s\n", vk_err(res));
+ MP_MSG(ctx, msgl, "Failed creating Wayland surface\n");
goto error;
}
@@ -77,7 +76,7 @@ error:
return false;
}
-static void resize(struct ra_ctx *ctx)
+static bool resize(struct ra_ctx *ctx)
{
struct vo_wayland_state *wl = ctx->vo->wl;
@@ -87,9 +86,7 @@ static void resize(struct ra_ctx *ctx)
const int32_t height = wl->scaling*mp_rect_h(wl->geometry);
wl_surface_set_buffer_scale(wl->surface, wl->scaling);
-
- wl->vo->dwidth = width;
- wl->vo->dheight = height;
+ return ra_vk_ctx_resize(ctx, width, height);
}
static bool wayland_vk_reconfig(struct ra_ctx *ctx)
@@ -104,8 +101,7 @@ static int wayland_vk_control(struct ra_ctx *ctx, int *events, int request, void
{
int ret = vo_wayland_control(ctx->vo, events, request, arg);
if (*events & VO_EVENT_RESIZE) {
- resize(ctx);
- if (ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight))
+ if (!resize(ctx))
return VO_ERROR;
}
return ret;
diff --git a/video/out/vulkan/context_win.c b/video/out/vulkan/context_win.c
deleted file mode 100644
index cf31586d00..0000000000
--- a/video/out/vulkan/context_win.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * This file is part of mpv.
- *
- * mpv is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * mpv is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "video/out/gpu/context.h"
-#include "video/out/w32_common.h"
-
-#include "common.h"
-#include "context.h"
-#include "utils.h"
-
-EXTERN_C IMAGE_DOS_HEADER __ImageBase;
-#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase)
-
-struct priv {
- struct mpvk_ctx vk;
-};
-
-static void win_uninit(struct ra_ctx *ctx)
-{
- struct priv *p = ctx->priv;
-
- ra_vk_ctx_uninit(ctx);
- mpvk_uninit(&p->vk);
- vo_w32_uninit(ctx->vo);
-}
-
-static bool win_init(struct ra_ctx *ctx)
-{
- struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
- struct mpvk_ctx *vk = &p->vk;
- int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
-
- if (!mpvk_instance_init(vk, ctx->log, VK_KHR_WIN32_SURFACE_EXTENSION_NAME,
- ctx->opts.debug))
- goto error;
-
- if (!vo_w32_init(ctx->vo))
- goto error;
-
- VkWin32SurfaceCreateInfoKHR wininfo = {
- .sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
- .hinstance = HINST_THISCOMPONENT,
- .hwnd = vo_w32_hwnd(ctx->vo),
- };
-
- VkResult res = vkCreateWin32SurfaceKHR(vk->inst, &wininfo, MPVK_ALLOCATOR,
- &vk->surf);
- if (res != VK_SUCCESS) {
- MP_MSG(ctx, msgl, "Failed creating Windows surface: %s\n", vk_err(res));
- goto error;
- }
-
- if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR))
- goto error;
-
- return true;
-
-error:
- win_uninit(ctx);
- return false;
-}
-
-static bool resize(struct ra_ctx *ctx)
-{
- return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight);
-}
-
-static bool win_reconfig(struct ra_ctx *ctx)
-{
- vo_w32_config(ctx->vo);
- return resize(ctx);
-}
-
-static int win_control(struct ra_ctx *ctx, int *events, int request, void *arg)
-{
- int ret = vo_w32_control(ctx->vo, events, request, arg);
- if (*events & VO_EVENT_RESIZE) {
- if (!resize(ctx))
- return VO_ERROR;
- }
- return ret;
-}
-
-const struct ra_ctx_fns ra_ctx_vulkan_win = {
- .type = "vulkan",
- .name = "winvk",
- .reconfig = win_reconfig,
- .control = win_control,
- .init = win_init,
- .uninit = win_uninit,
-};
diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c
index c3bd49f4fb..9baa7c4dc4 100644
--- a/video/out/vulkan/context_xlib.c
+++ b/video/out/vulkan/context_xlib.c
@@ -41,8 +41,7 @@ static bool xlib_init(struct ra_ctx *ctx)
struct mpvk_ctx *vk = &p->vk;
int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
- if (!mpvk_instance_init(vk, ctx->log, VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
- ctx->opts.debug))
+ if (!mpvk_init(vk, ctx, VK_KHR_XLIB_SURFACE_EXTENSION_NAME))
goto error;
if (!vo_x11_init(ctx->vo))
@@ -57,10 +56,10 @@ static bool xlib_init(struct ra_ctx *ctx)
.window = ctx->vo->x11->window,
};
- VkResult res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR,
- &vk->surf);
+ VkInstance inst = vk->vkinst->instance;
+ VkResult res = vkCreateXlibSurfaceKHR(inst, &xinfo, NULL, &vk->surface);
if (res != VK_SUCCESS) {
- MP_MSG(ctx, msgl, "Failed creating Xlib surface: %s\n", vk_err(res));
+ MP_MSG(ctx, msgl, "Failed creating Xlib surface\n");
goto error;
}
@@ -76,7 +75,7 @@ error:
static bool resize(struct ra_ctx *ctx)
{
- return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight);
+ return ra_vk_ctx_resize(ctx, ctx->vo->dwidth, ctx->vo->dheight);
}
static bool xlib_reconfig(struct ra_ctx *ctx)
diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c
deleted file mode 100644
index 327a7ac809..0000000000
--- a/video/out/vulkan/formats.c
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "formats.h"
-
-const struct vk_format vk_formats[] = {
- // Regular, byte-aligned integer formats
- {"r8", VK_FORMAT_R8_UNORM, 1, 1, {8 }, RA_CTYPE_UNORM },
- {"rg8", VK_FORMAT_R8G8_UNORM, 2, 2, {8, 8 }, RA_CTYPE_UNORM },
- {"rgb8", VK_FORMAT_R8G8B8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM },
- {"rgba8", VK_FORMAT_R8G8B8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM },
- {"r16", VK_FORMAT_R16_UNORM, 1, 2, {16 }, RA_CTYPE_UNORM },
- {"rg16", VK_FORMAT_R16G16_UNORM, 2, 4, {16, 16 }, RA_CTYPE_UNORM },
- {"rgb16", VK_FORMAT_R16G16B16_UNORM, 3, 6, {16, 16, 16 }, RA_CTYPE_UNORM },
- {"rgba16", VK_FORMAT_R16G16B16A16_UNORM, 4, 8, {16, 16, 16, 16}, RA_CTYPE_UNORM },
-
- // Special, integer-only formats
- {"r32ui", VK_FORMAT_R32_UINT, 1, 4, {32 }, RA_CTYPE_UINT },
- {"rg32ui", VK_FORMAT_R32G32_UINT, 2, 8, {32, 32 }, RA_CTYPE_UINT },
- {"rgb32ui", VK_FORMAT_R32G32B32_UINT, 3, 12, {32, 32, 32 }, RA_CTYPE_UINT },
- {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_UINT },
- {"r64ui", VK_FORMAT_R64_UINT, 1, 8, {64 }, RA_CTYPE_UINT },
- {"rg64ui", VK_FORMAT_R64G64_UINT, 2, 16, {64, 64 }, RA_CTYPE_UINT },
- {"rgb64ui", VK_FORMAT_R64G64B64_UINT, 3, 24, {64, 64, 64 }, RA_CTYPE_UINT },
- {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_UINT },
-
- // Packed integer formats
- {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM },
- {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM },
- {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM },
- {"rgb5a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM },
-
- // Float formats (native formats, hf = half float, df = double float)
- {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT },
- {"rg16hf", VK_FORMAT_R16G16_SFLOAT, 2, 4, {16, 16 }, RA_CTYPE_FLOAT },
- {"rgb16hf", VK_FORMAT_R16G16B16_SFLOAT, 3, 6, {16, 16, 16 }, RA_CTYPE_FLOAT },
- {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT, 4, 8, {16, 16, 16, 16}, RA_CTYPE_FLOAT },
- {"r32f", VK_FORMAT_R32_SFLOAT, 1, 4, {32 }, RA_CTYPE_FLOAT },
- {"rg32f", VK_FORMAT_R32G32_SFLOAT, 2, 8, {32, 32 }, RA_CTYPE_FLOAT },
- {"rgb32f", VK_FORMAT_R32G32B32_SFLOAT, 3, 12, {32, 32, 32 }, RA_CTYPE_FLOAT },
- {"rgba32f", VK_FORMAT_R32G32B32A32_SFLOAT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_FLOAT },
- {"r64df", VK_FORMAT_R64_SFLOAT, 1, 8, {64 }, RA_CTYPE_FLOAT },
- {"rg64df", VK_FORMAT_R64G64_SFLOAT, 2, 16, {64, 64 }, RA_CTYPE_FLOAT },
- {"rgb64df", VK_FORMAT_R64G64B64_SFLOAT, 3, 24, {64, 64, 64 }, RA_CTYPE_FLOAT },
- {"rgba64df", VK_FORMAT_R64G64B64A64_SFLOAT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_FLOAT },
-
- // "Swapped" component order images
- {"bgr8", VK_FORMAT_B8G8R8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM, true },
- {"bgra8", VK_FORMAT_B8G8R8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
- {"bgra4", VK_FORMAT_B4G4R4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM, true },
- {"bgr565", VK_FORMAT_B5G6R5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM, true },
- {"bgr5a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true },
- {"a1rgb5", VK_FORMAT_A1R5G5B5_UNORM_PACK16, 4, 2, {1, 5, 5, 5 }, RA_CTYPE_UNORM, true },
- {"a2rgb10", VK_FORMAT_A2R10G10B10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
- {"a2bgr10", VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
- {"abgr8", VK_FORMAT_A8B8G8R8_UNORM_PACK32, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
- {0}
-};
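
The table above was the single source of truth for which formats ra_vk exposed;
each entry pairs a VkFormat with its component count, texel size and
per-component bit depths. As a minimal consistency sketch (not part of mpv;
assumes the vk_format definition from formats.h below and <assert.h>), the
invariant the table maintains is that the component bits fit into the declared
texel size:

    #include <assert.h>

    // Sketch only: packed formats may pad, hence <= rather than ==.
    static void check_vk_formats(void)
    {
        for (const struct vk_format *f = vk_formats; f->name; f++) {
            int bits = 0;
            for (int i = 0; i < 4; i++)
                bits += f->bits[i];
            assert(bits <= f->bytes * 8);
        }
    }
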
diff --git a/video/out/vulkan/formats.h b/video/out/vulkan/formats.h
deleted file mode 100644
index 22782a6958..0000000000
--- a/video/out/vulkan/formats.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-#include "video/out/gpu/ra.h"
-#include "common.h"
-
-struct vk_format {
- const char *name;
- VkFormat iformat; // vulkan format enum
- int components; // how many components are there
-    int bytes;            // how many bytes a texel occupies
- int bits[4]; // how many bits per component
- enum ra_ctype ctype; // format representation type
- bool fucked_order; // used for formats which are not simply rgba
-};
-
-extern const struct vk_format vk_formats[];
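
Since vk_formats is terminated by a zeroed sentinel entry, reverse lookups are
plain linear scans. A hypothetical helper in that style (ra_vk.c below does
the equivalent inline when wrapping swapchain images):

    // Hypothetical, not in mpv: find the table entry for a raw VkFormat,
    // e.g. the swapchain's surface format.
    static const struct vk_format *vk_find_format(VkFormat vkfmt)
    {
        for (const struct vk_format *f = vk_formats; f->name; f++) {
            if (f->iformat == vkfmt)
                return f;
        }
        return NULL;
    }
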
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
deleted file mode 100644
index e1e7ae28e6..0000000000
--- a/video/out/vulkan/malloc.c
+++ /dev/null
@@ -1,471 +0,0 @@
-#include "malloc.h"
-#include "utils.h"
-#include "osdep/timer.h"
-
-#if HAVE_WIN32_DESKTOP
-#include <versionhelpers.h>
-#endif
-
-// Controls the multiplication factor for new slab allocations. Each new slab
-// is allocated such that its size is this factor times the size of the
-// previous slab. Higher values make it grow faster.
-#define MPVK_HEAP_SLAB_GROWTH_RATE 4
-
-// Controls the minimum slab size, to reduce the frequency at which very small
-// slabs would need to get allocated when allocating the first few buffers.
-// (Default: 1 MB)
-#define MPVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20)
-
-// Controls the maximum slab size, to reduce the effect of unbounded slab
-// growth exhausting memory. If the application needs a single allocation
-// that's bigger than this value, it will be allocated directly from the
-// device. (Default: 512 MB)
-#define MPVK_HEAP_MAXIMUM_SLAB_SIZE (1 << 29)
-
-// Controls the minimum free region size, to reduce thrashing the free space
-// map with lots of small buffers during uninit. (Default: 1 KB)
-#define MPVK_HEAP_MINIMUM_REGION_SIZE (1 << 10)
-
-// Represents a region of available memory
-struct vk_region {
- size_t start; // first offset in region
- size_t end; // first offset *not* in region
-};
-
-static inline size_t region_len(struct vk_region r)
-{
- return r.end - r.start;
-}
-
-// A single slab represents a contiguous region of allocated memory. Actual
-// allocations are served as slices of this. Slabs are organized into linked
-// lists, which represent individual heaps.
-struct vk_slab {
- VkDeviceMemory mem; // underlying device allocation
- size_t size; // total size of `slab`
- size_t used; // number of bytes actually in use (for GC accounting)
- bool dedicated; // slab is allocated specifically for one object
- // free space map: a sorted list of memory regions that are available
- struct vk_region *regions;
- int num_regions;
- // optional, depends on the memory type:
- VkBuffer buffer; // buffer spanning the entire slab
- void *data; // mapped memory corresponding to `mem`
-};
-
-// Represents a single memory heap. We keep track of a vk_heap for each
-// combination of buffer type and memory selection parameters. This shouldn't
-// actually be that many in practice, because some combinations simply never
-// occur, and others will generally be the same for the same objects.
-struct vk_heap {
- VkBufferUsageFlags usage; // the buffer usage type (or 0)
- VkMemoryPropertyFlags flags; // the memory type flags (or 0)
- uint32_t typeBits; // the memory type index requirements (or 0)
- bool exportable; // whether memory is exportable to other APIs
- struct vk_slab **slabs; // array of slabs sorted by size
- int num_slabs;
-};
-
-// The overall state of the allocator, which keeps track of a vk_heap for each
-// memory type.
-struct vk_malloc {
- VkPhysicalDeviceMemoryProperties props;
- struct vk_heap *heaps;
- int num_heaps;
-};
-
-static void slab_free(struct mpvk_ctx *vk, struct vk_slab *slab)
-{
- if (!slab)
- return;
-
- assert(slab->used == 0);
-
- int64_t start = mp_time_us();
- vkDestroyBuffer(vk->dev, slab->buffer, MPVK_ALLOCATOR);
- // also implicitly unmaps the memory if needed
- vkFreeMemory(vk->dev, slab->mem, MPVK_ALLOCATOR);
- int64_t stop = mp_time_us();
-
- MP_VERBOSE(vk, "Freeing slab of size %zu took %lld μs.\n",
- slab->size, (long long)(stop - start));
-
- talloc_free(slab);
-}
-
-static bool find_best_memtype(struct mpvk_ctx *vk, uint32_t typeBits,
- VkMemoryPropertyFlags flags,
- VkMemoryType *out_type, int *out_index)
-{
- struct vk_malloc *ma = vk->alloc;
-
- // The vulkan spec requires memory types to be sorted in the "optimal"
- // order, so the first matching type we find will be the best/fastest one.
- for (int i = 0; i < ma->props.memoryTypeCount; i++) {
- // The memory type flags must include our properties
- if ((ma->props.memoryTypes[i].propertyFlags & flags) != flags)
- continue;
- // The memory type must be supported by the requirements (bitfield)
- if (typeBits && !(typeBits & (1 << i)))
- continue;
- *out_type = ma->props.memoryTypes[i];
- *out_index = i;
- return true;
- }
-
- MP_ERR(vk, "Found no memory type matching property flags 0x%x and type "
- "bits 0x%x!\n", (unsigned)flags, (unsigned)typeBits);
- return false;
-}
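
Because of that ordering guarantee, callers only pass the property flags they
need plus the type bits from a VkMemoryRequirements query. A hedged sketch of
a typical call, mirroring slab_alloc() below (`reqs` and `minfo` stand in for
the caller's locals):

    // Sketch: choose a host-visible memory type permitted by `reqs`.
    VkMemoryType type;
    int index;
    if (find_best_memtype(vk, reqs.memoryTypeBits,
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                          &type, &index))
        minfo.memoryTypeIndex = index;
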
-
-static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
- size_t size)
-{
- struct vk_slab *slab = talloc_ptrtype(NULL, slab);
- *slab = (struct vk_slab) {
- .size = size,
- };
-
- MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, (struct vk_region) {
- .start = 0,
- .end = slab->size,
- });
-
- VkExportMemoryAllocateInfoKHR eminfo = {
- .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
-#if HAVE_WIN32_DESKTOP
- .handleTypes = IsWindows8OrGreater()
- ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
- : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
-#else
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
-#endif
- };
-
- VkMemoryAllocateInfo minfo = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = heap->exportable ? &eminfo : NULL,
- .allocationSize = slab->size,
- };
-
- uint32_t typeBits = heap->typeBits ? heap->typeBits : UINT32_MAX;
- if (heap->usage) {
- // FIXME: Since we can't keep track of queue family ownership properly,
- // and we don't know in advance what types of queue families this buffer
- // will belong to, we're forced to share all of our buffers between all
- // command pools.
- uint32_t qfs[3] = {0};
- for (int i = 0; i < vk->num_pools; i++)
- qfs[i] = vk->pools[i]->qf;
-
- VkExternalMemoryBufferCreateInfoKHR ebinfo = {
- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
- .handleTypes = eminfo.handleTypes,
- };
-
- VkBufferCreateInfo binfo = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = heap->exportable ? &ebinfo : NULL,
- .size = slab->size,
- .usage = heap->usage,
- .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
- : VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = vk->num_pools,
- .pQueueFamilyIndices = qfs,
- };
-
- VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer));
-
- VkMemoryRequirements reqs;
- vkGetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs);
- minfo.allocationSize = reqs.size; // this can be larger than slab->size
- typeBits &= reqs.memoryTypeBits; // this can restrict the types
- }
-
- VkMemoryType type;
- int index;
- if (!find_best_memtype(vk, typeBits, heap->flags, &type, &index))
- goto error;
-
- MP_VERBOSE(vk, "Allocating %zu memory of type 0x%x (id %d) in heap %d.\n",
- slab->size, (unsigned)type.propertyFlags, index, (int)type.heapIndex);
-
- minfo.memoryTypeIndex = index;
- VK(vkAllocateMemory(vk->dev, &minfo, MPVK_ALLOCATOR, &slab->mem));
-
- if (heap->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
- VK(vkMapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data));
-
- if (slab->buffer)
- VK(vkBindBufferMemory(vk->dev, slab->buffer, slab->mem, 0));
-
- return slab;
-
-error:
- slab_free(vk, slab);
- return NULL;
-}
-
-static void insert_region(struct vk_slab *slab, struct vk_region region)
-{
- if (region.start == region.end)
- return;
-
- bool big_enough = region_len(region) >= MPVK_HEAP_MINIMUM_REGION_SIZE;
-
- // Find the index of the first region that comes after this
- for (int i = 0; i < slab->num_regions; i++) {
- struct vk_region *r = &slab->regions[i];
-
- // Check for a few special cases which can be coalesced
- if (r->end == region.start) {
- // The new region is at the tail of this region. In addition to
- // modifying this region, we also need to coalesce all the following
- // regions for as long as possible
- r->end = region.end;
-
- struct vk_region *next = &slab->regions[i+1];
- while (i+1 < slab->num_regions && r->end == next->start) {
- r->end = next->end;
- MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, i+1);
- }
- return;
- }
-
- if (r->start == region.end) {
- // The new region is at the head of this region. We don't need to
- // do anything special here - because if this could be further
- // coalesced backwards, the previous loop iteration would already
- // have caught it.
- r->start = region.start;
- return;
- }
-
- if (r->start > region.start) {
- // The new region comes somewhere before this region, so insert
- // it into this index in the array.
- if (big_enough) {
- MP_TARRAY_INSERT_AT(slab, slab->regions, slab->num_regions,
- i, region);
- }
- return;
- }
- }
-
- // If we've reached the end of this loop, then all of the regions
- // come before the new region, and are disconnected - so append it
- if (big_enough)
- MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, region);
-}
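
A worked example of the coalescing above: suppose a slab has free regions
[0, 100) and [200, 300), and the slice [100, 200) is returned to it.

    // insert_region(slab, {100, 200}):
    //   1. [0, 100) matches `r->end == region.start`, so it grows into
    //      [0, 200).
    //   2. The while loop then sees [0, 200) touching [200, 300) and
    //      merges them, leaving the single free region [0, 300).
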
-
-static void heap_uninit(struct mpvk_ctx *vk, struct vk_heap *heap)
-{
- for (int i = 0; i < heap->num_slabs; i++)
- slab_free(vk, heap->slabs[i]);
-
- talloc_free(heap->slabs);
- *heap = (struct vk_heap){0};
-}
-
-void vk_malloc_init(struct mpvk_ctx *vk)
-{
- assert(vk->physd);
- vk->alloc = talloc_zero(NULL, struct vk_malloc);
- vkGetPhysicalDeviceMemoryProperties(vk->physd, &vk->alloc->props);
-}
-
-void vk_malloc_uninit(struct mpvk_ctx *vk)
-{
- struct vk_malloc *ma = vk->alloc;
- if (!ma)
- return;
-
- for (int i = 0; i < ma->num_heaps; i++)
- heap_uninit(vk, &ma->heaps[i]);
-
- talloc_free(ma);
- vk->alloc = NULL;
-}
-
-void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice)
-{
- struct vk_slab *slab = slice.priv;
- if (!slab)
- return;
-
- assert(slab->used >= slice.size);
- slab->used -= slice.size;
-
- MP_DBG(vk, "Freeing slice %zu + %zu from slab with size %zu\n",
- slice.offset, slice.size, slab->size);
-
- if (slab->dedicated) {
- // If the slab was purpose-allocated for this memslice, we can just
- // free it here
- slab_free(vk, slab);
- } else {
- // Return the allocation to the free space map
- insert_region(slab, (struct vk_region) {
- .start = slice.offset,
- .end = slice.offset + slice.size,
- });
- }
-}
-
-// reqs: can be NULL
-static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage,
- VkMemoryPropertyFlags flags,
- VkMemoryRequirements *reqs,
- bool exportable)
-{
- struct vk_malloc *ma = vk->alloc;
- int typeBits = reqs ? reqs->memoryTypeBits : 0;
-
- for (int i = 0; i < ma->num_heaps; i++) {
- if (ma->heaps[i].usage != usage)
- continue;
- if (ma->heaps[i].flags != flags)
- continue;
- if (ma->heaps[i].typeBits != typeBits)
- continue;
- if (ma->heaps[i].exportable != exportable)
- continue;
- return &ma->heaps[i];
- }
-
- // Not found => add it
- MP_TARRAY_GROW(ma, ma->heaps, ma->num_heaps + 1);
- struct vk_heap *heap = &ma->heaps[ma->num_heaps++];
- *heap = (struct vk_heap) {
- .usage = usage,
- .flags = flags,
- .typeBits = typeBits,
- .exportable = exportable,
- };
- return heap;
-}
-
-static inline bool region_fits(struct vk_region r, size_t size, size_t align)
-{
- return MP_ALIGN_UP(r.start, align) + size <= r.end;
-}
-
-// Finds the best-fitting region in a heap. If the heap is too small or too
-// fragmented, a new slab will be allocated under the hood.
-static bool heap_get_region(struct mpvk_ctx *vk, struct vk_heap *heap,
- size_t size, size_t align,
- struct vk_slab **out_slab, int *out_index)
-{
- struct vk_slab *slab = NULL;
-
- // If the allocation is very big, serve it directly instead of bothering
- // with the heap
- if (size > MPVK_HEAP_MAXIMUM_SLAB_SIZE) {
- slab = slab_alloc(vk, heap, size);
- *out_slab = slab;
- *out_index = 0;
- return !!slab;
- }
-
- for (int i = 0; i < heap->num_slabs; i++) {
- slab = heap->slabs[i];
- if (slab->size < size)
- continue;
-
- // Attempt a best fit search
- int best = -1;
- for (int n = 0; n < slab->num_regions; n++) {
- struct vk_region r = slab->regions[n];
- if (!region_fits(r, size, align))
- continue;
- if (best >= 0 && region_len(r) > region_len(slab->regions[best]))
- continue;
- best = n;
- }
-
- if (best >= 0) {
- *out_slab = slab;
- *out_index = best;
- return true;
- }
- }
-
- // Otherwise, allocate a new vk_slab and append it to the list.
- size_t cur_size = MPMAX(size, slab ? slab->size : 0);
- size_t slab_size = MPVK_HEAP_SLAB_GROWTH_RATE * cur_size;
- slab_size = MPMAX(MPVK_HEAP_MINIMUM_SLAB_SIZE, slab_size);
- slab_size = MPMIN(MPVK_HEAP_MAXIMUM_SLAB_SIZE, slab_size);
- assert(slab_size >= size);
- slab = slab_alloc(vk, heap, slab_size);
- if (!slab)
- return false;
- MP_TARRAY_APPEND(NULL, heap->slabs, heap->num_slabs, slab);
-
- // Return the only region there is in a newly allocated slab
- assert(slab->num_regions == 1);
- *out_slab = slab;
- *out_index = 0;
- return true;
-}
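
To make the sizing policy concrete, a worked example using the default
tunables from the top of this file:

    // Request: size = 3 MiB, largest existing slab = 3 MiB (all full).
    //   cur_size  = MPMAX(3 MiB, 3 MiB)    = 3 MiB
    //   slab_size = 4 * 3 MiB              = 12 MiB  (growth rate)
    //   slab_size = MPMAX(1 MiB, 12 MiB)   = 12 MiB  (minimum clamp)
    //   slab_size = MPMIN(512 MiB, 12 MiB) = 12 MiB  (maximum clamp)
    // A 600 MiB request never reaches this code path: it exceeds
    // MPVK_HEAP_MAXIMUM_SLAB_SIZE, so it is served by the direct
    // slab_alloc() at the top of this function instead.
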
-
-static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size,
- size_t alignment, struct vk_memslice *out)
-{
- struct vk_slab *slab;
- int index;
- alignment = MP_ALIGN_UP(alignment, vk->limits.bufferImageGranularity);
- if (!heap_get_region(vk, heap, size, alignment, &slab, &index))
- return false;
-
- struct vk_region reg = slab->regions[index];
- MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, index);
- *out = (struct vk_memslice) {
- .vkmem = slab->mem,
- .offset = MP_ALIGN_UP(reg.start, alignment),
- .size = size,
- .slab_size = slab->size,
- .priv = slab,
- };
-
- MP_DBG(vk, "Sub-allocating slice %zu + %zu from slab with size %zu\n",
- out->offset, out->size, slab->size);
-
- size_t out_end = out->offset + out->size;
- insert_region(slab, (struct vk_region) { reg.start, out->offset });
- insert_region(slab, (struct vk_region) { out_end, reg.end });
-
- slab->used += size;
- return true;
-}
-
-bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
- VkMemoryPropertyFlags flags, struct vk_memslice *out)
-{
- struct vk_heap *heap = find_heap(vk, 0, flags, &reqs, false);
- return slice_heap(vk, heap, reqs.size, reqs.alignment, out);
-}
-
-bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags,
- VkMemoryPropertyFlags memFlags, VkDeviceSize size,
- VkDeviceSize alignment, bool exportable,
- struct vk_bufslice *out)
-{
- if (exportable) {
- if (!vk->has_ext_external_memory_export) {
- MP_ERR(vk, "Exportable memory requires the %s extension\n",
- MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
- return false;
- }
- }
-
- struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL, exportable);
- if (!slice_heap(vk, heap, size, alignment, &out->mem))
- return false;
-
- struct vk_slab *slab = out->mem.priv;
- out->buf = slab->buffer;
- if (slab->data)
- out->data = (void *)((uintptr_t)slab->data + (ptrdiff_t)out->mem.offset);
-
- return true;
-}
diff --git a/video/out/vulkan/malloc.h b/video/out/vulkan/malloc.h
deleted file mode 100644
index 9b311ce311..0000000000
--- a/video/out/vulkan/malloc.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#include "common.h"
-
-void vk_malloc_init(struct mpvk_ctx *vk);
-void vk_malloc_uninit(struct mpvk_ctx *vk);
-
-// Represents a single "slice" of generic (non-buffer) memory, plus some
-// metadata for accounting. This struct is essentially read-only.
-struct vk_memslice {
- VkDeviceMemory vkmem;
- size_t offset;
- size_t size;
- size_t slab_size;
- void *priv;
-};
-
-void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice);
-bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
- VkMemoryPropertyFlags flags, struct vk_memslice *out);
-
-// Represents a single "slice" of a larger buffer
-struct vk_bufslice {
- struct vk_memslice mem; // must be freed by the user when done
- VkBuffer buf; // the buffer this memory was sliced from
- // For persistently mapped buffers, this points to the first usable byte of
- // this slice.
- void *data;
-};
-
-// Allocate a buffer slice. This is more efficient than vk_malloc_generic
-// when the user needs lots of buffers, since it doesn't require
-// creating/destroying lots of (little) VkBuffers.
-bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags,
- VkMemoryPropertyFlags memFlags, VkDeviceSize size,
- VkDeviceSize alignment, bool exportable,
- struct vk_bufslice *out);
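
An illustrative usage sketch (not from mpv; `src` is a hypothetical source
pointer and <string.h> is assumed): allocate a host-visible staging slice and
write through its persistent mapping:

    struct vk_bufslice slice;
    if (vk_malloc_buffer(vk, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                         4096, 4, false, &slice))
    {
        if (slice.data)
            memcpy(slice.data, src, 4096); // hypothetical source data
        // ... record a copy from slice.buf at offset slice.mem.offset ...
        // In real code the free must be deferred until the GPU is done:
        vk_free_memslice(vk, slice.mem);
    }
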
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
deleted file mode 100644
index 3e4ba28ac4..0000000000
--- a/video/out/vulkan/ra_vk.c
+++ /dev/null
@@ -1,1982 +0,0 @@
-#include "video/out/gpu/utils.h"
-#include "video/out/gpu/spirv.h"
-
-#include "ra_vk.h"
-#include "malloc.h"
-
-#if HAVE_WIN32_DESKTOP
-#include <versionhelpers.h>
-#endif
-
-static struct ra_fns ra_fns_vk;
-
-enum queue_type {
- GRAPHICS,
- COMPUTE,
- TRANSFER,
-};
-
-// For ra.priv
-struct ra_vk {
- struct mpvk_ctx *vk;
- struct ra_tex *clear_tex; // stupid hack for clear()
- struct vk_cmd *cmd; // currently recording cmd
-};
-
-struct mpvk_ctx *ra_vk_get(struct ra *ra)
-{
- if (ra->fns != &ra_fns_vk)
- return NULL;
-
- struct ra_vk *p = ra->priv;
- return p->vk;
-}
-
-static void vk_submit(struct ra *ra)
-{
- struct ra_vk *p = ra->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- if (p->cmd) {
- vk_cmd_queue(vk, p->cmd);
- p->cmd = NULL;
- }
-}
-
-// Returns a command buffer, or NULL on error
-static struct vk_cmd *vk_require_cmd(struct ra *ra, enum queue_type type)
-{
- struct ra_vk *p = ra->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- struct vk_cmdpool *pool;
- switch (type) {
- case GRAPHICS: pool = vk->pool_graphics; break;
- case COMPUTE: pool = vk->pool_compute; break;
-
- // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec)
- case TRANSFER:
- pool = vk->pool_transfer;
- if (!pool)
- pool = vk->pool_compute;
- if (!pool)
- pool = vk->pool_graphics;
- break;
- default: abort();
- }
-
- assert(pool);
- if (p->cmd && p->cmd->pool == pool)
- return p->cmd;
-
- vk_submit(ra);
- p->cmd = vk_cmd_begin(vk, pool);
- return p->cmd;
-}
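
The fallback chain encodes the Vulkan guarantee that GRAPHICS and COMPUTE
queues implicitly support transfer operations. A typical call-site pattern
(vk_tex_upload() further down follows this shape; the error label is
hypothetical):

    struct vk_cmd *cmd = vk_require_cmd(ra, TRANSFER);
    if (!cmd)
        goto error;
    // ... record vkCmd* calls into cmd->buf; submission happens lazily
    // via vk_submit() when another queue type is requested or at flush.
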
-
-#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \
- static void fun##_lazy(struct ra *ra, argtype *arg) { \
- struct ra_vk *p = ra->priv; \
- struct mpvk_ctx *vk = ra_vk_get(ra); \
- if (p->cmd) { \
- vk_cmd_callback(p->cmd, (vk_cb) fun, ra, arg); \
- } else { \
- vk_dev_callback(vk, (vk_cb) fun, ra, arg); \
- } \
- }
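
For reference, this is approximately what one instantiation expands to;
MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex) appears further down:

    static void vk_tex_destroy_lazy(struct ra *ra, struct ra_tex *arg)
    {
        struct ra_vk *p = ra->priv;
        struct mpvk_ctx *vk = ra_vk_get(ra);
        if (p->cmd) {
            // defer destruction until the recording command buffer is done
            vk_cmd_callback(p->cmd, (vk_cb) vk_tex_destroy, ra, arg);
        } else {
            vk_dev_callback(vk, (vk_cb) vk_tex_destroy, ra, arg);
        }
    }
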
-
-static void vk_destroy_ra(struct ra *ra)
-{
- struct ra_vk *p = ra->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- vk_submit(ra);
- mpvk_flush_commands(vk);
- mpvk_poll_commands(vk, UINT64_MAX);
- ra_tex_free(ra, &p->clear_tex);
-
- talloc_free(ra);
-}
-
-static bool vk_setup_formats(struct ra *ra)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) {
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop);
-
- // As a bare minimum, we need to sample from an allocated image
- VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
- if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
- continue;
-
- VkFormatFeatureFlags linear_bits, render_bits;
- linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
- render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
- VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
-
- struct ra_format *fmt = talloc_zero(ra, struct ra_format);
- *fmt = (struct ra_format) {
- .name = vk_fmt->name,
- .priv = (void *)vk_fmt,
- .ctype = vk_fmt->ctype,
- .ordered = !vk_fmt->fucked_order,
- .num_components = vk_fmt->components,
- .pixel_size = vk_fmt->bytes,
- .linear_filter = !!(flags & linear_bits),
- .renderable = !!(flags & render_bits),
- };
-
- for (int i = 0; i < 4; i++)
- fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];
-
- fmt->glsl_format = ra_fmt_glsl_format(fmt);
-
- MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
- }
-
- // Populate some other capabilities related to formats while we're at it
- VkImageType imgType[3] = {
- VK_IMAGE_TYPE_1D,
- VK_IMAGE_TYPE_2D,
- VK_IMAGE_TYPE_3D
- };
-
- // R8_UNORM is supported on literally every single vulkan implementation
- const VkFormat testfmt = VK_FORMAT_R8_UNORM;
-
- for (int d = 0; d < 3; d++) {
- VkImageFormatProperties iprop;
- VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
- testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL,
- VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop);
-
- switch (imgType[d]) {
- case VK_IMAGE_TYPE_1D:
- if (res == VK_SUCCESS)
- ra->caps |= RA_CAP_TEX_1D;
- break;
- case VK_IMAGE_TYPE_2D:
- // 2D formats must be supported by RA, so ensure this is the case
- VK_ASSERT(res, "Querying 2D format limits");
- ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height);
- break;
- case VK_IMAGE_TYPE_3D:
- if (res == VK_SUCCESS)
- ra->caps |= RA_CAP_TEX_3D;
- break;
- }
- }
-
- // RA_CAP_BLIT implies both blitting between images as well as blitting
- // directly to the swapchain image, so check for all three operations
- bool blittable = true;
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop);
- if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
- blittable = false;
- if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
- blittable = false;
-
- vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop);
- if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
- blittable = false;
-
- if (blittable)
- ra->caps |= RA_CAP_BLIT;
-
- return true;
-
-error:
- return false;
-}
-
-static struct ra_fns ra_fns_vk;
-
-struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
-{
- assert(vk->dev);
- assert(vk->alloc);
-
- struct ra *ra = talloc_zero(NULL, struct ra);
- ra->log = log;
- ra->fns = &ra_fns_vk;
-
- struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk);
- p->vk = vk;
-
- ra->caps |= vk->spirv->ra_caps;
- ra->glsl_version = vk->spirv->glsl_version;
- ra->glsl_vulkan = true;
- ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
- ra->max_pushc_size = vk->limits.maxPushConstantsSize;
-
- if (vk->pool_compute) {
- ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS;
- // If we have more compute queues than graphics queues, we probably
- // want to be using them. (This seems mostly relevant for AMD)
- if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues)
- ra->caps |= RA_CAP_PARALLEL_COMPUTE;
- }
-
- if (!vk_setup_formats(ra))
- goto error;
-
- // UBO support is required
- ra->caps |= RA_CAP_BUF_RO | RA_CAP_FRAGCOORD;
-
- // textureGather requires the ImageGatherExtended capability
- if (vk->features.shaderImageGatherExtended)
- ra->caps |= RA_CAP_GATHER;
-
- // Try creating a shader storage buffer
- struct ra_buf_params ssbo_params = {
- .type = RA_BUF_TYPE_SHADER_STORAGE,
- .size = 16,
- };
-
- struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params);
- if (ssbo) {
- ra->caps |= RA_CAP_BUF_RW;
- ra_buf_free(ra, &ssbo);
- }
-
- // To support clear() by region, we need to allocate a dummy 1x1 image that
- // will be used as the source of blit operations
- struct ra_tex_params clear_params = {
- .dimensions = 1, // no point in using a 2D image if height = 1
- .w = 1,
- .h = 1,
- .d = 1,
- .format = ra_find_float16_format(ra, 4),
- .blit_src = 1,
- .host_mutable = 1,
- };
-
- p->clear_tex = ra_tex_create(ra, &clear_params);
- if (!p->clear_tex) {
- MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n");
- goto error;
- }
-
- return ra;
-
-error:
- vk_destroy_ra(ra);
- return NULL;
-}
-
-// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain
-// compatible. The renderpass will automatically transition the image out of
-// initialLayout and into finalLayout.
-static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
- VkAttachmentLoadOp loadOp,
- VkImageLayout initialLayout,
- VkImageLayout finalLayout,
- VkRenderPass *out)
-{
- struct vk_format *vk_fmt = fmt->priv;
- assert(fmt->renderable);
-
- VkRenderPassCreateInfo rinfo = {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription) {
- .format = vk_fmt->iformat,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .loadOp = loadOp,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = initialLayout,
- .finalLayout = finalLayout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- },
- },
- };
-
- return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out);
-}
-
-// For ra_tex.priv
-struct ra_tex_vk {
- bool external_img;
- enum queue_type upload_queue;
- VkImageType type;
- VkImage img;
- struct vk_memslice mem;
- // for sampling
- VkImageView view;
- VkSampler sampler;
- // for rendering
- VkFramebuffer framebuffer;
- VkRenderPass dummyPass;
- // for uploading
- struct ra_buf_pool pbo;
- // "current" metadata, can change during the course of execution
- VkImageLayout current_layout;
- VkAccessFlags current_access;
- // the signal guards reuse, and can be NULL
- struct vk_signal *sig;
- VkPipelineStageFlags sig_stage;
- VkSemaphore ext_dep; // external semaphore, not owned by the ra_tex
-};
-
-void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep)
-{
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(!tex_vk->ext_dep);
- tex_vk->ext_dep = dep;
-}
-
-// Small helper to ease image barrier creation. If `discard` is set, the contents
-// of the image will be undefined after the barrier
-static void tex_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex,
- VkPipelineStageFlags stage, VkAccessFlags newAccess,
- VkImageLayout newLayout, bool discard)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_tex_vk *tex_vk = tex->priv;
-
- if (tex_vk->ext_dep) {
- vk_cmd_dep(cmd, tex_vk->ext_dep, stage);
- tex_vk->ext_dep = NULL;
- }
-
- VkImageMemoryBarrier imgBarrier = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .oldLayout = tex_vk->current_layout,
- .newLayout = newLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .srcAccessMask = tex_vk->current_access,
- .dstAccessMask = newAccess,
- .image = tex_vk->img,
- .subresourceRange = vk_range,
- };
-
- if (discard) {
- imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
- imgBarrier.srcAccessMask = 0;
- }
-
- VkEvent event = NULL;
- vk_cmd_wait(vk, cmd, &tex_vk->sig, stage, &event);
-
- bool need_trans = tex_vk->current_layout != newLayout ||
- tex_vk->current_access != newAccess;
-
-    // Transitioning to VK_IMAGE_LAYOUT_UNDEFINED is a pseudo-operation;
-    // for us it means we can skip the actual transition
- if (need_trans && newLayout != VK_IMAGE_LAYOUT_UNDEFINED) {
- if (event) {
- vkCmdWaitEvents(cmd->buf, 1, &event, tex_vk->sig_stage,
- stage, 0, NULL, 0, NULL, 1, &imgBarrier);
- } else {
- // If we're not using an event, then the source stage is irrelevant
- // because we're coming from a different queue anyway, so we can
- // safely set it to TOP_OF_PIPE.
- imgBarrier.srcAccessMask = 0;
- vkCmdPipelineBarrier(cmd->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- stage, 0, 0, NULL, 0, NULL, 1, &imgBarrier);
- }
- }
-
- tex_vk->current_layout = newLayout;
- tex_vk->current_access = newAccess;
-}
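
As used by the upload path below (which passes params->invalidate as
`discard`), a transfer write that clobbers the whole image looks like:

    tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_ACCESS_TRANSFER_WRITE_BIT,
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                true /* discard: old contents are not needed */);
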
-
-static void tex_signal(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex,
- VkPipelineStageFlags stage)
-{
- struct ra_tex_vk *tex_vk = tex->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
- assert(!tex_vk->sig);
-
- tex_vk->sig = vk_cmd_signal(vk, cmd, stage);
- tex_vk->sig_stage = stage;
-}
-
-static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex)
-{
- if (!tex)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_tex_vk *tex_vk = tex->priv;
-
- ra_buf_pool_uninit(ra, &tex_vk->pbo);
- vk_signal_destroy(vk, &tex_vk->sig);
- vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR);
- vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR);
- vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR);
- vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR);
- if (!tex_vk->external_img) {
- vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR);
- vk_free_memslice(vk, tex_vk->mem);
- }
-
- talloc_free(tex);
-}
-
-MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex);
-
-// Initializes non-VkImage values like the image view, samplers, etc.
-static bool vk_init_image(struct ra *ra, struct ra_tex *tex)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- struct ra_tex_params *params = &tex->params;
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(tex_vk->img);
-
- tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
- tex_vk->current_access = 0;
-
- if (params->render_src || params->render_dst) {
- static const VkImageViewType viewType[] = {
- [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
- [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
- [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
- };
-
- const struct vk_format *fmt = params->format->priv;
- VkImageViewCreateInfo vinfo = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = tex_vk->img,
- .viewType = viewType[tex_vk->type],
- .format = fmt->iformat,
- .subresourceRange = vk_range,
- };
-
- VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view));
- }
-
- if (params->render_src) {
- assert(params->format->linear_filter || !params->src_linear);
- VkFilter filter = params->src_linear
- ? VK_FILTER_LINEAR
- : VK_FILTER_NEAREST;
- VkSamplerAddressMode wrap = params->src_repeat
- ? VK_SAMPLER_ADDRESS_MODE_REPEAT
- : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
- VkSamplerCreateInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
- .magFilter = filter,
- .minFilter = filter,
- .addressModeU = wrap,
- .addressModeV = wrap,
- .addressModeW = wrap,
- .maxAnisotropy = 1.0,
- };
-
- VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler));
- }
-
- if (params->render_dst) {
- // Framebuffers need to be created against a specific render pass
- // layout, so we need to temporarily create a skeleton/dummy render
- // pass for vulkan to figure out the compatibility
- VK(vk_create_render_pass(vk->dev, params->format,
- VK_ATTACHMENT_LOAD_OP_DONT_CARE,
- VK_IMAGE_LAYOUT_UNDEFINED,
- VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- &tex_vk->dummyPass));
-
- VkFramebufferCreateInfo finfo = {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .renderPass = tex_vk->dummyPass,
- .attachmentCount = 1,
- .pAttachments = &tex_vk->view,
- .width = tex->params.w,
- .height = tex->params.h,
- .layers = 1,
- };
-
- VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR,
- &tex_vk->framebuffer));
-
- // NOTE: Normally we would free the dummyPass again here, but a bug
- // in the nvidia vulkan driver causes a segfault if you do.
- }
-
- return true;
-
-error:
- return false;
-}
-
-static struct ra_tex *vk_tex_create(struct ra *ra,
- const struct ra_tex_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- assert(!params->format->dummy_format);
-
- struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
- tex->params = *params;
- tex->params.initial_data = NULL;
-
- struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
- tex_vk->upload_queue = GRAPHICS;
-
- const struct vk_format *fmt = params->format->priv;
- switch (params->dimensions) {
- case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
- case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
- case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
- default: abort();
- }
-
- VkImageUsageFlags usage = 0;
- if (params->render_src)
- usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
- if (params->render_dst)
- usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
- if (params->storage_dst)
- usage |= VK_IMAGE_USAGE_STORAGE_BIT;
- if (params->blit_src)
- usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
- if (params->host_mutable || params->blit_dst || params->initial_data)
- usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
-
- // Always use the transfer pool if available, for efficiency
- if (params->host_mutable && vk->pool_transfer)
- tex_vk->upload_queue = TRANSFER;
-
- // Double-check image usage support and fail immediately if invalid
- VkImageFormatProperties iprop;
- VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
- fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0,
- &iprop);
- if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
- return NULL;
- } else {
- VK_ASSERT(res, "Querying image format properties");
- }
-
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
- VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
-
- bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT,
- has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
-
- if (params->w > iprop.maxExtent.width ||
- params->h > iprop.maxExtent.height ||
- params->d > iprop.maxExtent.depth ||
- (params->blit_src && !has_blit_src) ||
- (params->src_linear && !has_src_linear))
- {
- return NULL;
- }
-
- // FIXME: Since we can't keep track of queue family ownership properly,
- // and we don't know in advance what types of queue families this image
- // will belong to, we're forced to share all of our images between all
- // command pools.
- uint32_t qfs[3] = {0};
- for (int i = 0; i < vk->num_pools; i++)
- qfs[i] = vk->pools[i]->qf;
-
- VkImageCreateInfo iinfo = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .imageType = tex_vk->type,
- .format = fmt->iformat,
- .extent = (VkExtent3D) { params->w, params->h, params->d },
- .mipLevels = 1,
- .arrayLayers = 1,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = usage,
- .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
- : VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = vk->num_pools,
- .pQueueFamilyIndices = qfs,
- };
-
- VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
-
- VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- VkMemoryRequirements reqs;
- vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs);
-
- struct vk_memslice *mem = &tex_vk->mem;
- if (!vk_malloc_generic(vk, reqs, memFlags, mem))
- goto error;
-
- VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
-
- if (!vk_init_image(ra, tex))
- goto error;
-
- if (params->initial_data) {
- struct ra_tex_upload_params ul_params = {
- .tex = tex,
- .invalidate = true,
- .src = params->initial_data,
- .stride = params->w * fmt->bytes,
- };
- if (!ra->fns->tex_upload(ra, &ul_params))
- goto error;
- }
-
- return tex;
-
-error:
- vk_tex_destroy(ra, tex);
- return NULL;
-}
-
-struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
- VkSwapchainCreateInfoKHR info)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_tex *tex = NULL;
-
- const struct ra_format *format = NULL;
- for (int i = 0; i < ra->num_formats; i++) {
- const struct vk_format *fmt = ra->formats[i]->priv;
- if (fmt->iformat == vk->surf_format.format) {
- format = ra->formats[i];
- break;
- }
- }
-
- if (!format) {
- MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image "
- "with surface format 0x%x\n", vk->surf_format.format);
- goto error;
- }
-
- tex = talloc_zero(NULL, struct ra_tex);
- tex->params = (struct ra_tex_params) {
- .format = format,
- .dimensions = 2,
- .w = info.imageExtent.width,
- .h = info.imageExtent.height,
- .d = 1,
- .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
- .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
- .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT),
- .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
- .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT),
- };
-
- struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
- tex_vk->type = VK_IMAGE_TYPE_2D;
- tex_vk->external_img = true;
- tex_vk->img = vkimg;
-
- if (!vk_init_image(ra, tex))
- goto error;
-
- return tex;
-
-error:
- vk_tex_destroy(ra, tex);
- return NULL;
-}
-
-// For ra_buf.priv
-struct ra_buf_vk {
- struct vk_bufslice slice;
- int refcount; // 1 = object allocated but not in use, > 1 = in use
- bool needsflush;
- enum queue_type update_queue;
- // "current" metadata, can change during course of execution
- VkPipelineStageFlags current_stage;
- VkAccessFlags current_access;
- // Arbitrary user data for the creator of a buffer
- void *user_data;
-};
-
-void ra_vk_buf_set_user_data(struct ra_buf *buf, void *user_data)
-{
-    struct ra_buf_vk *vk_priv = buf->priv;
-    vk_priv->user_data = user_data;
-}
-
-void *ra_vk_buf_get_user_data(struct ra_buf *buf)
-{
-    struct ra_buf_vk *vk_priv = buf->priv;
-    return vk_priv->user_data;
-}
-
-static void vk_buf_deref(struct ra *ra, struct ra_buf *buf)
-{
- if (!buf)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_buf_vk *buf_vk = buf->priv;
-
- if (--buf_vk->refcount == 0) {
- vk_free_memslice(vk, buf_vk->slice.mem);
- talloc_free(buf);
- }
-}
-
-static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf,
- VkPipelineStageFlags newStage,
- VkAccessFlags newAccess, int offset, size_t size)
-{
- struct ra_buf_vk *buf_vk = buf->priv;
-
- VkBufferMemoryBarrier buffBarrier = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .srcAccessMask = buf_vk->current_access,
- .dstAccessMask = newAccess,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = buf_vk->slice.buf,
- .offset = offset,
- .size = size,
- };
-
- if (buf_vk->needsflush || buf->params.host_mapped) {
- buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
- buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT;
- buf_vk->needsflush = false;
- }
-
- if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) {
- vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0,
- 0, NULL, 1, &buffBarrier, 0, NULL);
- }
-
- buf_vk->current_stage = newStage;
- buf_vk->current_access = newAccess;
- buf_vk->refcount++;
- vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf);
-}
-
-#define vk_buf_destroy vk_buf_deref
-MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf);
-
-static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
- const void *data, size_t size)
-{
- assert(buf->params.host_mutable || buf->params.initial_data);
- struct ra_buf_vk *buf_vk = buf->priv;
-
- // For host-mapped buffers, we can just directly memcpy the buffer contents.
-    // Otherwise, we update the buffer on the GPU using a command buffer.
- if (buf_vk->slice.data) {
- assert(offset + size <= buf->params.size);
- uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset;
- memcpy((void *)addr, data, size);
- buf_vk->needsflush = true;
- } else {
- struct vk_cmd *cmd = vk_require_cmd(ra, buf_vk->update_queue);
- if (!cmd) {
- MP_ERR(ra, "Failed updating buffer!\n");
- return;
- }
-
- buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT, offset, size);
-
- VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset;
- assert(bufOffset == MP_ALIGN_UP(bufOffset, 4));
- vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data);
- }
-}
-
-static struct ra_buf *vk_buf_create(struct ra *ra,
- const struct ra_buf_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
- buf->params = *params;
-
- struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk);
- buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
- buf_vk->current_access = 0;
- buf_vk->refcount = 1;
-
- VkBufferUsageFlags bufFlags = 0;
- VkMemoryPropertyFlags memFlags = 0;
- VkDeviceSize align = 4; // alignment 4 is needed for buf_update
- bool exportable = false;
-
- switch (params->type) {
- case RA_BUF_TYPE_TEX_UPLOAD:
- bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
- memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
- // Use TRANSFER-style updates for large enough buffers for efficiency
- if (params->size > 1024*1024) // 1 MB
- buf_vk->update_queue = TRANSFER;
- break;
- case RA_BUF_TYPE_UNIFORM:
- bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
- memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment);
- break;
- case RA_BUF_TYPE_SHADER_STORAGE:
- bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
- memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
- buf_vk->update_queue = COMPUTE;
- break;
- case RA_BUF_TYPE_VERTEX:
- bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
- memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- break;
- case RA_BUF_TYPE_SHARED_MEMORY:
- bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
- memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- exportable = true;
- break;
- default: abort();
- }
-
- if (params->host_mutable || params->initial_data) {
- bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment);
- }
-
- if (params->host_mapped) {
- memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
- }
-
- if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
- exportable, &buf_vk->slice))
- {
- goto error;
- }
-
- if (params->host_mapped)
- buf->data = buf_vk->slice.data;
-
- if (params->initial_data)
- vk_buf_update(ra, buf, 0, params->initial_data, params->size);
-
- buf->params.initial_data = NULL; // do this after vk_buf_update
- return buf;
-
-error:
- vk_buf_destroy(ra, buf);
- return NULL;
-}
-
-static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf)
-{
- struct ra_buf_vk *buf_vk = buf->priv;
- return buf_vk->refcount == 1;
-}
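
The refcount scheme is what makes vk_buf_poll() work; a lifecycle sketch:

    // refcount == 1: created, GPU idle           -> vk_buf_poll() true
    // buf_barrier() while recording: refcount++  -> vk_buf_poll() false
    // command buffer completes: vk_buf_deref()   -> refcount back to 1
    // final vk_buf_destroy() (an alias of vk_buf_deref()): refcount
    // reaches 0 and the slice is returned to the allocator
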
-
-static bool vk_tex_upload(struct ra *ra,
- const struct ra_tex_upload_params *params)
-{
- struct ra_tex *tex = params->tex;
- struct ra_tex_vk *tex_vk = tex->priv;
-
- if (!params->buf)
- return ra_tex_upload_pbo(ra, &tex_vk->pbo, params);
-
- assert(!params->src);
- assert(params->buf);
- struct ra_buf *buf = params->buf;
- struct ra_buf_vk *buf_vk = buf->priv;
-
- VkBufferImageCopy region = {
- .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset,
- .bufferRowLength = tex->params.w,
- .bufferImageHeight = tex->params.h,
- .imageSubresource = vk_layers,
- .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d},
- };
-
- if (tex->params.dimensions == 2) {
- int pix_size = tex->params.format->pixel_size;
- region.bufferRowLength = params->stride / pix_size;
- if (region.bufferRowLength * pix_size != params->stride) {
- MP_ERR(ra, "Texture upload strides must be a multiple of the texel "
- "size!\n");
- goto error;
- }
-
- if (params->rc) {
- struct mp_rect *rc = params->rc;
- region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0};
- region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1};
- region.bufferImageHeight = region.imageExtent.height;
- }
- }
-
- uint64_t size = region.bufferRowLength * region.bufferImageHeight *
- region.imageExtent.depth;
-
- struct vk_cmd *cmd = vk_require_cmd(ra, tex_vk->upload_queue);
- if (!cmd)
- goto error;
-
- buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size);
-
- tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- params->invalidate);
-
- vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img,
- tex_vk->current_layout, 1, &region);
-
- tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT);
-
- return true;
-
-error:
- return false;
-}
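
A worked example of the stride handling above, for a 100x50 rgba8 upload:

    // pixel_size = 4 bytes, params->stride = 400:
    //   bufferRowLength = 400 / 4 = 100 texels   (OK: 100 * 4 == 400)
    // params->stride = 402 fails the check, since with integer division
    //   (402 / 4) * 4 == 400 != 402.
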
-
-static bool ra_vk_mem_get_external_info(struct ra *ra, struct vk_memslice *mem,
-                                        struct vk_external_mem *ret)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
-#if HAVE_WIN32_DESKTOP
- HANDLE mem_handle;
-
- VkMemoryGetWin32HandleInfoKHR info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
- .pNext = NULL,
- .memory = mem->vkmem,
- .handleType = IsWindows8OrGreater()
- ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
- : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
- };
-
- VK_LOAD_PFN(vkGetMemoryWin32HandleKHR);
- VK(pfn_vkGetMemoryWin32HandleKHR(vk->dev, &info, &mem_handle));
-
- ret->mem_handle = mem_handle;
-#else
- int mem_fd;
-
- VkMemoryGetFdInfoKHR info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
- .pNext = NULL,
- .memory = mem->vkmem,
- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
- };
-
- VK_LOAD_PFN(vkGetMemoryFdKHR);
- VK(pfn_vkGetMemoryFdKHR(vk->dev, &info, &mem_fd));
-
- ret->mem_fd = mem_fd;
-#endif
- ret->size = mem->size;
- ret->offset = mem->offset;
- ret->mem_size = mem->slab_size;
-
- return true;
-
-error:
- return false;
-}
-
-bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf,
-                                 struct vk_external_mem *ret)
-{
- if (buf->params.type != RA_BUF_TYPE_SHARED_MEMORY) {
- MP_ERR(ra, "Buffer must be of TYPE_SHARED_MEMORY to be able to export it...");
- return false;
- }
-
- struct ra_buf_vk *buf_vk = buf->priv;
- struct vk_memslice *mem = &buf_vk->slice.mem;
-
- return ra_vk_mem_get_external_info(ra, mem, ret);
-}
-
-#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH
-
-// For ra_renderpass.priv
-struct ra_renderpass_vk {
- // Pipeline / render pass
- VkPipeline pipe;
- VkPipelineLayout pipeLayout;
- VkRenderPass renderPass;
- VkImageLayout initialLayout;
- VkImageLayout finalLayout;
- // Descriptor set (bindings)
- VkDescriptorSetLayout dsLayout;
- VkDescriptorPool dsPool;
- VkDescriptorSet dss[MPVK_NUM_DS];
- int dindex;
- // Vertex buffers (vertices)
- struct ra_buf_pool vbo;
-
- // For updating
- VkWriteDescriptorSet *dswrite;
- VkDescriptorImageInfo *dsiinfo;
- VkDescriptorBufferInfo *dsbinfo;
-};
-
-static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
-{
- if (!pass)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_renderpass_vk *pass_vk = pass->priv;
-
- ra_buf_pool_uninit(ra, &pass_vk->vbo);
- vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR);
- vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR);
- vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR);
- vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR);
- vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR);
-
- talloc_free(pass);
-}
-
-MAKE_LAZY_DESTRUCTOR(vk_renderpass_destroy, struct ra_renderpass);
-
-static const VkDescriptorType dsType[] = {
- [RA_VARTYPE_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- [RA_VARTYPE_IMG_W] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- [RA_VARTYPE_BUF_RO] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
- [RA_VARTYPE_BUF_RW] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-};
-
-static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp,
- VkFormat *out_fmt)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- enum ra_ctype ctype;
- switch (inp->type) {
- case RA_VARTYPE_FLOAT: ctype = RA_CTYPE_FLOAT; break;
- case RA_VARTYPE_BYTE_UNORM: ctype = RA_CTYPE_UNORM; break;
- default: abort();
- }
-
- assert(inp->dim_m == 1);
- for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) {
- if (fmt->ctype != ctype)
- continue;
- if (fmt->components != inp->dim_v)
- continue;
- if (fmt->bytes != ra_renderpass_input_layout(inp).size)
- continue;
-
- // Ensure this format is valid for vertex attributes
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
- if (!(prop.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
- continue;
-
- *out_fmt = fmt->iformat;
- return true;
- }
-
- return false;
-}
-
-static const char vk_cache_magic[4] = {'R','A','V','K'};
-static const int vk_cache_version = 2;
-
-struct vk_cache_header {
- char magic[sizeof(vk_cache_magic)];
- int cache_version;
- char compiler[SPIRV_NAME_MAX_LEN];
- int compiler_version;
- size_t vert_spirv_len;
- size_t frag_spirv_len;
- size_t comp_spirv_len;
- size_t pipecache_len;
-};
-
-static bool vk_use_cached_program(const struct ra_renderpass_params *params,
- const struct spirv_compiler *spirv,
- struct bstr *vert_spirv,
- struct bstr *frag_spirv,
- struct bstr *comp_spirv,
- struct bstr *pipecache)
-{
- struct bstr cache = params->cached_program;
- if (cache.len < sizeof(struct vk_cache_header))
- return false;
-
- struct vk_cache_header *header = (struct vk_cache_header *)cache.start;
- cache = bstr_cut(cache, sizeof(*header));
-
- if (strncmp(header->magic, vk_cache_magic, sizeof(vk_cache_magic)) != 0)
- return false;
- if (header->cache_version != vk_cache_version)
- return false;
- if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0)
- return false;
- if (header->compiler_version != spirv->compiler_version)
- return false;
-
-#define GET(ptr) \
- if (cache.len < header->ptr##_len) \
- return false; \
- *ptr = bstr_splice(cache, 0, header->ptr##_len); \
- cache = bstr_cut(cache, ptr->len);
-
- GET(vert_spirv);
- GET(frag_spirv);
- GET(comp_spirv);
- GET(pipecache);
- return true;
-}
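
The cache blob parsed above is a fixed header immediately followed by four
variable-length sections, in this order:

    //   struct vk_cache_header  (magic, versions, section lengths)
    //   vert_spirv_len bytes    (vertex shader SPIR-V)
    //   frag_spirv_len bytes    (fragment shader SPIR-V)
    //   comp_spirv_len bytes    (VkPipelineCache data follows the
    //                            comp_spirv section: compute SPIR-V,
    //                            then pipecache_len bytes)
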
-
-static VkResult vk_compile_glsl(struct ra *ra, void *tactx,
- enum glsl_shader type, const char *glsl,
- struct bstr *spirv)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- VkResult ret = VK_SUCCESS;
- int msgl = MSGL_DEBUG;
-
- if (!vk->spirv->fns->compile_glsl(vk->spirv, tactx, type, glsl, spirv)) {
- ret = VK_ERROR_INVALID_SHADER_NV;
- msgl = MSGL_ERR;
- }
-
- static const char *shader_names[] = {
- [GLSL_SHADER_VERTEX] = "vertex",
- [GLSL_SHADER_FRAGMENT] = "fragment",
- [GLSL_SHADER_COMPUTE] = "compute",
- };
-
- if (mp_msg_test(ra->log, msgl)) {
- MP_MSG(ra, msgl, "%s shader source:\n", shader_names[type]);
- mp_log_source(ra->log, msgl, glsl);
- }
- return ret;
-}
-
-static const VkShaderStageFlags stageFlags[] = {
- [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT,
- [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
-};
-
-static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
- const struct ra_renderpass_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- bool success = false;
- assert(vk->spirv);
-
- struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
- pass->params = *ra_renderpass_params_copy(pass, params);
- pass->params.cached_program = (bstr){0};
- struct ra_renderpass_vk *pass_vk = pass->priv =
- talloc_zero(pass, struct ra_renderpass_vk);
-
- // temporary allocations/objects
- void *tmp = talloc_new(NULL);
- VkPipelineCache pipeCache = NULL;
- VkShaderModule vert_shader = NULL;
- VkShaderModule frag_shader = NULL;
- VkShaderModule comp_shader = NULL;
-
- static int dsCount[RA_VARTYPE_COUNT] = {0};
- VkDescriptorSetLayoutBinding *bindings = NULL;
- int num_bindings = 0;
-
- for (int i = 0; i < params->num_inputs; i++) {
- struct ra_renderpass_input *inp = &params->inputs[i];
- switch (inp->type) {
- case RA_VARTYPE_TEX:
- case RA_VARTYPE_IMG_W:
- case RA_VARTYPE_BUF_RO:
- case RA_VARTYPE_BUF_RW: {
- VkDescriptorSetLayoutBinding desc = {
- .binding = inp->binding,
- .descriptorType = dsType[inp->type],
- .descriptorCount = 1,
- .stageFlags = stageFlags[params->type],
- };
-
- MP_TARRAY_APPEND(tmp, bindings, num_bindings, desc);
- dsCount[inp->type]++;
- break;
- }
- default: abort();
- }
- }
-
- VkDescriptorPoolSize *dsPoolSizes = NULL;
- int poolSizeCount = 0;
-
- for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) {
- if (dsCount[t] > 0) {
- VkDescriptorPoolSize dssize = {
- .type = dsType[t],
- .descriptorCount = dsCount[t] * MPVK_NUM_DS,
- };
-
- MP_TARRAY_APPEND(tmp, dsPoolSizes, poolSizeCount, dssize);
- }
- }
-
- VkDescriptorPoolCreateInfo pinfo = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
- .maxSets = MPVK_NUM_DS,
- .pPoolSizes = dsPoolSizes,
- .poolSizeCount = poolSizeCount,
- };
-
- VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool));
-
- pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings);
- pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings);
- pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_bindings);
-
- VkDescriptorSetLayoutCreateInfo dinfo = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .pBindings = bindings,
- .bindingCount = num_bindings,
- };
-
- VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, MPVK_ALLOCATOR,
- &pass_vk->dsLayout));
-
- VkDescriptorSetLayout layouts[MPVK_NUM_DS];
- for (int i = 0; i < MPVK_NUM_DS; i++)
- layouts[i] = pass_vk->dsLayout;
-
- VkDescriptorSetAllocateInfo ainfo = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
- .descriptorPool = pass_vk->dsPool,
- .descriptorSetCount = MPVK_NUM_DS,
- .pSetLayouts = layouts,
- };
-
- VK(vkAllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss));
-
- VkPipelineLayoutCreateInfo linfo = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &pass_vk->dsLayout,
- .pushConstantRangeCount = params->push_constants_size ? 1 : 0,
- .pPushConstantRanges = &(VkPushConstantRange){
- .stageFlags = stageFlags[params->type],
- .offset = 0,
- .size = params->push_constants_size,
- },
- };
-
- VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
- &pass_vk->pipeLayout));
-
- struct bstr vert = {0}, frag = {0}, comp = {0}, pipecache = {0};
- if (vk_use_cached_program(params, vk->spirv, &vert, &frag, &comp, &pipecache)) {
- MP_VERBOSE(ra, "Using cached SPIR-V and VkPipeline.\n");
- } else {
- pipecache.len = 0;
- switch (params->type) {
- case RA_RENDERPASS_TYPE_RASTER:
- VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_VERTEX,
- params->vertex_shader, &vert));
- VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_FRAGMENT,
- params->frag_shader, &frag));
- comp.len = 0;
- break;
- case RA_RENDERPASS_TYPE_COMPUTE:
- VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_COMPUTE,
- params->compute_shader, &comp));
- frag.len = 0;
- vert.len = 0;
- break;
- }
- }
-
- VkPipelineCacheCreateInfo pcinfo = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
- .pInitialData = pipecache.start,
- .initialDataSize = pipecache.len,
- };
-
- VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pipeCache));
-
- VkShaderModuleCreateInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
- };
-
- switch (params->type) {
- case RA_RENDERPASS_TYPE_RASTER: {
- sinfo.pCode = (uint32_t *)vert.start;
- sinfo.codeSize = vert.len;
- VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &vert_shader));
-
- sinfo.pCode = (uint32_t *)frag.start;
- sinfo.codeSize = frag.len;
- VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &frag_shader));
-
- VkVertexInputAttributeDescription *attrs = talloc_array(tmp,
- VkVertexInputAttributeDescription, params->num_vertex_attribs);
-
- for (int i = 0; i < params->num_vertex_attribs; i++) {
- struct ra_renderpass_input *inp = &params->vertex_attribs[i];
- attrs[i] = (VkVertexInputAttributeDescription) {
- .location = i,
- .binding = 0,
- .offset = inp->offset,
- };
-
- if (!vk_get_input_format(ra, inp, &attrs[i].format)) {
- MP_ERR(ra, "No suitable VkFormat for vertex attrib '%s'!\n",
- inp->name);
- goto error;
- }
- }
-
- // This is the most common case, so optimize towards it. In this case,
- // the renderpass will take care of almost all layout transitions
- pass_vk->initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- pass_vk->finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- VkAttachmentLoadOp loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
-
- // If we're blending, then we need to explicitly load the previous
- // contents of the color attachment
- if (pass->params.enable_blend)
- loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-
- // If we're invalidating the target, we don't need to load or transition
- if (pass->params.invalidate_target) {
- pass_vk->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
- loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
- }
-
- VK(vk_create_render_pass(vk->dev, params->target_format, loadOp,
- pass_vk->initialLayout, pass_vk->finalLayout,
- &pass_vk->renderPass));
-
- static const VkBlendFactor blendFactors[] = {
- [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO,
- [RA_BLEND_ONE] = VK_BLEND_FACTOR_ONE,
- [RA_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA,
- [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
- };
-
- VkGraphicsPipelineCreateInfo cinfo = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = (VkPipelineShaderStageCreateInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vert_shader,
- .pName = "main",
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = frag_shader,
- .pName = "main",
- }
- },
- .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 1,
- .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) {
- .binding = 0,
- .stride = params->vertex_stride,
- .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
- },
- .vertexAttributeDescriptionCount = params->num_vertex_attribs,
- .pVertexAttributeDescriptions = attrs,
- },
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .lineWidth = 1.0f,
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkPipelineColorBlendAttachmentState) {
- .blendEnable = params->enable_blend,
- .colorBlendOp = VK_BLEND_OP_ADD,
- .srcColorBlendFactor = blendFactors[params->blend_src_rgb],
- .dstColorBlendFactor = blendFactors[params->blend_dst_rgb],
- .alphaBlendOp = VK_BLEND_OP_ADD,
- .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha],
- .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha],
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT |
- VK_COLOR_COMPONENT_A_BIT,
- },
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]){
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = pass_vk->pipeLayout,
- .renderPass = pass_vk->renderPass,
- };
-
- VK(vkCreateGraphicsPipelines(vk->dev, pipeCache, 1, &cinfo,
- MPVK_ALLOCATOR, &pass_vk->pipe));
- break;
- }
- case RA_RENDERPASS_TYPE_COMPUTE: {
- sinfo.pCode = (uint32_t *)comp.start;
- sinfo.codeSize = comp.len;
- VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &comp_shader));
-
- VkComputePipelineCreateInfo cinfo = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = comp_shader,
- .pName = "main",
- },
- .layout = pass_vk->pipeLayout,
- };
-
- VK(vkCreateComputePipelines(vk->dev, pipeCache, 1, &cinfo,
- MPVK_ALLOCATOR, &pass_vk->pipe));
- break;
- }
- }
-
- // Update params->cached_program
- struct bstr cache = {0};
- VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, NULL));
- cache.start = talloc_size(tmp, cache.len);
- VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, cache.start));
-
- struct vk_cache_header header = {
- .cache_version = vk_cache_version,
- .compiler_version = vk->spirv->compiler_version,
- .vert_spirv_len = vert.len,
- .frag_spirv_len = frag.len,
- .comp_spirv_len = comp.len,
- .pipecache_len = cache.len,
- };
-
- for (int i = 0; i < MP_ARRAY_SIZE(header.magic); i++)
- header.magic[i] = vk_cache_magic[i];
- for (int i = 0; i < sizeof(vk->spirv->name); i++)
- header.compiler[i] = vk->spirv->name[i];
-
- struct bstr *prog = &pass->params.cached_program;
- bstr_xappend(pass, prog, (struct bstr){ (char *) &header, sizeof(header) });
- bstr_xappend(pass, prog, vert);
- bstr_xappend(pass, prog, frag);
- bstr_xappend(pass, prog, comp);
- bstr_xappend(pass, prog, cache);
-
- success = true;
-
-error:
- if (!success) {
- vk_renderpass_destroy(ra, pass);
- pass = NULL;
- }
-
- vkDestroyShaderModule(vk->dev, vert_shader, MPVK_ALLOCATOR);
- vkDestroyShaderModule(vk->dev, frag_shader, MPVK_ALLOCATOR);
- vkDestroyShaderModule(vk->dev, comp_shader, MPVK_ALLOCATOR);
- vkDestroyPipelineCache(vk->dev, pipeCache, MPVK_ALLOCATOR);
- talloc_free(tmp);
- return pass;
-}
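
Reviewer note: the cached_program blob assembled above is split back apart on the next run by vk_use_cached_program(), which sits outside this hunk. Below is a minimal sketch of that inverse operation, assuming only the vk_cache_header layout shown above and mpv's bstr helpers; split_cached_program is an illustrative name, not a real mpv function, and magic/version validation is elided.

    #include <stdbool.h>
    #include <string.h>
    #include "misc/bstr.h"
    // assumes the struct vk_cache_header definition from ra_vk.c is in scope

    // Split a cached_program blob back into its header, the three SPIR-V
    // blobs, and the VkPipelineCache data, in the exact order they were
    // appended by the writer code above.
    static bool split_cached_program(struct bstr prog, struct bstr *vert,
                                     struct bstr *frag, struct bstr *comp,
                                     struct bstr *pipecache)
    {
        if (prog.len < sizeof(struct vk_cache_header))
            return false;

        struct vk_cache_header header;
        memcpy(&header, prog.start, sizeof(header));
        prog = bstr_cut(prog, sizeof(header));

        if (prog.len < header.vert_spirv_len + header.frag_spirv_len +
                       header.comp_spirv_len + header.pipecache_len)
            return false;

        *vert = bstr_splice(prog, 0, header.vert_spirv_len);
        prog = bstr_cut(prog, header.vert_spirv_len);
        *frag = bstr_splice(prog, 0, header.frag_spirv_len);
        prog = bstr_cut(prog, header.frag_spirv_len);
        *comp = bstr_splice(prog, 0, header.comp_spirv_len);
        prog = bstr_cut(prog, header.comp_spirv_len);
        *pipecache = bstr_splice(prog, 0, header.pipecache_len);
        return true;
    }
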
-
-static const VkPipelineStageFlags passStages[] = {
- [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
- [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-};
-
-static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd,
- struct ra_renderpass *pass,
- struct ra_renderpass_input_val val,
- VkDescriptorSet ds, int idx)
-{
- struct ra_renderpass_vk *pass_vk = pass->priv;
- struct ra_renderpass_input *inp = &pass->params.inputs[val.index];
-
- VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx];
- *wds = (VkWriteDescriptorSet) {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstSet = ds,
- .dstBinding = inp->binding,
- .descriptorCount = 1,
- .descriptorType = dsType[inp->type],
- };
-
- switch (inp->type) {
- case RA_VARTYPE_TEX: {
- struct ra_tex *tex = *(struct ra_tex **)val.data;
- struct ra_tex_vk *tex_vk = tex->priv;
-
- assert(tex->params.render_src);
- tex_barrier(ra, cmd, tex, passStages[pass->params.type],
- VK_ACCESS_SHADER_READ_BIT,
- VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false);
-
- VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
- *iinfo = (VkDescriptorImageInfo) {
- .sampler = tex_vk->sampler,
- .imageView = tex_vk->view,
- .imageLayout = tex_vk->current_layout,
- };
-
- wds->pImageInfo = iinfo;
- break;
- }
- case RA_VARTYPE_IMG_W: {
- struct ra_tex *tex = *(struct ra_tex **)val.data;
- struct ra_tex_vk *tex_vk = tex->priv;
-
- assert(tex->params.storage_dst);
- tex_barrier(ra, cmd, tex, passStages[pass->params.type],
- VK_ACCESS_SHADER_WRITE_BIT,
- VK_IMAGE_LAYOUT_GENERAL, false);
-
- VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
- *iinfo = (VkDescriptorImageInfo) {
- .imageView = tex_vk->view,
- .imageLayout = tex_vk->current_layout,
- };
-
- wds->pImageInfo = iinfo;
- break;
- }
- case RA_VARTYPE_BUF_RO:
- case RA_VARTYPE_BUF_RW: {
- struct ra_buf *buf = *(struct ra_buf **)val.data;
- struct ra_buf_vk *buf_vk = buf->priv;
-
-        VkAccessFlags access = VK_ACCESS_SHADER_READ_BIT;
- if (inp->type == RA_VARTYPE_BUF_RW)
- access |= VK_ACCESS_SHADER_WRITE_BIT;
-
- buf_barrier(ra, cmd, buf, passStages[pass->params.type],
- access, buf_vk->slice.mem.offset, buf->params.size);
-
- VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
- *binfo = (VkDescriptorBufferInfo) {
- .buffer = buf_vk->slice.buf,
- .offset = buf_vk->slice.mem.offset,
- .range = buf->params.size,
- };
-
- wds->pBufferInfo = binfo;
- break;
- }
- }
-}
-
-static void vk_release_descriptor(struct ra *ra, struct vk_cmd *cmd,
- struct ra_renderpass *pass,
- struct ra_renderpass_input_val val)
-{
- struct ra_renderpass_input *inp = &pass->params.inputs[val.index];
-
- switch (inp->type) {
- case RA_VARTYPE_IMG_W:
- case RA_VARTYPE_TEX: {
- struct ra_tex *tex = *(struct ra_tex **)val.data;
- tex_signal(ra, cmd, tex, passStages[pass->params.type]);
- break;
- }
- }
-}
-
-static void vk_renderpass_run(struct ra *ra,
- const struct ra_renderpass_run_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_renderpass *pass = params->pass;
- struct ra_renderpass_vk *pass_vk = pass->priv;
-
- static const enum queue_type types[] = {
- [RA_RENDERPASS_TYPE_RASTER] = GRAPHICS,
- [RA_RENDERPASS_TYPE_COMPUTE] = COMPUTE,
- };
-
- struct vk_cmd *cmd = vk_require_cmd(ra, types[pass->params.type]);
- if (!cmd)
- goto error;
-
- static const VkPipelineBindPoint bindPoint[] = {
- [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS,
- [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE,
- };
-
- vkCmdBindPipeline(cmd->buf, bindPoint[pass->params.type], pass_vk->pipe);
-
- VkDescriptorSet ds = pass_vk->dss[pass_vk->dindex++];
- pass_vk->dindex %= MPVK_NUM_DS;
-
- for (int i = 0; i < params->num_values; i++)
- vk_update_descriptor(ra, cmd, pass, params->values[i], ds, i);
-
- if (params->num_values > 0) {
- vkUpdateDescriptorSets(vk->dev, params->num_values, pass_vk->dswrite,
- 0, NULL);
- }
-
- vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
- pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
-
- if (pass->params.push_constants_size) {
- vkCmdPushConstants(cmd->buf, pass_vk->pipeLayout,
- stageFlags[pass->params.type], 0,
- pass->params.push_constants_size,
- params->push_constants);
- }
-
- switch (pass->params.type) {
- case RA_RENDERPASS_TYPE_COMPUTE:
- vkCmdDispatch(cmd->buf, params->compute_groups[0],
- params->compute_groups[1],
- params->compute_groups[2]);
- break;
- case RA_RENDERPASS_TYPE_RASTER: {
- struct ra_tex *tex = params->target;
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(tex->params.render_dst);
-
- struct ra_buf_params buf_params = {
- .type = RA_BUF_TYPE_VERTEX,
- .size = params->vertex_count * pass->params.vertex_stride,
- .host_mutable = true,
- };
-
- struct ra_buf *buf = ra_buf_pool_get(ra, &pass_vk->vbo, &buf_params);
- if (!buf) {
- MP_ERR(ra, "Failed allocating vertex buffer!\n");
- goto error;
- }
- struct ra_buf_vk *buf_vk = buf->priv;
-
- vk_buf_update(ra, buf, 0, params->vertex_data, buf_params.size);
-
- buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
- VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
- buf_vk->slice.mem.offset, buf_params.size);
-
- vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf,
- &buf_vk->slice.mem.offset);
-
- tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, pass_vk->initialLayout,
- pass->params.invalidate_target);
-
- VkViewport viewport = {
- .x = params->viewport.x0,
- .y = params->viewport.y0,
- .width = mp_rect_w(params->viewport),
- .height = mp_rect_h(params->viewport),
- };
-
- VkRect2D scissor = {
- .offset = {params->scissors.x0, params->scissors.y0},
- .extent = {mp_rect_w(params->scissors), mp_rect_h(params->scissors)},
- };
-
- vkCmdSetViewport(cmd->buf, 0, 1, &viewport);
- vkCmdSetScissor(cmd->buf, 0, 1, &scissor);
-
- VkRenderPassBeginInfo binfo = {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = pass_vk->renderPass,
- .framebuffer = tex_vk->framebuffer,
- .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}},
- };
-
- vkCmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE);
- vkCmdDraw(cmd->buf, params->vertex_count, 1, 0, 0);
- vkCmdEndRenderPass(cmd->buf);
-
- // The renderPass implicitly transitions the texture to this layout
- tex_vk->current_layout = pass_vk->finalLayout;
- tex_vk->current_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
- break;
- }
- default: abort();
-    }
-
- for (int i = 0; i < params->num_values; i++)
- vk_release_descriptor(ra, cmd, pass, params->values[i]);
-
- // flush the work so far into its own command buffer, for better cross-frame
- // granularity
- vk_submit(ra);
-
-error:
- return;
-}
-
-static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
- struct mp_rect *dst_rc, struct mp_rect *src_rc)
-{
- assert(src->params.blit_src);
- assert(dst->params.blit_dst);
-
- struct ra_tex_vk *src_vk = src->priv;
- struct ra_tex_vk *dst_vk = dst->priv;
-
- struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
- if (!cmd)
- return;
-
- tex_barrier(ra, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_READ_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
- false);
-
- bool discard = dst_rc->x0 == 0 &&
- dst_rc->y0 == 0 &&
- dst_rc->x1 == dst->params.w &&
- dst_rc->y1 == dst->params.h;
-
- tex_barrier(ra, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- discard);
-
- // Under certain conditions we can use vkCmdCopyImage instead of
- // vkCmdBlitImage, namely when the blit operation does not require
-    // scaling and the formats are compatible.
- if (src->params.format->pixel_size == dst->params.format->pixel_size &&
- mp_rect_w(*src_rc) == mp_rect_w(*dst_rc) &&
- mp_rect_h(*src_rc) == mp_rect_h(*dst_rc) &&
- mp_rect_w(*src_rc) >= 0 && mp_rect_h(*src_rc) >= 0)
- {
- VkImageCopy region = {
- .srcSubresource = vk_layers,
- .dstSubresource = vk_layers,
- .srcOffset = {src_rc->x0, src_rc->y0, 0},
- .dstOffset = {dst_rc->x0, dst_rc->y0, 0},
- .extent = {mp_rect_w(*src_rc), mp_rect_h(*src_rc), 1},
- };
-
- vkCmdCopyImage(cmd->buf, src_vk->img, src_vk->current_layout,
- dst_vk->img, dst_vk->current_layout, 1, &region);
- } else {
- VkImageBlit region = {
- .srcSubresource = vk_layers,
- .dstSubresource = vk_layers,
- .srcOffsets = {{src_rc->x0, src_rc->y0, 0},
- {src_rc->x1, src_rc->y1, 1}},
- .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0},
- {dst_rc->x1, dst_rc->y1, 1}},
- };
-
- vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout,
- dst_vk->img, dst_vk->current_layout, 1, &region,
- VK_FILTER_NEAREST);
- }
-
- tex_signal(ra, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT);
- tex_signal(ra, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT);
-}
-
-static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4],
- struct mp_rect *rc)
-{
- struct ra_vk *p = ra->priv;
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(tex->params.blit_dst);
-
- struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
- if (!cmd)
- return;
-
- struct mp_rect full = {0, 0, tex->params.w, tex->params.h};
- if (!rc || mp_rect_equals(rc, &full)) {
- // To clear the entire image, we can use the efficient clear command
- tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true);
-
- VkClearColorValue clearColor = {0};
- for (int c = 0; c < 4; c++)
- clearColor.float32[c] = color[c];
-
- vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout,
- &clearColor, 1, &vk_range);
-
- tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT);
- } else {
- // To simulate per-region clearing, we blit from a 1x1 texture instead
- struct ra_tex_upload_params ul_params = {
- .tex = p->clear_tex,
- .invalidate = true,
- .src = &color[0],
- };
- vk_tex_upload(ra, &ul_params);
- vk_blit(ra, tex, p->clear_tex, rc, &(struct mp_rect){0, 0, 1, 1});
- }
-}
-
-static int vk_desc_namespace(struct ra *ra, enum ra_vartype type)
-{
- return 0;
-}
-
-#define VK_QUERY_POOL_SIZE (MPVK_MAX_STREAMING_DEPTH * 4)
-
-struct vk_timer {
- VkQueryPool pool;
- int index_seen; // keeps track of which indices have been used at least once
- int index;
- uint64_t result;
-};
-
-static void vk_timer_destroy(struct ra *ra, ra_timer *ratimer)
-{
- if (!ratimer)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct vk_timer *timer = ratimer;
-
- vkDestroyQueryPool(vk->dev, timer->pool, MPVK_ALLOCATOR);
-
- talloc_free(timer);
-}
-
-MAKE_LAZY_DESTRUCTOR(vk_timer_destroy, ra_timer);
-
-static ra_timer *vk_timer_create(struct ra *ra)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- struct vk_timer *timer = talloc_zero(NULL, struct vk_timer);
- timer->index_seen = -1;
-
- struct VkQueryPoolCreateInfo qinfo = {
- .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
- .queryType = VK_QUERY_TYPE_TIMESTAMP,
- .queryCount = VK_QUERY_POOL_SIZE,
- };
-
- VK(vkCreateQueryPool(vk->dev, &qinfo, MPVK_ALLOCATOR, &timer->pool));
-
- return (ra_timer *)timer;
-
-error:
- vk_timer_destroy(ra, timer);
- return NULL;
-}
-
-static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index,
- VkPipelineStageFlags stage)
-{
- struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
- if (!cmd)
- return;
-
- vkCmdWriteTimestamp(cmd->buf, stage, pool, index);
-}
-
-static void vk_timer_start(struct ra *ra, ra_timer *ratimer)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct vk_timer *timer = ratimer;
-
- VkResult res = VK_NOT_READY;
- uint64_t out[2];
-
- if (timer->index <= timer->index_seen) {
- res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2,
- sizeof(out), &out[0], sizeof(uint64_t),
- VK_QUERY_RESULT_64_BIT);
- }
-
- switch (res) {
- case VK_SUCCESS:
- timer->result = (out[1] - out[0]) * vk->limits.timestampPeriod;
- break;
- case VK_NOT_READY:
- timer->result = 0;
- break;
- default:
- MP_WARN(vk, "Failed reading timer query result: %s\n", vk_err(res));
- return;
-    }
-
- vk_timer_record(ra, timer->pool, timer->index,
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
-}
-
-static uint64_t vk_timer_stop(struct ra *ra, ra_timer *ratimer)
-{
- struct vk_timer *timer = ratimer;
- vk_timer_record(ra, timer->pool, timer->index + 1,
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
-
- timer->index_seen = MPMAX(timer->index_seen, timer->index);
- timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE;
-
- return timer->result;
-}
-
-static struct ra_fns ra_fns_vk = {
- .destroy = vk_destroy_ra,
- .tex_create = vk_tex_create,
- .tex_destroy = vk_tex_destroy_lazy,
- .tex_upload = vk_tex_upload,
- .buf_create = vk_buf_create,
- .buf_destroy = vk_buf_destroy_lazy,
- .buf_update = vk_buf_update,
- .buf_poll = vk_buf_poll,
- .clear = vk_clear,
- .blit = vk_blit,
- .uniform_layout = std140_layout,
- .push_constant_layout = std430_layout,
- .desc_namespace = vk_desc_namespace,
- .renderpass_create = vk_renderpass_create,
- .renderpass_destroy = vk_renderpass_destroy_lazy,
- .renderpass_run = vk_renderpass_run,
- .timer_create = vk_timer_create,
- .timer_destroy = vk_timer_destroy_lazy,
- .timer_start = vk_timer_start,
- .timer_stop = vk_timer_stop,
-};
-
-struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex)
-{
- struct ra_vk *p = ra->priv;
- struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
- if (!cmd)
- return NULL;
-
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(tex_vk->external_img);
- tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- VK_ACCESS_MEMORY_READ_BIT, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
- false);
-
- // Return this directly instead of going through vk_submit
- p->cmd = NULL;
- return cmd;
-}
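
Reviewer note: the easiest-to-miss trick in the deleted file is how the vk_timer_* functions avoid CPU/GPU stalls: each measurement writes a pair of timestamps into a ring of query slots, and a slot pair is only read back the next time it comes around, by which point the GPU has long since finished it. A standalone sketch of the same pattern, assuming a valid device, a recording command buffer, and the device's timestampPeriod; the gpu_timer_* names are illustrative, not mpv API.

    #include <stdint.h>
    #include <vulkan/vulkan.h>

    #define TIMER_QUERY_COUNT 64 // must be even: one measurement = two slots

    struct gpu_timer {
        VkQueryPool pool;   // VK_QUERY_TYPE_TIMESTAMP, TIMER_QUERY_COUNT slots
        int index;          // next slot pair to write
        int index_seen;     // highest slot written at least once, -1 initially
        uint64_t result_ns; // most recently completed measurement
    };

    static void gpu_timer_start(struct gpu_timer *t, VkDevice dev,
                                VkCommandBuffer cmd, float timestamp_period)
    {
        // Read back the *previous* measurement for this slot pair, if any;
        // reusing a slot only after a full ring trip keeps this non-blocking
        if (t->index <= t->index_seen) {
            uint64_t ts[2];
            VkResult res = vkGetQueryPoolResults(dev, t->pool, t->index, 2,
                                                 sizeof(ts), ts,
                                                 sizeof(uint64_t),
                                                 VK_QUERY_RESULT_64_BIT);
            t->result_ns = res == VK_SUCCESS
                         ? (ts[1] - ts[0]) * timestamp_period : 0;
        }

        vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                            t->pool, t->index);
    }

    static uint64_t gpu_timer_stop(struct gpu_timer *t, VkCommandBuffer cmd)
    {
        vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                            t->pool, t->index + 1);
        if (t->index > t->index_seen)
            t->index_seen = t->index;
        t->index = (t->index + 2) % TIMER_QUERY_COUNT;
        return t->result_ns;
    }
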
diff --git a/video/out/vulkan/ra_vk.h b/video/out/vulkan/ra_vk.h
deleted file mode 100644
index 393c01a3c1..0000000000
--- a/video/out/vulkan/ra_vk.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-
-#include "video/out/gpu/ra.h"
-
-#include "common.h"
-#include "utils.h"
-
-struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log);
-
-// Access to the VkDevice is needed for swapchain creation
-VkDevice ra_vk_get_dev(struct ra *ra);
-
-// Allocates a ra_tex that wraps a swapchain image. The contents of the image
-// will be invalidated, and access to it will only be internally synchronized.
-// So the calling code should not do anything else with the VkImage.
-struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
- VkSwapchainCreateInfoKHR info);
-
-// Associates an external semaphore (dependency) with a ra_tex, such that this
-// ra_tex will not be used by the ra_vk until the external semaphore fires.
-void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep);
-
-// This function finalizes rendering, transitions `tex` (which must be a
-// wrapped swapchain image) into a format suitable for presentation, and returns
-// the resulting command buffer (or NULL on error). The caller may add their
-// own semaphores to this command buffer, and must submit it afterwards.
-struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex);
-
-// May be called on a struct ra of any type. Returns NULL if the ra is not
-// a vulkan ra.
-struct mpvk_ctx *ra_vk_get(struct ra *ra);
-
-struct vk_external_mem {
-#if HAVE_WIN32_DESKTOP
- HANDLE mem_handle;
-#else
- int mem_fd;
-#endif
- size_t mem_size;
- size_t size;
- size_t offset;
-};
-
-// Export an ra_buf for importing by another api.
-bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret);
-
-// Set the buffer user data
-void ra_vk_buf_set_user_data(struct ra_buf *buf, void *priv);
-
-// Get the buffer user data
-void *ra_vk_buf_get_user_data(struct ra_buf *buf);
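
Reviewer note: for context, this is roughly how a context implementation consumed the header above before this commit. This is a sketch rather than verbatim mpv code; present_frame and its semaphore parameters are illustrative, and error cleanup plus the actual vkQueuePresentKHR call are omitted.

    #include <stdbool.h>
    #include "video/out/vulkan/ra_vk.h"

    static bool present_frame(struct ra *ra, VkImage vkimg,
                              VkSwapchainCreateInfoKHR info,
                              VkSemaphore acquired, VkSemaphore render_done)
    {
        struct mpvk_ctx *vk = ra_vk_get(ra);

        // Wrap the freshly acquired swapchain image; from here on, the ra
        // owns all layout transitions and synchronization for the VkImage
        struct ra_tex *tex = ra_vk_wrap_swapchain_img(ra, vkimg, info);
        if (!tex)
            return false;

        // Block all use of the image until the acquire semaphore fires
        ra_tex_vk_external_dep(ra, tex, acquired);

        // ... render to `tex` through the generic ra API ...

        // Transition to PRESENT_SRC and fetch the final command buffer
        struct vk_cmd *cmd = ra_vk_submit(ra, tex);
        if (!cmd)
            return false;

        // Hook up the present semaphore, then queue and flush; the caller
        // can now call vkQueuePresentKHR waiting on render_done
        vk_cmd_sig(cmd, render_done);
        vk_cmd_queue(vk, cmd);
        return mpvk_flush_commands(vk);
    }
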
diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c
index 62ac3e87f9..7d9c519c12 100644
--- a/video/out/vulkan/utils.c
+++ b/video/out/vulkan/utils.c
@@ -1,986 +1,46 @@
-#include <libavutil/macros.h>
-
-#include "video/out/gpu/spirv.h"
+#include "video/out/placebo/utils.h"
#include "utils.h"
-#include "malloc.h"
-
-const char* vk_err(VkResult res)
-{
- switch (res) {
- // These are technically success codes, but include them nonetheless
- case VK_SUCCESS: return "VK_SUCCESS";
- case VK_NOT_READY: return "VK_NOT_READY";
- case VK_TIMEOUT: return "VK_TIMEOUT";
- case VK_EVENT_SET: return "VK_EVENT_SET";
- case VK_EVENT_RESET: return "VK_EVENT_RESET";
- case VK_INCOMPLETE: return "VK_INCOMPLETE";
- case VK_SUBOPTIMAL_KHR: return "VK_SUBOPTIMAL_KHR";
-
- // Actual error codes
- case VK_ERROR_OUT_OF_HOST_MEMORY: return "VK_ERROR_OUT_OF_HOST_MEMORY";
- case VK_ERROR_OUT_OF_DEVICE_MEMORY: return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
- case VK_ERROR_INITIALIZATION_FAILED: return "VK_ERROR_INITIALIZATION_FAILED";
- case VK_ERROR_DEVICE_LOST: return "VK_ERROR_DEVICE_LOST";
- case VK_ERROR_MEMORY_MAP_FAILED: return "VK_ERROR_MEMORY_MAP_FAILED";
- case VK_ERROR_LAYER_NOT_PRESENT: return "VK_ERROR_LAYER_NOT_PRESENT";
- case VK_ERROR_EXTENSION_NOT_PRESENT: return "VK_ERROR_EXTENSION_NOT_PRESENT";
- case VK_ERROR_FEATURE_NOT_PRESENT: return "VK_ERROR_FEATURE_NOT_PRESENT";
- case VK_ERROR_INCOMPATIBLE_DRIVER: return "VK_ERROR_INCOMPATIBLE_DRIVER";
- case VK_ERROR_TOO_MANY_OBJECTS: return "VK_ERROR_TOO_MANY_OBJECTS";
- case VK_ERROR_FORMAT_NOT_SUPPORTED: return "VK_ERROR_FORMAT_NOT_SUPPORTED";
- case VK_ERROR_FRAGMENTED_POOL: return "VK_ERROR_FRAGMENTED_POOL";
- case VK_ERROR_INVALID_SHADER_NV: return "VK_ERROR_INVALID_SHADER_NV";
- case VK_ERROR_OUT_OF_DATE_KHR: return "VK_ERROR_OUT_OF_DATE_KHR";
- case VK_ERROR_SURFACE_LOST_KHR: return "VK_ERROR_SURFACE_LOST_KHR";
- }
-
- return "Unknown error!";
-}
-
-static const char* vk_dbg_type(VkDebugReportObjectTypeEXT type)
-{
- switch (type) {
- case VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT:
- return "VkInstance";
- case VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT:
- return "VkPhysicalDevice";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT:
- return "VkDevice";
- case VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT:
- return "VkQueue";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT:
- return "VkSemaphore";
- case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT:
- return "VkCommandBuffer";
- case VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT:
- return "VkFence";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT:
- return "VkDeviceMemory";
- case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT:
- return "VkBuffer";
- case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT:
- return "VkImage";
- case VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT:
- return "VkEvent";
- case VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT:
- return "VkQueryPool";
- case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT:
- return "VkBufferView";
- case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT:
- return "VkImageView";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT:
- return "VkShaderModule";
- case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT:
- return "VkPipelineCache";
- case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT:
- return "VkPipelineLayout";
- case VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT:
- return "VkRenderPass";
- case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT:
- return "VkPipeline";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT:
- return "VkDescriptorSetLayout";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT:
- return "VkSampler";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT:
- return "VkDescriptorPool";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT:
- return "VkDescriptorSet";
- case VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT:
- return "VkFramebuffer";
- case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT:
- return "VkCommandPool";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT:
- return "VkSurfaceKHR";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT:
- return "VkSwapchainKHR";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT:
- return "VkDebugReportCallbackEXT";
- case VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT:
- default:
- return "unknown object";
- }
-}
-
-static VkBool32 vk_dbg_callback(VkDebugReportFlagsEXT flags,
- VkDebugReportObjectTypeEXT objType,
- uint64_t obj, size_t loc, int32_t msgCode,
- const char *layer, const char *msg, void *priv)
-{
- struct mpvk_ctx *vk = priv;
- int lev = MSGL_V;
-
- switch (flags) {
- case VK_DEBUG_REPORT_ERROR_BIT_EXT: lev = MSGL_ERR; break;
- case VK_DEBUG_REPORT_WARNING_BIT_EXT: lev = MSGL_WARN; break;
- case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: lev = MSGL_TRACE; break;
- case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: lev = MSGL_WARN; break;
- case VK_DEBUG_REPORT_DEBUG_BIT_EXT: lev = MSGL_DEBUG; break;
-    }
-
- MP_MSG(vk, lev, "vk [%s] %d: %s (obj 0x%llx (%s), loc 0x%zx)\n",
- layer, (int)msgCode, msg, (unsigned long long)obj,
- vk_dbg_type(objType), loc);
-
-    // The return value of this function determines whether the offending call
-    // will be explicitly aborted (to prevent GPU errors) or allowed to
-    // proceed. We only want to abort on actual errors.
- return (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT);
-}
-static void vk_cmdpool_destroy(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
-static struct vk_cmdpool *vk_cmdpool_create(struct mpvk_ctx *vk,
- VkDeviceQueueCreateInfo qinfo,
- VkQueueFamilyProperties props);
-
-void mpvk_uninit(struct mpvk_ctx *vk)
-{
- if (!vk->inst)
- return;
-
- if (vk->dev) {
- mpvk_flush_commands(vk);
- mpvk_poll_commands(vk, UINT64_MAX);
- assert(vk->num_cmds_queued == 0);
- assert(vk->num_cmds_pending == 0);
- talloc_free(vk->cmds_queued);
- talloc_free(vk->cmds_pending);
- for (int i = 0; i < vk->num_pools; i++)
- vk_cmdpool_destroy(vk, vk->pools[i]);
- talloc_free(vk->pools);
- for (int i = 0; i < vk->num_signals; i++)
- vk_signal_destroy(vk, &vk->signals[i]);
- talloc_free(vk->signals);
- vk_malloc_uninit(vk);
- vkDestroyDevice(vk->dev, MPVK_ALLOCATOR);
- }
-
- if (vk->dbg) {
- // Same deal as creating the debug callback, we need to load this
- // first.
- VK_LOAD_PFN(vkDestroyDebugReportCallbackEXT)
- pfn_vkDestroyDebugReportCallbackEXT(vk->inst, vk->dbg, MPVK_ALLOCATOR);
- }
-
- vkDestroySurfaceKHR(vk->inst, vk->surf, MPVK_ALLOCATOR);
- vkDestroyInstance(vk->inst, MPVK_ALLOCATOR);
-
- *vk = (struct mpvk_ctx){0};
-}
-
-bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log,
- const char *surf_ext_name, bool debug)
+bool mpvk_init(struct mpvk_ctx *vk, struct ra_ctx *ctx, const char *surface_ext)
{
- *vk = (struct mpvk_ctx) {
- .log = log,
- };
-
- VkInstanceCreateInfo info = {
- .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
- };
-
- if (debug) {
- // Enables the LunarG standard validation layer, which
- // is a meta-layer that loads lots of other validators
- static const char* layers[] = {
- "VK_LAYER_LUNARG_standard_validation",
- };
-
- info.ppEnabledLayerNames = layers;
- info.enabledLayerCount = MP_ARRAY_SIZE(layers);
- }
-
- // Enable whatever extensions were compiled in.
- const char *extensions[] = {
- VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
- VK_KHR_SURFACE_EXTENSION_NAME,
- surf_ext_name,
-
- // Extra extensions only used for debugging. These are toggled by
- // decreasing the enabledExtensionCount, so the number needs to be
- // synchronized with the code below.
- VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
- };
-
- const int debugExtensionCount = 1;
-
- info.ppEnabledExtensionNames = extensions;
- info.enabledExtensionCount = MP_ARRAY_SIZE(extensions);
-
- if (!debug)
- info.enabledExtensionCount -= debugExtensionCount;
-
- MP_VERBOSE(vk, "Creating instance with extensions:\n");
- for (int i = 0; i < info.enabledExtensionCount; i++)
- MP_VERBOSE(vk, " %s\n", info.ppEnabledExtensionNames[i]);
-
- VkResult res = vkCreateInstance(&info, MPVK_ALLOCATOR, &vk->inst);
- if (res != VK_SUCCESS) {
- MP_VERBOSE(vk, "Failed creating instance: %s\n", vk_err(res));
- return false;
- }
-
- if (debug) {
- // Set up a debug callback to catch validation messages
- VkDebugReportCallbackCreateInfoEXT dinfo = {
- .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
- .flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT |
- VK_DEBUG_REPORT_WARNING_BIT_EXT |
- VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT |
- VK_DEBUG_REPORT_ERROR_BIT_EXT |
- VK_DEBUG_REPORT_DEBUG_BIT_EXT,
- .pfnCallback = vk_dbg_callback,
- .pUserData = vk,
- };
-
- // Since this is not part of the core spec, we need to load it. This
- // can't fail because we've already successfully created an instance
- // with this extension enabled.
- VK_LOAD_PFN(vkCreateDebugReportCallbackEXT)
- pfn_vkCreateDebugReportCallbackEXT(vk->inst, &dinfo, MPVK_ALLOCATOR,
- &vk->dbg);
- }
-
- return true;
-}
-
-#define MPVK_MAX_DEVICES 16
-
-static bool physd_supports_surface(struct mpvk_ctx *vk, VkPhysicalDevice physd)
-{
- uint32_t qfnum;
- vkGetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL);
-
- for (int i = 0; i < qfnum; i++) {
- VkBool32 sup;
- VK(vkGetPhysicalDeviceSurfaceSupportKHR(physd, i, vk->surf, &sup));
- if (sup)
- return true;
- }
-
-error:
- return false;
-}
-
-bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw)
-{
- assert(vk->surf);
-
- MP_VERBOSE(vk, "Probing for vulkan devices:\n");
-
- VkPhysicalDevice *devices = NULL;
- uint32_t num = 0;
- VK(vkEnumeratePhysicalDevices(vk->inst, &num, NULL));
- devices = talloc_array(NULL, VkPhysicalDevice, num);
- VK(vkEnumeratePhysicalDevices(vk->inst, &num, devices));
-
- // Sorted by "priority". Reuses some m_opt code for convenience
- static const struct m_opt_choice_alternatives types[] = {
- {"discrete", VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU},
- {"integrated", VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU},
- {"virtual", VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU},
- {"software", VK_PHYSICAL_DEVICE_TYPE_CPU},
- {"unknown", VK_PHYSICAL_DEVICE_TYPE_OTHER},
- {0}
- };
-
- VkPhysicalDeviceProperties props[MPVK_MAX_DEVICES];
- for (int i = 0; i < num; i++) {
- vkGetPhysicalDeviceProperties(devices[i], &props[i]);
- MP_VERBOSE(vk, " GPU %d: %s (%s)\n", i, props[i].deviceName,
- m_opt_choice_str(types, props[i].deviceType));
- }
-
- // Iterate through each type in order of decreasing preference
- for (int t = 0; types[t].name; t++) {
- // Disallow SW rendering unless explicitly enabled
- if (types[t].value == VK_PHYSICAL_DEVICE_TYPE_CPU && !sw)
- continue;
-
- for (int i = 0; i < num; i++) {
- VkPhysicalDeviceProperties prop = props[i];
- if (prop.deviceType != types[t].value)
- continue;
- if (name && strcmp(name, prop.deviceName) != 0)
- continue;
- if (!physd_supports_surface(vk, devices[i]))
- continue;
-
- MP_VERBOSE(vk, "Chose device:\n");
- MP_VERBOSE(vk, " Device Name: %s\n", prop.deviceName);
- MP_VERBOSE(vk, " Device ID: %x:%x\n",
- (unsigned)prop.vendorID, (unsigned)prop.deviceID);
- MP_VERBOSE(vk, " Driver version: %d\n", (int)prop.driverVersion);
- MP_VERBOSE(vk, " API version: %d.%d.%d\n",
- (int)VK_VERSION_MAJOR(prop.apiVersion),
- (int)VK_VERSION_MINOR(prop.apiVersion),
- (int)VK_VERSION_PATCH(prop.apiVersion));
- vk->physd = devices[i];
- vk->limits = prop.limits;
- vkGetPhysicalDeviceFeatures(vk->physd, &vk->features);
- talloc_free(devices);
- return true;
- }
- }
-
-error:
- MP_VERBOSE(vk, "Found no suitable device, giving up.\n");
- talloc_free(devices);
- return false;
-}
-
-bool mpvk_get_phys_device_uuid(struct mpvk_ctx *vk, uint8_t uuid_out[VK_UUID_SIZE])
-{
- assert(vk->physd);
-
- VkPhysicalDeviceIDPropertiesKHR idprops = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR,
- };
-
- VkPhysicalDeviceProperties2KHR props = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
- .pNext = &idprops,
- };
-
- VK_LOAD_PFN(vkGetPhysicalDeviceProperties2KHR);
- pfn_vkGetPhysicalDeviceProperties2KHR(vk->physd, &props);
-
- memcpy(uuid_out, idprops.deviceUUID, VK_UUID_SIZE);
-
- return true;
-}
-
-bool mpvk_pick_surface_format(struct mpvk_ctx *vk)
-{
- assert(vk->physd);
-
- VkSurfaceFormatKHR *formats = NULL;
-    uint32_t num;
-
- // Enumerate through the surface formats and find one that we can map to
- // a ra_format
- VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, NULL));
- formats = talloc_array(NULL, VkSurfaceFormatKHR, num);
- VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, formats));
-
- for (int i = 0; i < num; i++) {
- // A value of VK_FORMAT_UNDEFINED means we can pick anything we want
- if (formats[i].format == VK_FORMAT_UNDEFINED) {
- vk->surf_format = (VkSurfaceFormatKHR) {
- .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
- .format = VK_FORMAT_R16G16B16A16_UNORM,
- };
- break;
- }
-
- if (formats[i].colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR)
- continue;
-
- // Format whitelist, since we want only >= 8 bit _UNORM formats
- switch (formats[i].format) {
- case VK_FORMAT_R8G8B8_UNORM:
- case VK_FORMAT_B8G8R8_UNORM:
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
- case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
- case VK_FORMAT_R16G16B16_UNORM:
- case VK_FORMAT_R16G16B16A16_UNORM:
- break; // accept
- default: continue;
- }
-
- vk->surf_format = formats[i];
- break;
- }
-
- talloc_free(formats);
-
- if (!vk->surf_format.format)
- goto error;
-
- return true;
-
-error:
- MP_ERR(vk, "Failed picking surface format!\n");
- talloc_free(formats);
- return false;
-}
-
-// Find the most specialized queue supporting a combination of flags. In cases
-// where there are multiple queue families at the same specialization level,
-// this finds the one with the most queues. Returns -1 if no queue was found.
-static int find_qf(VkQueueFamilyProperties *qfs, int qfnum, VkQueueFlags flags)
-{
- int idx = -1;
- for (int i = 0; i < qfnum; i++) {
- if (!(qfs[i].queueFlags & flags))
- continue;
-
- // QF is more specialized. Since we don't care about other bits like
-        // SPARSE_BIT, mask the ones we're interested in
- const VkQueueFlags mask = VK_QUEUE_GRAPHICS_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_COMPUTE_BIT;
-
- if (idx < 0 || (qfs[i].queueFlags & mask) < (qfs[idx].queueFlags & mask))
- idx = i;
-
- // QF has more queues (at the same specialization level)
- if (qfs[i].queueFlags == qfs[idx].queueFlags &&
- qfs[i].queueCount > qfs[idx].queueCount)
- idx = i;
- }
-
- return idx;
-}
-
-static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos,
- int *num_qinfos, VkQueueFamilyProperties *qfs, int idx,
- int qcount)
-{
- if (idx < 0)
- return;
-
- // Check to see if we've already added this queue family
- for (int i = 0; i < *num_qinfos; i++) {
- if ((*qinfos)[i].queueFamilyIndex == idx)
- return;
- }
-
- float *priorities = talloc_zero_array(tactx, float, qcount);
- VkDeviceQueueCreateInfo qinfo = {
- .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
- .queueFamilyIndex = idx,
- .queueCount = MPMIN(qcount, qfs[idx].queueCount),
- .pQueuePriorities = priorities,
- };
-
- MP_TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo);
-}
-
-static bool detect_device_extensions(struct mpvk_ctx *vk)
-{
- bool ret = false;
- VkExtensionProperties *props = NULL;
-
- uint32_t num_exts;
- VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL,
- &num_exts, NULL));
-
- props = talloc_array(NULL, VkExtensionProperties, num_exts);
- VK(vkEnumerateDeviceExtensionProperties(vk->physd,
- NULL, &num_exts, props));
-
- for (uint32_t i = 0; i < num_exts; i++) {
- if (!strcmp(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
- props[i].extensionName)) {
- vk->has_ext_external_memory = true;
- continue;
- }
- if (!strcmp(MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME,
- props[i].extensionName)) {
- vk->has_ext_external_memory_export = true;
- continue;
- }
- }
-
- ret = true;
-error:
- talloc_free(props);
- return ret;
-}
-
-bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
-{
- assert(vk->physd);
- void *tmp = talloc_new(NULL);
-
- // Enumerate the queue families and find suitable families for each task
-    uint32_t qfnum;
- vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL);
- VkQueueFamilyProperties *qfs = talloc_array(tmp, VkQueueFamilyProperties, qfnum);
- vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs);
-
- MP_VERBOSE(vk, "Queue families supported by device:\n");
-
- for (int i = 0; i < qfnum; i++) {
- MP_VERBOSE(vk, " QF %d: flags 0x%x num %d\n", i,
- (unsigned)qfs[i].queueFlags, (int)qfs[i].queueCount);
- }
-
- int idx_gfx = -1, idx_comp = -1, idx_tf = -1;
- idx_gfx = find_qf(qfs, qfnum, VK_QUEUE_GRAPHICS_BIT);
- if (opts.async_compute)
- idx_comp = find_qf(qfs, qfnum, VK_QUEUE_COMPUTE_BIT);
- if (opts.async_transfer)
- idx_tf = find_qf(qfs, qfnum, VK_QUEUE_TRANSFER_BIT);
-
- // Vulkan requires at least one GRAPHICS queue, so if this fails something
- // is horribly wrong.
- assert(idx_gfx >= 0);
- MP_VERBOSE(vk, "Using graphics queue (QF %d)\n", idx_gfx);
-
- // Ensure we can actually present to the surface using this queue
- VkBool32 sup;
- VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx_gfx, vk->surf, &sup));
- if (!sup) {
- MP_ERR(vk, "Queue family does not support surface presentation!\n");
+ vk->ctx = pl_context_create(PL_API_VER, NULL);
+ if (!vk->ctx)
goto error;
- }
-
- if (idx_tf >= 0 && idx_tf != idx_gfx)
- MP_VERBOSE(vk, "Using async transfer (QF %d)\n", idx_tf);
- if (idx_comp >= 0 && idx_comp != idx_gfx)
- MP_VERBOSE(vk, "Using async compute (QF %d)\n", idx_comp);
- // Fall back to supporting compute shaders via the graphics pool for
- // devices which support compute shaders but not async compute.
- if (idx_comp < 0 && qfs[idx_gfx].queueFlags & VK_QUEUE_COMPUTE_BIT)
- idx_comp = idx_gfx;
+ vk->pl_log = mp_log_new(ctx, ctx->log, "libplacebo");
+ mppl_ctx_set_log(vk->ctx, vk->pl_log, true);
- // Now that we know which QFs we want, we can create the logical device
- VkDeviceQueueCreateInfo *qinfos = NULL;
- int num_qinfos = 0;
- add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_gfx, opts.queue_count);
- add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, opts.queue_count);
- add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, opts.queue_count);
-
- if (!detect_device_extensions(vk)) {
- MP_WARN(vk, "Failed to enumerate device extensions. "
- "Some features may be disabled.\n");
- }
-
- const char **exts = NULL;
- int num_exts = 0;
- MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME);
- if (vk->has_ext_external_memory)
- MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
- if (vk->has_ext_external_memory_export)
- MP_TARRAY_APPEND(tmp, exts, num_exts, MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
- if (vk->spirv->required_ext)
- MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext);
-
- // Enable all features we optionally use
-#define FEATURE(name) .name = vk->features.name
- VkPhysicalDeviceFeatures feats = {
- FEATURE(shaderImageGatherExtended),
- FEATURE(shaderStorageImageExtendedFormats),
- };
-#undef FEATURE
-
- VkDeviceCreateInfo dinfo = {
- .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
- .pQueueCreateInfos = qinfos,
- .queueCreateInfoCount = num_qinfos,
- .ppEnabledExtensionNames = exts,
- .enabledExtensionCount = num_exts,
- .pEnabledFeatures = &feats,
- };
-
- MP_VERBOSE(vk, "Creating vulkan device with extensions:\n");
- for (int i = 0; i < num_exts; i++)
- MP_VERBOSE(vk, " %s\n", exts[i]);
-
- VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev));
-
- // Create the command pools and memory allocator
- for (int i = 0; i < num_qinfos; i++) {
- int qf = qinfos[i].queueFamilyIndex;
- struct vk_cmdpool *pool = vk_cmdpool_create(vk, qinfos[i], qfs[qf]);
- if (!pool)
- goto error;
- MP_TARRAY_APPEND(NULL, vk->pools, vk->num_pools, pool);
-
- // Update the pool_* pointers based on the corresponding QF index
- if (qf == idx_gfx)
- vk->pool_graphics = pool;
- if (qf == idx_comp)
- vk->pool_compute = pool;
- if (qf == idx_tf)
- vk->pool_transfer = pool;
- }
-
- vk_malloc_init(vk);
- talloc_free(tmp);
- return true;
-
-error:
- MP_ERR(vk, "Failed creating logical device!\n");
- talloc_free(tmp);
- return false;
-}
-
-// returns VK_SUCCESS (completed), VK_TIMEOUT (not yet completed) or an error
-static VkResult vk_cmd_poll(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- uint64_t timeout)
-{
- return vkWaitForFences(vk->dev, 1, &cmd->fence, false, timeout);
-}
-
-static void vk_cmd_reset(struct mpvk_ctx *vk, struct vk_cmd *cmd)
-{
- for (int i = 0; i < cmd->num_callbacks; i++) {
- struct vk_callback *cb = &cmd->callbacks[i];
- cb->run(cb->priv, cb->arg);
- }
-
- cmd->num_callbacks = 0;
- cmd->num_deps = 0;
- cmd->num_sigs = 0;
-
- // also make sure to reset vk->last_cmd in case this was the last command
- if (vk->last_cmd == cmd)
- vk->last_cmd = NULL;
-}
-
-static void vk_cmd_destroy(struct mpvk_ctx *vk, struct vk_cmd *cmd)
-{
- if (!cmd)
- return;
-
- vk_cmd_poll(vk, cmd, UINT64_MAX);
- vk_cmd_reset(vk, cmd);
- vkDestroyFence(vk->dev, cmd->fence, MPVK_ALLOCATOR);
- vkFreeCommandBuffers(vk->dev, cmd->pool->pool, 1, &cmd->buf);
-
- talloc_free(cmd);
-}
-
-static struct vk_cmd *vk_cmd_create(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
-{
- struct vk_cmd *cmd = talloc_zero(NULL, struct vk_cmd);
- cmd->pool = pool;
-
- VkCommandBufferAllocateInfo ainfo = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .commandPool = pool->pool,
- .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
- .commandBufferCount = 1,
- };
-
- VK(vkAllocateCommandBuffers(vk->dev, &ainfo, &cmd->buf));
-
- VkFenceCreateInfo finfo = {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+ const char *exts[] = {
+ VK_KHR_SURFACE_EXTENSION_NAME,
+ surface_ext,
};
- VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, &cmd->fence));
-
- return cmd;
-
-error:
- vk_cmd_destroy(vk, cmd);
- return NULL;
-}
-
-void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg)
-{
- MP_TARRAY_APPEND(cmd, cmd->callbacks, cmd->num_callbacks, (struct vk_callback) {
- .run = callback,
- .priv = p,
- .arg = arg,
+ vk->vkinst = pl_vk_inst_create(vk->ctx, &(struct pl_vk_inst_params) {
+ .debug = ctx->opts.debug,
+ .extensions = exts,
+ .num_extensions = MP_ARRAY_SIZE(exts),
});
-}
-
-void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage)
-{
- int idx = cmd->num_deps++;
- MP_TARRAY_GROW(cmd, cmd->deps, idx);
- MP_TARRAY_GROW(cmd, cmd->depstages, idx);
- cmd->deps[idx] = dep;
- cmd->depstages[idx] = stage;
-}
-
-void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig)
-{
- MP_TARRAY_APPEND(cmd, cmd->sigs, cmd->num_sigs, sig);
-}
-
-static void vk_cmdpool_destroy(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
-{
- if (!pool)
- return;
- for (int i = 0; i < pool->num_cmds; i++)
- vk_cmd_destroy(vk, pool->cmds[i]);
-
- vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR);
- talloc_free(pool);
-}
-
-static struct vk_cmdpool *vk_cmdpool_create(struct mpvk_ctx *vk,
- VkDeviceQueueCreateInfo qinfo,
- VkQueueFamilyProperties props)
-{
- struct vk_cmdpool *pool = talloc_ptrtype(NULL, pool);
- *pool = (struct vk_cmdpool) {
- .props = props,
- .qf = qinfo.queueFamilyIndex,
- .queues = talloc_array(pool, VkQueue, qinfo.queueCount),
- .num_queues = qinfo.queueCount,
- };
-
- for (int n = 0; n < pool->num_queues; n++)
- vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]);
-
- VkCommandPoolCreateInfo cinfo = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
- .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
- VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
- .queueFamilyIndex = pool->qf,
- };
-
- VK(vkCreateCommandPool(vk->dev, &cinfo, MPVK_ALLOCATOR, &pool->pool));
-
- return pool;
-
-error:
- vk_cmdpool_destroy(vk, pool);
- return NULL;
-}
-
-void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout)
-{
- while (vk->num_cmds_pending > 0) {
- struct vk_cmd *cmd = vk->cmds_pending[0];
- struct vk_cmdpool *pool = cmd->pool;
- VkResult res = vk_cmd_poll(vk, cmd, timeout);
- if (res == VK_TIMEOUT)
- break;
- vk_cmd_reset(vk, cmd);
- MP_TARRAY_REMOVE_AT(vk->cmds_pending, vk->num_cmds_pending, 0);
- MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
- }
-}
-
-bool mpvk_flush_commands(struct mpvk_ctx *vk)
-{
- bool ret = true;
-
- for (int i = 0; i < vk->num_cmds_queued; i++) {
- struct vk_cmd *cmd = vk->cmds_queued[i];
- struct vk_cmdpool *pool = cmd->pool;
-
- VkSubmitInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .commandBufferCount = 1,
- .pCommandBuffers = &cmd->buf,
- .waitSemaphoreCount = cmd->num_deps,
- .pWaitSemaphores = cmd->deps,
- .pWaitDstStageMask = cmd->depstages,
- .signalSemaphoreCount = cmd->num_sigs,
- .pSignalSemaphores = cmd->sigs,
- };
-
- VK(vkQueueSubmit(cmd->queue, 1, &sinfo, cmd->fence));
- MP_TARRAY_APPEND(NULL, vk->cmds_pending, vk->num_cmds_pending, cmd);
-
- if (mp_msg_test(vk->log, MSGL_TRACE)) {
- MP_TRACE(vk, "Submitted command on queue %p (QF %d):\n",
- (void *)cmd->queue, pool->qf);
- for (int n = 0; n < cmd->num_deps; n++)
- MP_TRACE(vk, " waits on semaphore %p\n", (void *)cmd->deps[n]);
- for (int n = 0; n < cmd->num_sigs; n++)
- MP_TRACE(vk, " signals semaphore %p\n", (void *)cmd->sigs[n]);
- }
- continue;
-
-error:
- vk_cmd_reset(vk, cmd);
- MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
- ret = false;
- }
-
- vk->num_cmds_queued = 0;
-
- return ret;
-}
-
-void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg)
-{
- if (vk->last_cmd) {
- vk_cmd_callback(vk->last_cmd, callback, p, arg);
- } else {
- // The device was already idle, so we can just immediately call it
- callback(p, arg);
- }
-}
-
-struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
-{
- // garbage collect the cmdpool first, to increase the chances of getting
- // an already-available command buffer
- mpvk_poll_commands(vk, 0);
-
- struct vk_cmd *cmd = NULL;
- if (MP_TARRAY_POP(pool->cmds, pool->num_cmds, &cmd))
- goto done;
-
- // No free command buffers => allocate another one
- cmd = vk_cmd_create(vk, pool);
- if (!cmd)
+ if (!vk->vkinst)
goto error;
-done: ;
-
- VkCommandBufferBeginInfo binfo = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
- };
-
- VK(vkBeginCommandBuffer(cmd->buf, &binfo));
-
- cmd->queue = pool->queues[pool->idx_queues];
- return cmd;
-
-error:
- // Something has to be seriously messed up if we get to this point
- vk_cmd_destroy(vk, cmd);
- return NULL;
-}
-
-void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd)
-{
- struct vk_cmdpool *pool = cmd->pool;
-
- VK(vkEndCommandBuffer(cmd->buf));
-
- VK(vkResetFences(vk->dev, 1, &cmd->fence));
- MP_TARRAY_APPEND(NULL, vk->cmds_queued, vk->num_cmds_queued, cmd);
- vk->last_cmd = cmd;
- return;
-
-error:
- vk_cmd_reset(vk, cmd);
- MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
-}
-
-void vk_signal_destroy(struct mpvk_ctx *vk, struct vk_signal **sig)
-{
- if (!*sig)
- return;
-
- vkDestroySemaphore(vk->dev, (*sig)->semaphore, MPVK_ALLOCATOR);
- vkDestroyEvent(vk->dev, (*sig)->event, MPVK_ALLOCATOR);
- talloc_free(*sig);
- *sig = NULL;
-}
-
-struct vk_signal *vk_cmd_signal(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- VkPipelineStageFlags stage)
-{
- struct vk_signal *sig = NULL;
- if (MP_TARRAY_POP(vk->signals, vk->num_signals, &sig))
- goto done;
-
- // no available signal => initialize a new one
- sig = talloc_zero(NULL, struct vk_signal);
- static const VkSemaphoreCreateInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- };
-
- VK(vkCreateSemaphore(vk->dev, &sinfo, MPVK_ALLOCATOR, &sig->semaphore));
-
- static const VkEventCreateInfo einfo = {
- .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
- };
-
- VK(vkCreateEvent(vk->dev, &einfo, MPVK_ALLOCATOR, &sig->event));
-
-done:
- // Signal both the semaphore and the event if possible. (We will only
- // end up using one or the other)
- vk_cmd_sig(cmd, sig->semaphore);
-
- VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
- if (cmd->pool->props.queueFlags & req) {
- vkCmdSetEvent(cmd->buf, sig->event, stage);
- sig->event_source = cmd->queue;
- }
-
- return sig;
+ mppl_ctx_set_log(vk->ctx, vk->pl_log, false); // disable probing
+ return true;
error:
- vk_signal_destroy(vk, &sig);
- return NULL;
-}
-
-static bool unsignal_cmd(struct vk_cmd *cmd, VkSemaphore sem)
-{
- for (int n = 0; n < cmd->num_sigs; n++) {
- if (cmd->sigs[n] == sem) {
- MP_TARRAY_REMOVE_AT(cmd->sigs, cmd->num_sigs, n);
- return true;
- }
- }
-
+ mpvk_uninit(vk);
return false;
}
-// Attempts to remove a queued signal operation. Returns true if successful,
-// i.e. the signal could be removed before it ever got fired.
-static bool unsignal(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore sem)
-{
- if (unsignal_cmd(cmd, sem))
- return true;
-
- // Attempt to remove it from any queued commands
- for (int i = 0; i < vk->num_cmds_queued; i++) {
- if (unsignal_cmd(vk->cmds_queued[i], sem))
- return true;
- }
-
- return false;
-}
-
-static void release_signal(struct mpvk_ctx *vk, struct vk_signal *sig)
-{
- // The semaphore never needs to be recreated, because it's either
- // unsignaled while still queued, or unsignaled as a result of a device
- // wait. But the event *may* need to be reset, so just always reset it.
- if (sig->event_source)
- vkResetEvent(vk->dev, sig->event);
- sig->event_source = NULL;
- MP_TARRAY_APPEND(NULL, vk->signals, vk->num_signals, sig);
-}
-
-void vk_cmd_wait(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- struct vk_signal **sigptr, VkPipelineStageFlags stage,
- VkEvent *out_event)
+void mpvk_uninit(struct mpvk_ctx *vk)
{
- struct vk_signal *sig = *sigptr;
- if (!sig)
- return;
-
- if (out_event && sig->event && sig->event_source == cmd->queue &&
- unsignal(vk, cmd, sig->semaphore))
- {
- // If we can remove the semaphore signal operation from the history and
- // pretend it never happened, then we get to use the VkEvent. This also
- // requires that the VkEvent was signalled from the same VkQueue.
- *out_event = sig->event;
- } else if (sig->semaphore) {
- // Otherwise, we use the semaphore. (This also unsignals it as a result
- // of the command execution)
- vk_cmd_dep(cmd, sig->semaphore, stage);
+ if (vk->surface) {
+ assert(vk->vkinst);
+ vkDestroySurfaceKHR(vk->vkinst->instance, vk->surface, NULL);
+ vk->surface = NULL;
}
- // In either case, once the command completes, we can release the signal
- // resource back to the pool.
- vk_cmd_callback(cmd, (vk_cb) release_signal, vk, sig);
- *sigptr = NULL;
+ pl_vk_inst_destroy(&vk->vkinst);
+ pl_context_destroy(&vk->ctx);
+ TA_FREEP(&vk->pl_log);
}
-
-const VkImageSubresourceRange vk_range = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .levelCount = 1,
- .layerCount = 1,
-};
-
-const VkImageSubresourceLayers vk_layers = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .layerCount = 1,
-};
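
Reviewer note: after this change a windowing backend only has to name its platform surface extension and create the VkSurfaceKHR itself; instance creation, validation layers and debug logging now live in libplacebo. A sketch of the new calling convention, loosely modeled on the Xlib context (illustrative, not a verbatim excerpt):

    #include <stdbool.h>
    #include "video/out/vulkan/utils.h"

    static bool xlib_init_sketch(struct ra_ctx *ctx, struct mpvk_ctx *vk)
    {
        // One call replaces the old mpvk_instance_init + debug callback setup
        if (!mpvk_init(vk, ctx, VK_KHR_XLIB_SURFACE_EXTENSION_NAME))
            return false;

        // The backend is still responsible for filling in vk->surface from
        // its native window (vkCreateXlibSurfaceKHR et al.) before the
        // swapchain is set up; mpvk_uninit() destroys it again on teardown
        // ...
        return true;
    }
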
diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h
index 9af59e4b50..a98e1477b6 100644
--- a/video/out/vulkan/utils.h
+++ b/video/out/vulkan/utils.h
@@ -1,192 +1,6 @@
#pragma once
-
-#include "video/out/vo.h"
-#include "video/out/gpu/context.h"
-#include "video/mp_image.h"
-
#include "common.h"
-#include "formats.h"
-
-#define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \
- vkGetInstanceProcAddr(vk->inst, #name);
-
-#if HAVE_WIN32_DESKTOP
- #define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME
-#else
- #define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME
-#endif
-
-// Return a human-readable name for a VkResult status code
-const char* vk_err(VkResult res);
-
-// Convenience macros to simplify a lot of common boilerplate
-#define VK_ASSERT(res, str) \
- do { \
- if (res != VK_SUCCESS) { \
- MP_ERR(vk, str ": %s\n", vk_err(res)); \
- goto error; \
- } \
- } while (0)
-
-#define VK(cmd) \
- do { \
- MP_TRACE(vk, #cmd "\n"); \
- VkResult res ## __LINE__ = (cmd); \
- VK_ASSERT(res ## __LINE__, #cmd); \
- } while (0)
+#include "video/out/gpu/context.h"
-// Uninits everything in the correct order
+bool mpvk_init(struct mpvk_ctx *vk, struct ra_ctx *ctx, const char *surface_ext);
void mpvk_uninit(struct mpvk_ctx *vk);
-
-// Initialization functions: As a rule of thumb, these need to be called in
-// this order, followed by vk_malloc_init, followed by RA initialization, and
-// finally followed by vk_swchain initialization.
-
-// Create a vulkan instance. Returns false on failure
-bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log,
- const char *surf_ext_name, bool debug);
-
-// Generate a VkSurfaceKHR usable for video output. Returns false on failure.
-// Must be called after mpvk_instance_init.
-bool mpvk_surface_init(struct vo *vo, struct mpvk_ctx *vk);
-
-// Find a suitable physical device for use with rendering and which supports
-// the surface.
-// name: only match a device with this name
-// sw: also allow software/virtual devices
-bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw);
-
-// Get the UUID for the selected physical device
-bool mpvk_get_phys_device_uuid(struct mpvk_ctx *vk, uint8_t uuid_out[VK_UUID_SIZE]);
-
-// Pick a suitable surface format that's supported by this physical device.
-bool mpvk_pick_surface_format(struct mpvk_ctx *vk);
-
-struct mpvk_device_opts {
- int queue_count; // number of queues to use
- int async_transfer; // enable async transfer
- int async_compute; // enable async compute
-};
-
-// Create a logical device and initialize the vk_cmdpools
-bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts);
-
-// Wait for all currently pending commands to have completed. This is the only
-// function that actually processes the callbacks. Will wait at most `timeout`
-// nanoseconds for the completion of each command. Using it with a value of
-// UINT64_MAX effectively means waiting until the pool/device is idle. The
-// timeout may also be passed as 0, in which case this function will not block,
-// but only poll for completed commands.
-void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout);
-
-// Flush all currently queued commands. Call this once per frame, after
-// submitting all of the command buffers for that frame. Calling this more
-// often than that is possible but bad for performance.
-// Returns whether successful. Failed commands will be implicitly dropped.
-bool mpvk_flush_commands(struct mpvk_ctx *vk);
-
-// Since lots of vulkan operations need to be done lazily once the affected
-// resources are no longer in use, provide an abstraction for tracking these.
-// In practice, these are only checked and run when submitting new commands, so
-// the actual execution may be delayed by a frame.
-typedef void (*vk_cb)(void *priv, void *arg);
-
-struct vk_callback {
- vk_cb run;
- void *priv;
- void *arg; // as a convenience, you also get to pass an arg for "free"
-};
-
-// Associate a callback with the completion of all currently pending commands.
-// This will essentially run once the device is completely idle.
-void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg);
-
-// Helper wrapper around command buffers that also track dependencies,
-// callbacks and synchronization primitives
-struct vk_cmd {
- struct vk_cmdpool *pool; // pool it was allocated from
- VkQueue queue; // the submission queue (for recording/pending)
- VkCommandBuffer buf; // the command buffer itself
- VkFence fence; // the fence guards cmd buffer reuse
- // The semaphores represent dependencies that need to complete before
- // this command can be executed. These are *not* owned by the vk_cmd
- VkSemaphore *deps;
- VkPipelineStageFlags *depstages;
- int num_deps;
- // The signals represent semaphores that fire once the command finishes
- // executing. These are also not owned by the vk_cmd
- VkSemaphore *sigs;
- int num_sigs;
- // Since VkFences are useless, we have to manually track "callbacks"
- // to fire once the VkFence completes. These are used for multiple purposes,
- // ranging from garbage collection (resource deallocation) to fencing.
- struct vk_callback *callbacks;
- int num_callbacks;
-};
-
-// Associate a callback with the completion of the current command. The
-// callback will be fired once the command completes, or shortly thereafter.
-void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg);
-
-// Associate a raw dependency for the current command. This semaphore must
-// signal by the corresponding stage before the command may execute.
-void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage);
-
-// Associate a raw signal with the current command. This semaphore will signal
-// after the command completes.
-void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig);
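
The typical consumer of these two hooks is swapchain integration; roughly,
with `acquired` and `done` standing in for the usual per-image semaphores:

    // Block color output on the image-acquire semaphore, and signal `done`
    // on completion so that vkQueuePresentKHR can wait on it
    vk_cmd_dep(cmd, acquired, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
    vk_cmd_sig(cmd, done);
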
-
-// Signal abstraction: represents an abstract synchronization mechanism.
-// Internally, this may either resolve as a semaphore or an event depending
-// on whether the appropriate conditions are met.
-struct vk_signal {
- VkSemaphore semaphore;
- VkEvent event;
- VkQueue event_source;
-};
-
-// Generates a signal after the execution of all previous commands matching
-// the given pipeline stage. The signal is owned by the caller, and must be
-// consumed with vk_cmd_wait or released with vk_signal_destroy in order to
-// free the resources.
-struct vk_signal *vk_cmd_signal(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- VkPipelineStageFlags stage);
-
-// Consumes a previously generated signal. This signal must fire by the
-// indicated stage before the command can run. If `out_event` is not NULL,
-// then it MAY be set to a VkEvent, which the caller MUST then manually wait
-// on in the most appropriate way. This function takes over ownership of the
-// signal (and the signal will be released/reused automatically)
-void vk_cmd_wait(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- struct vk_signal **sigptr, VkPipelineStageFlags stage,
- VkEvent *out_event);
-
-// Destroys a currently pending signal, for example if the resource is no
-// longer relevant.
-void vk_signal_destroy(struct mpvk_ctx *vk, struct vk_signal **sig);
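
Put together, ordering a write in one command before a read in a later one
would look roughly like this (hypothetical usage; cmd1/cmd2 are assumed to
already be recording):

    // After recording the write into cmd1:
    struct vk_signal *sig = vk_cmd_signal(vk, cmd1, VK_PIPELINE_STAGE_TRANSFER_BIT);

    // Before recording the dependent read into cmd2:
    VkEvent event = VK_NULL_HANDLE;
    vk_cmd_wait(vk, cmd2, &sig, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, &event);
    if (event) {
        // The signal resolved to a VkEvent rather than a semaphore; it must
        // be waited on manually, e.g. vkCmdWaitEvents on cmd2->buf
    }
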
-
-// Command pool / queue family hybrid abstraction
-struct vk_cmdpool {
- VkQueueFamilyProperties props;
- int qf; // queue family index
- VkCommandPool pool;
- VkQueue *queues;
- int num_queues;
- int idx_queues;
- // Command buffers associated with this queue. These are available for
- // re-recording
- struct vk_cmd **cmds;
- int num_cmds;
-};
-
-// Fetch a command buffer from a command pool and begin recording to it.
-// Returns NULL on failure.
-struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
-
-// Finish recording a command buffer and queue it for execution. This function
-// takes over ownership of *cmd, i.e. the caller should not touch it again.
-void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd);
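
A condensed sketch of the recording path these two calls bracket
(hypothetical; record_frame() stands in for the renderer's actual vkCmd*
calls):

    struct vk_cmd *cmd = vk_cmd_begin(vk, pool);
    if (!cmd)
        return false;
    record_frame(cmd->buf); // record this frame's work
    vk_cmd_queue(vk, cmd);  // transfers ownership; cmd must not be touched again
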
-
-// Predefined structs for a simple non-layered, non-mipped image
-extern const VkImageSubresourceRange vk_range;
-extern const VkImageSubresourceLayers vk_layers;
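
For reference, the boilerplate these two constants saved, e.g. in a
whole-image layout transition (illustrative only; `image` is assumed):

    VkImageMemoryBarrier barrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .image = image,
        .subresourceRange = vk_range, // whole non-layered, non-mipped image
    };
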
diff --git a/wscript b/wscript
index d3d49090c8..09eb5da529 100644
--- a/wscript
+++ b/wscript
@@ -805,11 +805,14 @@ video_output_features = [
"Aborting. If you really mean to compile without OpenGL " +
"video outputs use --disable-gl.",
}, {
+ 'name': '--libplacebo',
+ 'desc': 'libplacebo support',
+ 'func': check_pkg_config('libplacebo >= 1.18.0'),
+ }, {
'name': '--vulkan',
- 'desc': 'Vulkan context support',
- 'deps': 'shaderc',
- # Lowest version tested, Ubuntu 16.04's
- 'func': check_pkg_config('vulkan >= 1.0.61'),
+ 'desc': 'Vulkan context support',
+ 'deps': 'libplacebo',
+ 'func': check_pkg_config('vulkan'),
}, {
'name': 'egl-helpers',
'desc': 'EGL helper functions',
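
In source terms, the new libplacebo floor corresponds to a compile-time API
version; a hedged illustration (not part of this commit, and assuming
libplacebo's PL_API_VER macro from <libplacebo/config.h>, where a 1.x.y
release exposes API version x):

    #include <libplacebo/config.h>

    #if PL_API_VER < 18
    #error "libplacebo 1.18.0 or newer is required"
    #endif
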
diff --git a/wscript_build.py b/wscript_build.py
index 38f704ef34..99f14990ac 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -445,6 +445,8 @@ def build(ctx):
( "video/out/gpu/utils.c" ),
( "video/out/gpu/video.c" ),
( "video/out/gpu/video_shaders.c" ),
+ ( "video/out/placebo/ra_pl.c", "libplacebo" ),
+ ( "video/out/placebo/utils.c", "libplacebo" ),
( "video/out/opengl/angle_dynamic.c", "egl-angle" ),
( "video/out/opengl/common.c", "gl" ),
( "video/out/opengl/context.c", "gl" ),
@@ -495,11 +497,8 @@ def build(ctx):
( "video/out/vo_xv.c", "xv" ),
( "video/out/vulkan/context.c", "vulkan" ),
( "video/out/vulkan/context_wayland.c", "vulkan && wayland" ),
- ( "video/out/vulkan/context_win.c", "vulkan && win32-desktop" ),
+ #( "video/out/vulkan/context_win.c", "vulkan && win32-desktop" ),
( "video/out/vulkan/context_xlib.c", "vulkan && x11" ),
- ( "video/out/vulkan/formats.c", "vulkan" ),
- ( "video/out/vulkan/malloc.c", "vulkan" ),
- ( "video/out/vulkan/ra_vk.c", "vulkan" ),
( "video/out/vulkan/utils.c", "vulkan" ),
( "video/out/w32_common.c", "win32-desktop" ),
( "video/out/wayland/idle-inhibit-v1.c", "wayland" ),