-rw-r--r--  common/msg.c                            8
-rw-r--r--  common/msg.h                            7
-rw-r--r--  video/out/gpu/context.c                 4
-rw-r--r--  video/out/opengl/hwdec_cuda.c          11
-rw-r--r--  video/out/placebo/ra_pl.c             628
-rw-r--r--  video/out/placebo/ra_pl.h              10
-rw-r--r--  video/out/placebo/utils.c              62
-rw-r--r--  video/out/placebo/utils.h              18
-rw-r--r--  video/out/vulkan/common.h              58
-rw-r--r--  video/out/vulkan/context.c            482
-rw-r--r--  video/out/vulkan/context.h              4
-rw-r--r--  video/out/vulkan/context_wayland.c     18
-rw-r--r--  video/out/vulkan/context_win.c        105
-rw-r--r--  video/out/vulkan/context_xlib.c        11
-rw-r--r--  video/out/vulkan/formats.c             55
-rw-r--r--  video/out/vulkan/formats.h             16
-rw-r--r--  video/out/vulkan/malloc.c             471
-rw-r--r--  video/out/vulkan/malloc.h              37
-rw-r--r--  video/out/vulkan/ra_vk.c             1982
-rw-r--r--  video/out/vulkan/ra_vk.h               51
-rw-r--r--  video/out/vulkan/utils.c              990
-rw-r--r--  video/out/vulkan/utils.h              190
-rw-r--r--  wscript                                11
-rw-r--r--  wscript_build.py                        7
24 files changed, 868 insertions, 4368 deletions
diff --git a/common/msg.c b/common/msg.c
index cb41ea4168..e35b953f7d 100644
--- a/common/msg.c
+++ b/common/msg.c
@@ -127,19 +127,19 @@ static void update_loglevel(struct mp_log *log)
pthread_mutex_unlock(&mp_msg_lock);
}
-// Return whether the message at this verbosity level would be actually printed.
+// Get the current effective msg level.
// Thread-safety: see mp_msg().
-bool mp_msg_test(struct mp_log *log, int lev)
+int mp_msg_level(struct mp_log *log)
{
struct mp_log_root *root = log->root;
if (!root)
- return false;
+ return -1;
if (atomic_load_explicit(&log->reload_counter, memory_order_relaxed) !=
atomic_load_explicit(&root->reload_counter, memory_order_relaxed))
{
update_loglevel(log);
}
- return lev <= log->level;
+ return log->level;
}
// Reposition cursor and clear lines for outputting the status line. In certain
diff --git a/common/msg.h b/common/msg.h
index 21228870f4..635a85191f 100644
--- a/common/msg.h
+++ b/common/msg.h
@@ -52,7 +52,12 @@ void mp_msg(struct mp_log *log, int lev, const char *format, ...)
PRINTF_ATTRIBUTE(3, 4);
void mp_msg_va(struct mp_log *log, int lev, const char *format, va_list va);
-bool mp_msg_test(struct mp_log *log, int lev);
+int mp_msg_level(struct mp_log *log);
+
+static inline bool mp_msg_test(struct mp_log *log, int lev)
+{
+ return lev <= mp_msg_level(log);
+}
// Convenience macros.
#define mp_fatal(log, ...) mp_msg(log, MSGL_FATAL, __VA_ARGS__)
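The mp_msg_test() check is kept as an inline wrapper, while the new mp_msg_level() exposes the effective level itself so it can be mapped onto another library's verbosity scale (which is what video/out/placebo/utils.c does further down). A minimal usage sketch, not part of this commit; `log` and `expensive_dump()` are placeholders:

    // Gating expensive log formatting keeps working unchanged:
    if (mp_msg_test(log, MSGL_DEBUG))
        mp_dbg(log, "frame stats: %s\n", expensive_dump());

    // New: query the effective level once, e.g. to derive a matching
    // verbosity for a third-party logger; -1 means the log has no root.
    int lev = mp_msg_level(log);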
diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c
index 85f1aa7667..0a46936708 100644
--- a/video/out/gpu/context.c
+++ b/video/out/gpu/context.c
@@ -50,7 +50,7 @@ extern const struct ra_ctx_fns ra_ctx_vdpauglx;
/* Vulkan */
extern const struct ra_ctx_fns ra_ctx_vulkan_wayland;
-extern const struct ra_ctx_fns ra_ctx_vulkan_win;
+//extern const struct ra_ctx_fns ra_ctx_vulkan_win;
extern const struct ra_ctx_fns ra_ctx_vulkan_xlib;
/* Direct3D 11 */
@@ -105,9 +105,11 @@ static const struct ra_ctx_fns *contexts[] = {
// Vulkan contexts:
#if HAVE_VULKAN
+/*
#if HAVE_WIN32_DESKTOP
&ra_ctx_vulkan_win,
#endif
+*/
#if HAVE_WAYLAND
&ra_ctx_vulkan_wayland,
#endif
diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c
index fee1f83f98..2e87593c78 100644
--- a/video/out/opengl/hwdec_cuda.c
+++ b/video/out/opengl/hwdec_cuda.c
@@ -39,9 +39,11 @@
#include "ra_gl.h"
#endif
#if HAVE_VULKAN
+/*
#include "video/out/vulkan/formats.h"
#include "video/out/vulkan/ra_vk.h"
#include "video/out/vulkan/utils.h"
+*/
#endif
#if HAVE_WIN32_DESKTOP
@@ -125,6 +127,8 @@ static int cuda_init(struct ra_hwdec *hw)
#endif
#if HAVE_VULKAN
+ return -1; // TODO: reimplement
+ /*
p->is_vk = ra_vk_get(hw->ra) != NULL;
if (p->is_vk) {
if (!ra_vk_get(hw->ra)->has_ext_external_memory_export) {
@@ -133,6 +137,7 @@ static int cuda_init(struct ra_hwdec *hw)
return -1;
}
}
+ */
#endif
if (!p->is_gl && !p->is_vk) {
@@ -197,6 +202,7 @@ static int cuda_init(struct ra_hwdec *hw)
}
} else if (p->is_vk) {
#if HAVE_VULKAN
+ /*
uint8_t vk_uuid[VK_UUID_SIZE];
struct mpvk_ctx *vk = ra_vk_get(hw->ra);
@@ -236,6 +242,7 @@ static int cuda_init(struct ra_hwdec *hw)
return -1;
p->decode_ctx = p->display_ctx;
+ */
#endif
}
@@ -293,6 +300,7 @@ static void cuda_uninit(struct ra_hwdec *hw)
#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
#if HAVE_VULKAN
+/*
static struct ra_buf *cuda_buf_pool_get(struct ra_hwdec_mapper *mapper, int n)
{
struct priv_owner *p_owner = mapper->owner->priv;
@@ -390,6 +398,7 @@ static void cuda_buf_pool_uninit(struct ra_hwdec_mapper *mapper, int n)
}
ra_buf_pool_uninit(mapper->ra, pool);
}
+*/
#endif // HAVE_VULKAN
static int mapper_init(struct ra_hwdec_mapper *mapper)
@@ -497,7 +506,7 @@ static void mapper_uninit(struct ra_hwdec_mapper *mapper)
ra_tex_free(mapper->ra, &mapper->tex[n]);
#if HAVE_VULKAN
- cuda_buf_pool_uninit(mapper, n);
+ //cuda_buf_pool_uninit(mapper, n);
#endif
}
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c
new file mode 100644
index 0000000000..334f2f135f
--- /dev/null
+++ b/video/out/placebo/ra_pl.c
@@ -0,0 +1,628 @@
+#include "common/common.h"
+#include "common/msg.h"
+
+#include "ra_pl.h"
+#include "utils.h"
+
+struct ra_pl {
+ const struct pl_gpu *gpu;
+};
+
+static inline const struct pl_gpu *get_gpu(struct ra *ra)
+{
+ struct ra_pl *p = ra->priv;
+ return p->gpu;
+}
+
+static struct ra_fns ra_fns_pl;
+
+struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
+{
+ assert(gpu);
+
+ struct ra *ra = talloc_zero(NULL, struct ra);
+ ra->log = log;
+ ra->fns = &ra_fns_pl;
+
+ struct ra_pl *p = ra->priv = talloc_zero(ra, struct ra_pl);
+ p->gpu = gpu;
+
+ ra->glsl_version = gpu->glsl.version;
+ ra->glsl_vulkan = gpu->glsl.vulkan;
+ ra->glsl_es = gpu->glsl.gles;
+
+ ra->caps = RA_CAP_DIRECT_UPLOAD | RA_CAP_NESTED_ARRAY | RA_CAP_FRAGCOORD;
+
+ if (gpu->caps & PL_GPU_CAP_COMPUTE)
+ ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS;
+ if (gpu->caps & PL_GPU_CAP_PARALLEL_COMPUTE)
+ ra->caps |= RA_CAP_PARALLEL_COMPUTE;
+ if (gpu->caps & PL_GPU_CAP_INPUT_VARIABLES)
+ ra->caps |= RA_CAP_GLOBAL_UNIFORM;
+
+ if (gpu->limits.max_tex_1d_dim)
+ ra->caps |= RA_CAP_TEX_1D;
+ if (gpu->limits.max_tex_3d_dim)
+ ra->caps |= RA_CAP_TEX_3D;
+ if (gpu->limits.max_ubo_size)
+ ra->caps |= RA_CAP_BUF_RO;
+ if (gpu->limits.max_ssbo_size)
+ ra->caps |= RA_CAP_BUF_RW;
+ if (gpu->limits.min_gather_offset && gpu->limits.max_gather_offset)
+ ra->caps |= RA_CAP_GATHER;
+
+ // Semi-hack: assume all textures are blittable if r8 is
+ const struct pl_fmt *r8 = pl_find_named_fmt(gpu, "r8");
+ if (r8->caps & PL_FMT_CAP_BLITTABLE)
+ ra->caps |= RA_CAP_BLIT;
+
+ ra->max_texture_wh = gpu->limits.max_tex_2d_dim;
+ ra->max_shmem = gpu->limits.max_shmem_size;
+ ra->max_pushc_size = gpu->limits.max_pushc_size;
+
+ // Set up format wrappers
+ for (int i = 0; i < gpu->num_formats; i++) {
+ const struct pl_fmt *plfmt = gpu->formats[i];
+ static const enum ra_ctype fmt_type_map[PL_FMT_TYPE_COUNT] = {
+ [PL_FMT_UNORM] = RA_CTYPE_UNORM,
+ [PL_FMT_UINT] = RA_CTYPE_UINT,
+ [PL_FMT_FLOAT] = RA_CTYPE_FLOAT,
+ };
+
+ enum ra_ctype type = fmt_type_map[plfmt->type];
+ if (!type || !(plfmt->caps & PL_FMT_CAP_SAMPLEABLE))
+ continue;
+
+ struct ra_format *rafmt = talloc_zero(ra, struct ra_format);
+ *rafmt = (struct ra_format) {
+ .name = plfmt->name,
+ .priv = (void *) plfmt,
+ .ctype = type,
+ .ordered = pl_fmt_is_ordered(plfmt),
+ .num_components = plfmt->num_components,
+ .pixel_size = plfmt->texel_size,
+ .linear_filter = plfmt->caps & PL_FMT_CAP_LINEAR,
+ .renderable = plfmt->caps & PL_FMT_CAP_RENDERABLE,
+ .glsl_format = plfmt->glsl_format,
+ };
+
+ for (int c = 0; c < plfmt->num_components; c++) {
+ rafmt->component_size[c] = plfmt->host_bits[c];
+ rafmt->component_depth[c] = plfmt->component_depth[c];
+ }
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, rafmt);
+ }
+
+ return ra;
+}
+
+static void destroy_ra_pl(struct ra *ra)
+{
+ talloc_free(ra);
+}
+
+static struct ra_format *map_fmt(struct ra *ra, const struct pl_fmt *plfmt)
+{
+ for (int i = 0; i < ra->num_formats; i++) {
+ if (ra->formats[i]->priv == plfmt)
+ return ra->formats[i];
+ }
+
+ MP_ERR(ra, "Failed mapping pl_fmt '%s' to ra_fmt?\n", plfmt->name);
+ return NULL;
+}
+
+bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex,
+ struct ra_tex *out_tex)
+{
+ if (!pltex)
+ return false;
+
+ *out_tex = (struct ra_tex) {
+ .params = {
+ .dimensions = pl_tex_params_dimension(pltex->params),
+ .w = pltex->params.w,
+ .h = pltex->params.h,
+ .d = pltex->params.d,
+ .format = map_fmt(ra, pltex->params.format),
+ .render_src = pltex->params.sampleable,
+ .render_dst = pltex->params.renderable,
+ .storage_dst = pltex->params.storable,
+ .blit_src = pltex->params.blit_src,
+ .blit_dst = pltex->params.blit_dst,
+ .host_mutable = pltex->params.host_writable,
+ .downloadable = pltex->params.host_readable,
+ .src_linear = pltex->params.sample_mode == PL_TEX_SAMPLE_LINEAR,
+ .src_repeat = pltex->params.address_mode == PL_TEX_ADDRESS_REPEAT,
+ },
+ .priv = (void *) pltex,
+ };
+
+ return !!out_tex->params.format;
+}
+
+static struct ra_tex *tex_create_pl(struct ra *ra,
+ const struct ra_tex_params *params)
+{
+ const struct pl_gpu *gpu = get_gpu(ra);
+
+ // Check size limits
+ bool ok = false;
+ switch (params->dimensions) {
+ case 1:
+ ok = params->w <= gpu->limits.max_tex_1d_dim;
+ break;
+
+ case 2:
+ ok = params->w <= gpu->limits.max_tex_2d_dim &&
+ params->h <= gpu->limits.max_tex_2d_dim;
+ break;
+
+ case 3:
+ ok = params->w <= gpu->limits.max_tex_2d_dim &&
+ params->h <= gpu->limits.max_tex_2d_dim &&
+ params->d <= gpu->limits.max_tex_2d_dim;
+ break;
+ };
+
+ if (!ok) {
+ MP_ERR(ra, "Texture size %dx%dx%d exceeds dimension limits!\n",
+ params->w, params->h, params->d);
+ return NULL;
+ }
+
+ const struct pl_tex *pltex = pl_tex_create(gpu, &(struct pl_tex_params) {
+ .w = params->w,
+ .h = params->dimensions >= 2 ? params->h : 0,
+ .d = params->dimensions >= 3 ? params->d : 0,
+ .format = params->format->priv,
+ .sampleable = params->render_src,
+ .renderable = params->render_dst,
+ .storable = params->storage_dst,
+ .blit_src = params->blit_src,
+ .blit_dst = params->blit_dst || params->render_dst,
+ .host_writable = params->host_mutable,
+ .host_readable = params->downloadable,
+ .sample_mode = params->src_linear ? PL_TEX_SAMPLE_LINEAR
+ : PL_TEX_SAMPLE_NEAREST,
+ .address_mode = params->src_repeat ? PL_TEX_ADDRESS_REPEAT
+ : PL_TEX_ADDRESS_CLAMP,
+ .initial_data = params->initial_data,
+ });
+
+ struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+ if (!mppl_wrap_tex(ra, pltex, ratex)) {
+ pl_tex_destroy(gpu, &pltex);
+ talloc_free(ratex);
+ return NULL;
+ }
+
+ return ratex;
+}
+
+static void tex_destroy_pl(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+
+ pl_tex_destroy(get_gpu(ra), (const struct pl_tex **) &tex->priv);
+ talloc_free(tex);
+}
+
+static int texel_stride_w(size_t stride, const struct pl_tex *tex)
+{
+ size_t texel_size = tex->params.format->texel_size;
+ int texels = stride / texel_size;
+ assert(texels * texel_size == stride);
+ return texels;
+}
+
+static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *params)
+{
+ const struct pl_tex *tex = params->tex->priv;
+ struct pl_tex_transfer_params pl_params = {
+ .tex = tex,
+ .buf = params->buf ? params->buf->priv : NULL,
+ .buf_offset = params->buf_offset,
+ .ptr = (void *) params->src,
+ };
+
+ if (params->tex->params.dimensions == 2) {
+ pl_params.stride_w = texel_stride_w(params->stride, tex);
+ if (params->rc) {
+ pl_params.rc = (struct pl_rect3d) {
+ .x0 = params->rc->x0, .x1 = params->rc->x1,
+ .y0 = params->rc->y0, .y1 = params->rc->y1,
+ };
+ }
+ }
+
+ return pl_tex_upload(get_gpu(ra), &pl_params);
+}
+
+static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params)
+{
+ const struct pl_tex *tex = params->tex->priv;
+ struct pl_tex_transfer_params pl_params = {
+ .tex = tex,
+ .ptr = params->dst,
+ .stride_w = texel_stride_w(params->stride, tex),
+ };
+
+ return pl_tex_download(get_gpu(ra), &pl_params);
+}
+
+static struct ra_buf *buf_create_pl(struct ra *ra,
+ const struct ra_buf_params *params)
+{
+ static const enum pl_buf_type buf_type[] = {
+ [RA_BUF_TYPE_TEX_UPLOAD] = PL_BUF_TEX_TRANSFER,
+ [RA_BUF_TYPE_SHADER_STORAGE] = PL_BUF_STORAGE,
+ [RA_BUF_TYPE_UNIFORM] = PL_BUF_UNIFORM,
+ [RA_BUF_TYPE_SHARED_MEMORY] = 0,
+ };
+
+ const struct pl_gpu *gpu = get_gpu(ra);
+ size_t max_size[] = {
+ [PL_BUF_TEX_TRANSFER] = gpu->limits.max_xfer_size,
+ [PL_BUF_UNIFORM] = gpu->limits.max_ubo_size,
+ [PL_BUF_STORAGE] = gpu->limits.max_ssbo_size,
+ };
+
+ if (params->size > max_size[buf_type[params->type]]) {
+ MP_ERR(ra, "Buffer size %zu exceeds size limits!\n", params->size);
+ return NULL;
+ }
+
+ const struct pl_buf *plbuf = pl_buf_create(gpu, &(struct pl_buf_params) {
+ .type = buf_type[params->type],
+ .size = params->size,
+ .host_mapped = params->host_mapped,
+ .host_writable = params->host_mutable,
+ .initial_data = params->initial_data,
+ });
+
+ if (!plbuf)
+ return NULL;
+
+ struct ra_buf *rabuf = talloc_ptrtype(NULL, rabuf);
+ *rabuf = (struct ra_buf) {
+ .params = *params,
+ .data = plbuf->data,
+ .priv = (void *) plbuf,
+ };
+
+ rabuf->params.initial_data = NULL;
+ return rabuf;
+}
+
+static void buf_destroy_pl(struct ra *ra, struct ra_buf *buf)
+{
+ if (!buf)
+ return;
+
+ pl_buf_destroy(get_gpu(ra), (const struct pl_buf **) &buf->priv);
+ talloc_free(buf);
+}
+
+static void buf_update_pl(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+ const void *data, size_t size)
+{
+ pl_buf_write(get_gpu(ra), buf->priv, offset, data, size);
+}
+
+static bool buf_poll_pl(struct ra *ra, struct ra_buf *buf)
+{
+ return !pl_buf_poll(get_gpu(ra), buf->priv, 0);
+}
+
+static void clear_pl(struct ra *ra, struct ra_tex *dst, float color[4],
+ struct mp_rect *scissor)
+{
+ // TODO: implement scissor clearing by blitting a 1x1 tex instead
+ pl_tex_clear(get_gpu(ra), dst->priv, color);
+}
+
+static void blit_pl(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+ struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+ struct pl_rect3d plsrc = {0}, pldst = {0};
+ if (src_rc) {
+ plsrc.x0 = MPMIN(MPMAX(src_rc->x0, 0), src->params.w);
+ plsrc.y0 = MPMIN(MPMAX(src_rc->y0, 0), src->params.h);
+ plsrc.x1 = MPMIN(MPMAX(src_rc->x1, 0), src->params.w);
+ plsrc.y1 = MPMIN(MPMAX(src_rc->y1, 0), src->params.h);
+ }
+
+ if (dst_rc) {
+ pldst.x0 = MPMIN(MPMAX(dst_rc->x0, 0), dst->params.w);
+ pldst.y0 = MPMIN(MPMAX(dst_rc->y0, 0), dst->params.h);
+ pldst.x1 = MPMIN(MPMAX(dst_rc->x1, 0), dst->params.w);
+ pldst.y1 = MPMIN(MPMAX(dst_rc->y1, 0), dst->params.h);
+ }
+
+ pl_tex_blit(get_gpu(ra), dst->priv, src->priv, pldst, plsrc);
+}
+
+static const enum pl_var_type var_type[RA_VARTYPE_COUNT] = {
+ [RA_VARTYPE_INT] = PL_VAR_SINT,
+ [RA_VARTYPE_FLOAT] = PL_VAR_FLOAT,
+};
+
+static const enum pl_desc_type desc_type[RA_VARTYPE_COUNT] = {
+ [RA_VARTYPE_TEX] = PL_DESC_SAMPLED_TEX,
+ [RA_VARTYPE_IMG_W] = PL_DESC_STORAGE_IMG,
+ [RA_VARTYPE_BUF_RO] = PL_DESC_BUF_UNIFORM,
+ [RA_VARTYPE_BUF_RW] = PL_DESC_BUF_STORAGE,
+};
+
+static const enum pl_fmt_type fmt_type[RA_VARTYPE_COUNT] = {
+ [RA_VARTYPE_INT] = PL_FMT_SINT,
+ [RA_VARTYPE_FLOAT] = PL_FMT_FLOAT,
+ [RA_VARTYPE_BYTE_UNORM] = PL_FMT_UNORM,
+};
+
+static const size_t var_size[RA_VARTYPE_COUNT] = {
+ [RA_VARTYPE_INT] = sizeof(int),
+ [RA_VARTYPE_FLOAT] = sizeof(float),
+ [RA_VARTYPE_BYTE_UNORM] = sizeof(uint8_t),
+};
+
+static struct ra_layout uniform_layout_pl(struct ra_renderpass_input *inp)
+{
+ // To get the alignment requirements, we try laying this out with
+ // an offset of 1 and then see where it ends up. This will always be
+ // the minimum alignment requirement.
+ struct pl_var_layout layout = pl_buf_uniform_layout(1, &(struct pl_var) {
+ .name = inp->name,
+ .type = var_type[inp->type],
+ .dim_v = inp->dim_v,
+ .dim_m = inp->dim_m,
+ .dim_a = 1,
+ });
+
+ return (struct ra_layout) {
+ .align = layout.offset,
+ .stride = layout.stride,
+ .size = layout.size,
+ };
+}
+
+static struct ra_layout push_constant_layout_pl(struct ra_renderpass_input *inp)
+{
+ struct pl_var_layout layout = pl_push_constant_layout(1, &(struct pl_var) {
+ .name = inp->name,
+ .type = var_type[inp->type],
+ .dim_v = inp->dim_v,
+ .dim_m = inp->dim_m,
+ .dim_a = 1,
+ });
+
+ return (struct ra_layout) {
+ .align = layout.offset,
+ .stride = layout.stride,
+ .size = layout.size,
+ };
+}
+
+static int desc_namespace_pl(struct ra *ra, enum ra_vartype type)
+{
+ return pl_desc_namespace(get_gpu(ra), desc_type[type]);
+}
+
+struct pass_priv {
+ const struct pl_pass *pl_pass;
+ uint16_t *inp_index; // index translation map
+ // Space to hold the descriptor bindings and variable updates
+ struct pl_desc_binding *binds;
+ struct pl_var_update *varups;
+ int num_varups;
+};
+
+static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
+ const struct ra_renderpass_params *params)
+{
+ void *tmp = talloc_new(NULL);
+ const struct pl_gpu *gpu = get_gpu(ra);
+ struct ra_renderpass *pass = NULL;
+
+ static const enum pl_pass_type pass_type[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = PL_PASS_RASTER,
+ [RA_RENDERPASS_TYPE_COMPUTE] = PL_PASS_COMPUTE,
+ };
+
+ struct pl_var *vars = NULL;
+ struct pl_desc *descs = NULL;
+ int num_vars = 0, num_descs = 0;
+
+ struct pass_priv *priv = talloc_ptrtype(tmp, priv);
+ priv->inp_index = talloc_zero_array(priv, uint16_t, params->num_inputs);
+
+ for (int i = 0; i < params->num_inputs; i++) {
+ const struct ra_renderpass_input *inp = &params->inputs[i];
+ if (var_type[inp->type]) {
+ priv->inp_index[i] = num_vars;
+ MP_TARRAY_APPEND(tmp, vars, num_vars, (struct pl_var) {
+ .name = inp->name,
+ .type = var_type[inp->type],
+ .dim_v = inp->dim_v,
+ .dim_m = inp->dim_m,
+ .dim_a = 1,
+ });
+ } else if (desc_type[inp->type]) {
+ priv->inp_index[i] = num_descs;
+ MP_TARRAY_APPEND(tmp, descs, num_descs, (struct pl_desc) {
+ .name = inp->name,
+ .type = desc_type[inp->type],
+ .binding = inp->binding,
+ .access = inp->type == RA_VARTYPE_IMG_W ? PL_DESC_ACCESS_WRITEONLY
+ : inp->type == RA_VARTYPE_BUF_RW ? PL_DESC_ACCESS_READWRITE
+ : PL_DESC_ACCESS_READONLY,
+ });
+ }
+ }
+
+ // Allocate space to store the bindings map persistently
+ priv->binds = talloc_zero_array(priv, struct pl_desc_binding, num_descs);
+
+ struct pl_pass_params pl_params = {
+ .type = pass_type[params->type],
+ .variables = vars,
+ .num_variables = num_vars,
+ .descriptors = descs,
+ .num_descriptors = num_descs,
+ .push_constants_size = params->push_constants_size,
+ .glsl_shader = params->type == RA_RENDERPASS_TYPE_COMPUTE
+ ? params->compute_shader
+ : params->frag_shader,
+ .cached_program = params->cached_program.start,
+ .cached_program_len = params->cached_program.len,
+ };
+
+ struct pl_blend_params blend_params;
+
+ if (params->type == RA_RENDERPASS_TYPE_RASTER) {
+ pl_params.vertex_shader = params->vertex_shader;
+ pl_params.vertex_type = PL_PRIM_TRIANGLE_LIST;
+ pl_params.vertex_stride = params->vertex_stride;
+ pl_params.target_dummy.params.format = params->target_format->priv;
+ pl_params.load_target = !params->invalidate_target;
+
+ if (params->enable_blend) {
+ pl_params.blend_params = &blend_params;
+ blend_params = (struct pl_blend_params) {
+ // Same enum order as ra_blend
+ .src_rgb = (enum ra_blend) params->blend_src_rgb,
+ .dst_rgb = (enum ra_blend) params->blend_dst_rgb,
+ .src_alpha = (enum ra_blend) params->blend_src_alpha,
+ .dst_alpha = (enum ra_blend) params->blend_dst_alpha,
+ };
+ }
+
+ for (int i = 0; i < params->num_vertex_attribs; i++) {
+ const struct ra_renderpass_input *inp = &params->vertex_attribs[i];
+ struct pl_vertex_attrib attrib = {
+ .name = inp->name,
+ .offset = inp->offset,
+ .location = i,
+ .fmt = pl_find_fmt(gpu, fmt_type[inp->type], inp->dim_v, 0,
+ var_size[inp->type] * 8, PL_FMT_CAP_VERTEX),
+ };
+
+ if (!attrib.fmt) {
+ MP_ERR(ra, "Failed mapping vertex attrib '%s' to pl_fmt?\n",
+ inp->name);
+ goto error;
+ }
+
+ MP_TARRAY_APPEND(tmp, pl_params.vertex_attribs,
+ pl_params.num_vertex_attribs, attrib);
+ }
+ }
+
+ priv->pl_pass = pl_pass_create(gpu, &pl_params);
+ if (!priv->pl_pass)
+ goto error;
+
+ pass = talloc_ptrtype(NULL, pass);
+ *pass = (struct ra_renderpass) {
+ .params = *ra_renderpass_params_copy(pass, params),
+ .priv = talloc_steal(pass, priv),
+ };
+
+ pass->params.cached_program = (struct bstr) {
+ .start = (void *) priv->pl_pass->params.cached_program,
+ .len = priv->pl_pass->params.cached_program_len,
+ };
+
+ // fall through
+error:
+ talloc_free(tmp);
+ return pass;
+}
+
+static void renderpass_destroy_pl(struct ra *ra, struct ra_renderpass *pass)
+{
+ if (!pass)
+ return;
+
+ struct pass_priv *priv = pass->priv;
+ pl_pass_destroy(get_gpu(ra), (const struct pl_pass **) &priv->pl_pass);
+ talloc_free(pass);
+}
+
+static void renderpass_run_pl(struct ra *ra,
+ const struct ra_renderpass_run_params *params)
+{
+ struct pass_priv *p = params->pass->priv;
+ p->num_varups = 0;
+
+ for (int i = 0; i < params->num_values; i++) {
+ const struct ra_renderpass_input_val *val = &params->values[i];
+ const struct ra_renderpass_input *inp = &params->pass->params.inputs[i];
+ if (var_type[inp->type]) {
+ MP_TARRAY_APPEND(p, p->varups, p->num_varups, (struct pl_var_update) {
+ .index = p->inp_index[val->index],
+ .data = val->data,
+ });
+ } else {
+ struct pl_desc_binding bind;
+ switch (inp->type) {
+ case RA_VARTYPE_TEX:
+ case RA_VARTYPE_IMG_W:
+ bind.object = (* (struct ra_tex **) val->data)->priv;
+ break;
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW:
+ bind.object = (* (struct ra_buf **) val->data)->priv;
+ break;
+ default: abort();
+ };
+
+ p->binds[p->inp_index[val->index]] = bind;
+ };
+ }
+
+ struct pl_pass_run_params pl_params = {
+ .pass = p->pl_pass,
+ .var_updates = p->varups,
+ .num_var_updates = p->num_varups,
+ .desc_bindings = p->binds,
+ .push_constants = params->push_constants,
+ };
+
+ if (p->pl_pass->params.type == PL_PASS_RASTER) {
+ pl_params.target = params->target->priv;
+ pl_params.viewport = mp_rect2d_to_pl(params->viewport);
+ pl_params.scissors = mp_rect2d_to_pl(params->scissors);
+ pl_params.vertex_data = params->vertex_data;
+ pl_params.vertex_count = params->vertex_count;
+ } else {
+ for (int i = 0; i < MP_ARRAY_SIZE(pl_params.compute_groups); i++)
+ pl_params.compute_groups[i] = params->compute_groups[i];
+ }
+
+ pl_pass_run(get_gpu(ra), &pl_params);
+}
+
+static struct ra_fns ra_fns_pl = {
+ .destroy = destroy_ra_pl,
+ .tex_create = tex_create_pl,
+ .tex_destroy = tex_destroy_pl,
+ .tex_upload = tex_upload_pl,
+ .tex_download = tex_download_pl,
+ .buf_create = buf_create_pl,
+ .buf_destroy = buf_destroy_pl,
+ .buf_update = buf_update_pl,
+ .buf_poll = buf_poll_pl,
+ .clear = clear_pl,
+ .blit = blit_pl,
+ .uniform_layout = uniform_layout_pl,
+ .push_constant_layout = push_constant_layout_pl,
+ .desc_namespace = desc_namespace_pl,
+ .renderpass_create = renderpass_create_pl,
+ .renderpass_destroy = renderpass_destroy_pl,
+ .renderpass_run = renderpass_run_pl,
+};
+
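How a caller is expected to drive this wrapper through the generic ra vtable, as a hedged sketch (not part of the commit; `gpu`, `log` and `pixels` are assumed to exist, error checks are omitted, and ra_find_unorm_format() is the existing helper from video/out/gpu/ra.h):

    struct ra *ra = ra_create_pl(gpu, log);

    struct ra_tex_params tp = {
        .dimensions   = 2,
        .w = 1920, .h = 1080, .d = 1,
        .format       = ra_find_unorm_format(ra, 1, 4), // 8-bit RGBA, if available
        .render_src   = true,
        .host_mutable = true,
    };
    struct ra_tex *tex = ra->fns->tex_create(ra, &tp);

    // The byte stride must be a whole number of texels; texel_stride_w()
    // above asserts this when translating it for libplacebo.
    ra->fns->tex_upload(ra, &(struct ra_tex_upload_params) {
        .tex    = tex,
        .src    = pixels,
        .stride = 1920 * 4,
    });

    ra_tex_free(ra, &tex);
    ra->fns->destroy(ra);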
diff --git a/video/out/placebo/ra_pl.h b/video/out/placebo/ra_pl.h
new file mode 100644
index 0000000000..a342892963
--- /dev/null
+++ b/video/out/placebo/ra_pl.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+#include <libplacebo/gpu.h>
+
+struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log);
+
+// Wrap a pl_tex into a ra_tex struct; returns whether the wrapping succeeded
+bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex,
+ struct ra_tex *out_tex);
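Note that mppl_wrap_tex() fills caller-provided storage rather than allocating a new ra_tex, so the result must not go through tex_destroy/ra_tex_free(); the swapchain code further down relies on this with its proxy_tex member. A short sketch under those assumptions:

    struct ra_tex proxy;                      // plain storage, not talloc'd
    if (!mppl_wrap_tex(ra, pltex, &proxy))
        return false;                         // pl_fmt has no matching ra_format
    // render into `proxy` via the usual ra entry points; never free it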
diff --git a/video/out/placebo/utils.c b/video/out/placebo/utils.c
new file mode 100644
index 0000000000..79f313872c
--- /dev/null
+++ b/video/out/placebo/utils.c
@@ -0,0 +1,62 @@
+#include "common/common.h"
+#include "utils.h"
+
+static const int pl_log_to_msg_lev[PL_LOG_ALL+1] = {
+ [PL_LOG_FATAL] = MSGL_FATAL,
+ [PL_LOG_ERR] = MSGL_ERR,
+ [PL_LOG_WARN] = MSGL_WARN,
+ [PL_LOG_INFO] = MSGL_V,
+ [PL_LOG_DEBUG] = MSGL_DEBUG,
+ [PL_LOG_TRACE] = MSGL_TRACE,
+};
+
+static const enum pl_log_level msg_lev_to_pl_log[MSGL_MAX+1] = {
+ [MSGL_FATAL] = PL_LOG_FATAL,
+ [MSGL_ERR] = PL_LOG_ERR,
+ [MSGL_WARN] = PL_LOG_WARN,
+ [MSGL_INFO] = PL_LOG_WARN,
+ [MSGL_STATUS] = PL_LOG_WARN,
+ [MSGL_V] = PL_LOG_INFO,
+ [MSGL_DEBUG] = PL_LOG_DEBUG,
+ [MSGL_TRACE] = PL_LOG_TRACE,
+ [MSGL_MAX] = PL_LOG_ALL,
+};
+
+// Softens log levels while probing, so failed probes don't log at full severity
+static enum pl_log_level probing_map(enum pl_log_level level)
+{
+ switch (level) {
+ case PL_LOG_FATAL:
+ return PL_LOG_ERR;
+
+ case PL_LOG_ERR:
+ case PL_LOG_WARN:
+ return PL_LOG_INFO;
+
+ default:
+ return level;
+ }
+}
+
+static void log_cb(void *priv, enum pl_log_level level, const char *msg)
+{
+ struct mp_log *log = priv;
+ mp_msg(log, pl_log_to_msg_lev[level], "%s\n", msg);
+}
+
+static void log_cb_probing(void *priv, enum pl_log_level level, const char *msg)
+{
+ struct mp_log *log = priv;
+ mp_msg(log, pl_log_to_msg_lev[probing_map(level)], "%s\n", msg);
+}
+
+void mppl_ctx_set_log(struct pl_context *ctx, struct mp_log *log, bool probing)
+{
+ assert(log);
+
+ pl_context_update(ctx, &(struct pl_context_params) {
+ .log_cb = probing ? log_cb_probing : log_cb,
+ .log_level = msg_lev_to_pl_log[mp_msg_level(log)],
+ .log_priv = log,
+ });
+}
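A hook-up sketch for the log bridge (not from this diff; `ctx` is a struct ra_ctx). Note the mapping above deliberately demotes libplacebo's INFO to MSGL_V, and probing_map() additionally softens errors while a context is merely being probed:

    struct pl_context *pctx =
        pl_context_create(PL_API_VER, &(struct pl_context_params) {0});
    mppl_ctx_set_log(pctx, ctx->log, ctx->opts.probing);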
diff --git a/video/out/placebo/utils.h b/video/out/placebo/utils.h
new file mode 100644
index 0000000000..03bcb0f252
--- /dev/null
+++ b/video/out/placebo/utils.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "common/common.h"
+#include "common/msg.h"
+
+#include <libplacebo/common.h>
+
+void mppl_ctx_set_log(struct pl_context *ctx, struct mp_log *log, bool probing);
+
+static inline struct pl_rect2d mp_rect2d_to_pl(struct mp_rect rc)
+{
+ return (struct pl_rect2d) {
+ .x0 = rc.x0,
+ .y0 = rc.y0,
+ .x1 = rc.x1,
+ .y1 = rc.y1,
+ };
+}
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
index a4284f9055..b085fb462c 100644
--- a/video/out/vulkan/common.h
+++ b/video/out/vulkan/common.h
@@ -23,58 +23,14 @@
#define VK_USE_PLATFORM_WIN32_KHR
#endif
-#include <vulkan/vulkan.h>
-
-// Vulkan allows the optional use of a custom allocator. We don't need one but
-// mark this parameter with a better name in case we ever decide to change this
-// in the future. (And to make the code more readable)
-#define MPVK_ALLOCATOR NULL
-
-// A lot of things depend on streaming resources across frames. Depending on
-// how many frames we render ahead of time, we need to pick enough to avoid
-// any conflicts, so make all of these tunable relative to this constant in
-// order to centralize them.
-#define MPVK_MAX_STREAMING_DEPTH 8
+#include <libplacebo/vulkan.h>
// Shared struct used to hold vulkan context information
struct mpvk_ctx {
- struct mp_log *log;
- VkInstance inst;
- VkPhysicalDevice physd;
- VkDebugReportCallbackEXT dbg;
- VkDevice dev;
-
- // Surface, must be initialized fter the context itself
- VkSurfaceKHR surf;
- VkSurfaceFormatKHR surf_format; // picked at surface initialization time
-
- struct vk_malloc *alloc; // memory allocator for this device
- struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler
- struct vk_cmdpool **pools; // command pools (one per queue family)
- int num_pools;
- struct vk_cmd *last_cmd; // most recently submitted command
-
- // Queued/pending commands. These are shared for the entire mpvk_ctx to
- // ensure submission and callbacks are FIFO
- struct vk_cmd **cmds_queued; // recorded but not yet submitted
- struct vk_cmd **cmds_pending; // submitted but not completed
- int num_cmds_queued;
- int num_cmds_pending;
-
- // Pointers into *pools
- struct vk_cmdpool *pool_graphics; // required
- struct vk_cmdpool *pool_compute; // optional
- struct vk_cmdpool *pool_transfer; // optional
-
- // Common pool of signals, to avoid having to re-create these objects often
- struct vk_signal **signals;
- int num_signals;
-
- // Cached capabilities
- VkPhysicalDeviceLimits limits;
- VkPhysicalDeviceFeatures features;
-
- // Extension availability
- bool has_ext_external_memory;
- bool has_ext_external_memory_export;
+ struct mp_log *pl_log;
+ struct pl_context *ctx;
+ const struct pl_vk_inst *vkinst;
+ const struct pl_vulkan *vulkan;
+ const struct pl_gpu *gpu; // points to vulkan->gpu for convenience
+ VkSurfaceKHR surface;
};
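The slimmed-down struct is filled in stages; a hedged summary of the order implied by the call sites elsewhere in this diff:

    /* Sketch of the implied initialization order (inferred, not spelled out
     * in the commit):
     *   mpvk_init(vk, ctx, <surface extension>)  -> vk->pl_log, vk->ctx, vk->vkinst
     *   vkCreate*SurfaceKHR(...)                 -> vk->surface
     *   ra_vk_ctx_init(ctx, vk, mode)            -> vk->vulkan, vk->gpu, swapchain
     * Teardown runs in reverse: ra_vk_ctx_uninit(), then mpvk_uninit(). */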
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
index 29a2c9b727..c05a5ac209 100644
--- a/video/out/vulkan/context.c
+++ b/video/out/vulkan/context.c
@@ -16,25 +16,17 @@
*/
#include "options/m_config.h"
-#include "video/out/gpu/spirv.h"
+#include "video/out/placebo/ra_pl.h"
#include "context.h"
-#include "ra_vk.h"
#include "utils.h"
-enum {
- SWAP_AUTO = 0,
- SWAP_FIFO,
- SWAP_FIFO_RELAXED,
- SWAP_MAILBOX,
- SWAP_IMMEDIATE,
- SWAP_COUNT,
-};
-
struct vulkan_opts {
- struct mpvk_device_opts dev_opts; // logical device options
char *device; // force a specific GPU
int swap_mode;
+ int queue_count;
+ int async_transfer;
+ int async_compute;
};
static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
@@ -52,7 +44,7 @@ static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
VkPhysicalDevice *devices = NULL;
uint32_t num = 0;
- res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst);
+ res = vkCreateInstance(&info, NULL, &inst);
if (res != VK_SUCCESS)
goto done;
@@ -97,45 +89,30 @@ const struct m_sub_options vulkan_conf = {
.opts = (const struct m_option[]) {
OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev),
OPT_CHOICE("vulkan-swap-mode", swap_mode, 0,
- ({"auto", SWAP_AUTO},
- {"fifo", SWAP_FIFO},
- {"fifo-relaxed", SWAP_FIFO_RELAXED},
- {"mailbox", SWAP_MAILBOX},
- {"immediate", SWAP_IMMEDIATE})),
- OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1, 8,
- OPTDEF_INT(1)),
- OPT_FLAG("vulkan-async-transfer", dev_opts.async_transfer, 0),
- OPT_FLAG("vulkan-async-compute", dev_opts.async_compute, 0),
+ ({"auto", -1},
+ {"fifo", VK_PRESENT_MODE_FIFO_KHR},
+ {"fifo-relaxed", VK_PRESENT_MODE_FIFO_RELAXED_KHR},
+ {"mailbox", VK_PRESENT_MODE_MAILBOX_KHR},
+ {"immediate", VK_PRESENT_MODE_IMMEDIATE_KHR})),
+ OPT_INTRANGE("vulkan-queue-count", queue_count, 0, 1, 8),
+ OPT_FLAG("vulkan-async-transfer", async_transfer, 0),
+ OPT_FLAG("vulkan-async-compute", async_compute, 0),
{0}
},
.size = sizeof(struct vulkan_opts),
.defaults = &(struct vulkan_opts) {
- .dev_opts = {
- .async_transfer = 1,
- },
+ .swap_mode = -1,
+ .queue_count = 1,
+ .async_transfer = true,
+ .async_compute = true,
},
};
struct priv {
struct mpvk_ctx *vk;
struct vulkan_opts *opts;
- // Swapchain metadata:
- int w, h; // current size
- VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
- VkSwapchainKHR swapchain;
- VkSwapchainKHR old_swapchain;
- int frames_in_flight;
- // state of the images:
- struct ra_tex **images; // ra_tex wrappers for the vkimages
- int num_images; // size of images
- VkSemaphore *sems_in; // pool of semaphores used to synchronize images
- VkSemaphore *sems_out; // outgoing semaphores (rendering complete)
- int num_sems;
- int idx_sems; // index of next free semaphore pair
- int last_imgidx; // the image index last acquired (for submit)
-
- // This is used to pre-fetch the next frame at the end of swap_buffers
- struct ra_fbo queued_fbo;
+ const struct pl_swapchain *swapchain;
+ struct ra_tex proxy_tex;
};
static const struct ra_swapchain_fns vulkan_swapchain;
@@ -149,133 +126,26 @@ struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx)
return p->vk;
}
-static bool update_swapchain_info(struct priv *p,
- VkSwapchainCreateInfoKHR *info)
-{
- struct mpvk_ctx *vk = p->vk;
-
- // Query the supported capabilities and update this struct as needed
- VkSurfaceCapabilitiesKHR caps;
- VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps));
-
- // Sorted by preference
- static const VkCompositeAlphaFlagsKHR alphaModes[] = {
- VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
- VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
- };
-
- for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) {
- if (caps.supportedCompositeAlpha & alphaModes[i]) {
- info->compositeAlpha = alphaModes[i];
- break;
- }
- }
-
- if (!info->compositeAlpha) {
- MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n",
- caps.supportedCompositeAlpha);
- goto error;
- }
-
- static const VkSurfaceTransformFlagsKHR rotModes[] = {
- VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
- VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
- };
-
- for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) {
- if (caps.supportedTransforms & rotModes[i]) {
- info->preTransform = rotModes[i];
- break;
- }
- }
-
- if (!info->preTransform) {
- MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n",
- caps.supportedTransforms);
- goto error;
- }
-
- // Image count as required
- MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n",
- (int)info->minImageCount, (int)caps.minImageCount,
- (int)caps.maxImageCount);
-
- info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount);
- if (caps.maxImageCount)
- info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount);
-
- // Check the extent against the allowed parameters
- if (caps.currentExtent.width != info->imageExtent.width &&
- caps.currentExtent.width != 0xFFFFFFFF)
- {
- MP_WARN(vk, "Requested width %d does not match current width %d\n",
- (int)info->imageExtent.width, (int)caps.currentExtent.width);
- info->imageExtent.width = caps.currentExtent.width;
- }
-
- if (caps.currentExtent.height != info->imageExtent.height &&
- caps.currentExtent.height != 0xFFFFFFFF)
- {
- MP_WARN(vk, "Requested height %d does not match current height %d\n",
- (int)info->imageExtent.height, (int)caps.currentExtent.height);
- info->imageExtent.height = caps.currentExtent.height;
- }
-
- if (caps.minImageExtent.width > info->imageExtent.width ||
- caps.minImageExtent.height > info->imageExtent.height)
- {
- MP_ERR(vk, "Requested size %dx%d smaller than device minimum %d%d\n",
- (int)info->imageExtent.width, (int)info->imageExtent.height,
- (int)caps.minImageExtent.width, (int)caps.minImageExtent.height);
- goto error;
- }
-
- if (caps.maxImageExtent.width < info->imageExtent.width ||
- caps.maxImageExtent.height < info->imageExtent.height)
- {
- MP_ERR(vk, "Requested size %dx%d larger than device maximum %d%d\n",
- (int)info->imageExtent.width, (int)info->imageExtent.height,
- (int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height);
- goto error;
- }
-
- // We just request whatever usage we can, and let the ra_vk decide what
- // ra_tex_params that translates to. This makes the images as flexible
- // as possible.
- info->imageUsage = caps.supportedUsageFlags;
- return true;
-
-error:
- return false;
-}
-
void ra_vk_ctx_uninit(struct ra_ctx *ctx)
{
- if (ctx->ra) {
- struct priv *p = ctx->swapchain->priv;
- struct mpvk_ctx *vk = p->vk;
-
- mpvk_flush_commands(vk);
- mpvk_poll_commands(vk, UINT64_MAX);
+ if (!ctx->swapchain)
+ return;
- for (int i = 0; i < p->num_images; i++)
- ra_tex_free(ctx->ra, &p->images[i]);
- for (int i = 0; i < p->num_sems; i++) {
- vkDestroySemaphore(vk->dev, p->sems_in[i], MPVK_ALLOCATOR);
- vkDestroySemaphore(vk->dev, p->sems_out[i], MPVK_ALLOCATOR);
- }
+ struct priv *p = ctx->swapchain->priv;
+ struct mpvk_ctx *vk = p->vk;
- vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+ if (ctx->ra) {
+ pl_gpu_finish(vk->gpu);
+ pl_swapchain_destroy(&p->swapchain);
ctx->ra->fns->destroy(ctx->ra);
ctx->ra = NULL;
}
- talloc_free(ctx->swapchain);
- ctx->swapchain = NULL;
+ vk->gpu = NULL;
+ pl_vulkan_destroy(&vk->vulkan);
+ TA_FREEP(&ctx->swapchain);
}
-static const struct ra_swapchain_fns vulkan_swapchain;
-
bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
VkPresentModeKHR preferred_mode)
{
@@ -287,56 +157,36 @@ bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
p->vk = vk;
p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf);
- if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw))
- goto error;
- if (!spirv_compiler_init(ctx))
- goto error;
- vk->spirv = ctx->spirv;
- if (!mpvk_pick_surface_format(vk))
- goto error;
- if (!mpvk_device_init(vk, p->opts->dev_opts))
+ assert(vk->ctx);
+ assert(vk->vkinst);
+ vk->vulkan = pl_vulkan_create(vk->ctx, &(struct pl_vulkan_params) {
+ .instance = vk->vkinst->instance,
+ .surface = vk->surface,
+ .async_transfer = p->opts->async_transfer,
+ .async_compute = p->opts->async_compute,
+ .queue_count = p->opts->queue_count,
+ });
+ if (!vk->vulkan)
goto error;
- ctx->ra = ra_create_vk(vk, ctx->log);
+ vk->gpu = vk->vulkan->gpu;
+ ctx->ra = ra_create_pl(vk->gpu, ctx->log);
if (!ctx->ra)
goto error;
- static const VkPresentModeKHR present_modes[SWAP_COUNT] = {
- [SWAP_FIFO] = VK_PRESENT_MODE_FIFO_KHR,
- [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
- [SWAP_MAILBOX] = VK_PRESENT_MODE_MAILBOX_KHR,
- [SWAP_IMMEDIATE] = VK_PRESENT_MODE_IMMEDIATE_KHR,
+ // Create the swapchain
+ struct pl_vulkan_swapchain_params params = {
+ .surface = vk->surface,
+ .present_mode = preferred_mode,
+ .swapchain_depth = ctx->opts.swapchain_depth,
};
- p->protoInfo = (VkSwapchainCreateInfoKHR) {
- .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
- .surface = vk->surf,
- .imageFormat = vk->surf_format.format,
- .imageColorSpace = vk->surf_format.colorSpace,
- .imageArrayLayers = 1, // non-stereoscopic
- .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB
- .presentMode = p->opts->swap_mode ? present_modes[p->opts->swap_mode]
- : preferred_mode,
- .clipped = true,
- };
+ if (p->opts->swap_mode >= 0) // user override
+ params.present_mode = p->opts->swap_mode;
- // Make sure the swapchain present mode is supported
- int num_modes;
- VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
- &num_modes, NULL));
- VkPresentModeKHR *modes = talloc_array(NULL, VkPresentModeKHR, num_modes);
- VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
- &num_modes, modes));
- bool supported = false;
- for (int i = 0; i < num_modes; i++)
- supported |= (modes[i] == p->protoInfo.presentMode);
- talloc_free(modes);
-
- if (!supported) {
- MP_ERR(ctx, "Requested swap mode unsupported by this device!\n");
+ p->swapchain = pl_vulkan_create_swapchain(vk->vulkan, &params);
+ if (!p->swapchain)
goto error;
- }
return true;
@@ -345,245 +195,49 @@ error:
return false;
}
-static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p)
+bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height)
{
- assert(p->old_swapchain);
- vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR);
- p->old_swapchain = NULL;
-}
-
-bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
-{
- struct priv *p = sw->priv;
- if (w == p->w && h == p->h)
- return true;
-
- struct ra *ra = sw->ctx->ra;
- struct mpvk_ctx *vk = p->vk;
- VkImage *vkimages = NULL;
-
- // It's invalid to trigger another swapchain recreation while there's
- // more than one swapchain already active, so we need to flush any pending
- // asynchronous swapchain release operations that may be ongoing.
- while (p->old_swapchain)
- mpvk_poll_commands(vk, 100000); // 100μs
-
- VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
- sinfo.imageExtent = (VkExtent2D){ w, h };
- sinfo.oldSwapchain = p->swapchain;
-
- if (!update_swapchain_info(p, &sinfo))
- goto error;
-
- VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain));
- p->w = w;
- p->h = h;
-
- // Freeing the old swapchain while it's still in use is an error, so do
- // it asynchronously once the device is idle.
- if (sinfo.oldSwapchain) {
- p->old_swapchain = sinfo.oldSwapchain;
- vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p);
- }
-
- // Get the new swapchain images
- int num;
- VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL));
- vkimages = talloc_array(NULL, VkImage, num);
- VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages));
-
- // If needed, allocate some more semaphores
- while (num > p->num_sems) {
- VkSemaphore sem_in, sem_out;
- static const VkSemaphoreCreateInfo seminfo = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- };
- VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_in));
- VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_out));
-
- int idx = p->num_sems++;
- MP_TARRAY_GROW(p, p->sems_in, idx);
- MP_TARRAY_GROW(p, p->sems_out, idx);
- p->sems_in[idx] = sem_in;
- p->sems_out[idx] = sem_out;
- }
-
- // Invalidate the queued texture
- p->queued_fbo = (struct ra_fbo) {0};
-
- // Recreate the ra_tex wrappers
- for (int i = 0; i < p->num_images; i++)
- ra_tex_free(ra, &p->images[i]);
-
- p->num_images = num;
- MP_TARRAY_GROW(p, p->images, p->num_images);
- for (int i = 0; i < num; i++) {
- p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo);
- if (!p->images[i])
- goto error;
- }
+ struct priv *p = ctx->swapchain->priv;
- talloc_free(vkimages);
- return true;
+ bool ok = pl_swapchain_resize(p->swapchain, &width, &height);
+ ctx->vo->dwidth = width;
+ ctx->vo->dheight = height;
-error:
- talloc_free(vkimages);
- vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
- p->swapchain = NULL;
- return false;
+ return ok;
}
static int color_depth(struct ra_swapchain *sw)
{
- struct priv *p = sw->priv;
- int bits = 0;
-
- if (!p->num_images)
- return bits;
-
- // The channel with the most bits is probably the most authoritative about
- // the actual color information (consider e.g. a2bgr10). Slight downside
- // in that it results in rounding r/b for e.g. rgb565, but we don't pick
- // surfaces with fewer than 8 bits anyway.
- const struct ra_format *fmt = p->images[0]->params.format;
- for (int i = 0; i < fmt->num_components; i++) {
- int depth = fmt->component_depth[i];
- bits = MPMAX(bits, depth ? depth : fmt->component_size[i]);
- }
-
- return bits;
+ return 0; // TODO: implement this somehow?
}
static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
{
struct priv *p = sw->priv;
- struct mpvk_ctx *vk = p->vk;
- if (!p->swapchain)
+ struct pl_swapchain_frame frame;
+ if (!pl_swapchain_start_frame(p->swapchain, &frame))
+ return false;
+ if (!mppl_wrap_tex(sw->ctx->ra, frame.fbo, &p->proxy_tex))
return false;
- if (p->queued_fbo.tex) {
- assert(out_fbo != &p->queued_fbo);
- *out_fbo = p->queued_fbo;
- p->queued_fbo = (struct ra_fbo) {0};
- return true;
- }
-
- VkSemaphore sem_in = p->sems_in[p->idx_sems];
- MP_TRACE(vk, "vkAcquireNextImageKHR signals %p\n", (void *)sem_in);
-
- for (int attempts = 0; attempts < 2; attempts++) {
- uint32_t imgidx = 0;
- VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX,
- sem_in, NULL, &imgidx);
-
- switch (res) {
- case VK_SUCCESS:
- p->last_imgidx = imgidx;
- *out_fbo = (struct ra_fbo) {
- .tex = p->images[imgidx],
- .flip = false,
- };
- ra_tex_vk_external_dep(sw->ctx->ra, out_fbo->tex, sem_in);
- return true;
-
- case VK_ERROR_OUT_OF_DATE_KHR: {
- // In these cases try recreating the swapchain
- int w = p->w, h = p->h;
- p->w = p->h = 0; // invalidate the current state
- if (!ra_vk_ctx_resize(sw, w, h))
- return false;
- continue;
- }
-
- default:
- MP_ERR(vk, "Failed acquiring swapchain image: %s\n", vk_err(res));
- return false;
- }
- }
-
- // If we've exhausted the number of attempts to recreate the swapchain,
- // just give up silently.
- return false;
-}
+ *out_fbo = (struct ra_fbo) {
+ .tex = &p->proxy_tex,
+ .flip = frame.flipped,
+ };
-static void present_cb(struct priv *p, void *arg)
-{
- p->frames_in_flight--;
+ return true;
}
static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
{
struct priv *p = sw->priv;
- struct ra *ra = sw->ctx->ra;
- struct mpvk_ctx *vk = p->vk;
- if (!p->swapchain)
- return false;
-
- struct vk_cmd *cmd = ra_vk_submit(ra, p->images[p->last_imgidx]);
- if (!cmd)
- return false;
-
- VkSemaphore sem_out = p->sems_out[p->idx_sems++];
- p->idx_sems %= p->num_sems;
- vk_cmd_sig(cmd, sem_out);
-
- p->frames_in_flight++;
- vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL);
-
- vk_cmd_queue(vk, cmd);
- if (!mpvk_flush_commands(vk))
- return false;
-
- // Submit to the same queue that we were currently rendering to
- struct vk_cmdpool *pool_gfx = vk->pool_graphics;
- VkQueue queue = pool_gfx->queues[pool_gfx->idx_queues];
-
- // Rotate the queues to ensure good parallelism across frames
- for (int i = 0; i < vk->num_pools; i++) {
- struct vk_cmdpool *pool = vk->pools[i];
- pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
- }
-
- VkPresentInfoKHR pinfo = {
- .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
- .waitSemaphoreCount = 1,
- .pWaitSemaphores = &sem_out,
- .swapchainCount = 1,
- .pSwapchains = &p->swapchain,
- .pImageIndices = &p->last_imgidx,
- };
-
- MP_TRACE(vk, "vkQueuePresentKHR waits on %p\n", (void *)sem_out);
- VkResult res = vkQueuePresentKHR(queue, &pinfo);
- switch (res) {
- case VK_SUCCESS:
- case VK_SUBOPTIMAL_KHR:
- return true;
-
- case VK_ERROR_OUT_OF_DATE_KHR:
- // We can silently ignore this error, since the next start_frame will
- // recreate the swapchain automatically.
- return true;
-
- default:
- MP_ERR(vk, "Failed presenting to queue %p: %s\n", (void *)queue,
- vk_err(res));
- return false;
- }
+ return pl_swapchain_submit_frame(p->swapchain);
}
static void swap_buffers(struct ra_swapchain *sw)
{
struct priv *p = sw->priv;
-
- while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
- mpvk_poll_commands(p->vk, 100000); // 100μs
-
- // Also try and block until the next hardware buffer swap early. this
- // prevents start_frame from blocking later, thus slightly improving the
- // frame timing stats. (since mpv assumes most blocking will happen in
- // swap_buffers)
- start_frame(sw, &p->queued_fbo);
+ pl_swapchain_swap_buffers(p->swapchain);
}
static const struct ra_swapchain_fns vulkan_swapchain = {
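The per-frame flow is now a thin forward to libplacebo; a hedged sketch using only the swapchain entry points defined above (`sw` is ctx->swapchain, `frame` a struct vo_frame supplied by the caller, rendering and error handling omitted):

    struct ra_fbo fbo;
    if (sw->fns->start_frame(sw, &fbo)) {
        // render into fbo.tex, the proxy ra_tex wrapping the pl_swapchain frame
        sw->fns->submit_frame(sw, frame);  // pl_swapchain_submit_frame()
        sw->fns->swap_buffers(sw);         // pl_swapchain_swap_buffers(), may block
    }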
diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h
index a64d39f125..30c97cfb4f 100644
--- a/video/out/vulkan/context.h
+++ b/video/out/vulkan/context.h
@@ -7,7 +7,9 @@
void ra_vk_ctx_uninit(struct ra_ctx *ctx);
bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
VkPresentModeKHR preferred_mode);
-bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h);
+
+// Handles a resize request, and updates ctx->vo->dwidth/dheight
+bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height);
// May be called on a ra_ctx of any type.
struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx);
diff --git a/video/out/vulkan/context_wayland.c b/video/out/vulkan/context_wayland.c
index 7276775242..f1091a534f 100644
--- a/video/out/vulkan/context_wayland.c
+++ b/video/out/vulkan/context_wayland.c
@@ -41,8 +41,7 @@ static bool wayland_vk_init(struct ra_ctx *ctx)
struct mpvk_ctx *vk = &p->vk;
int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
- if (!mpvk_instance_init(vk, ctx->log, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
- ctx->opts.debug))
+ if (!mpvk_init(vk, ctx, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME))
goto error;
if (!vo_wayland_init(ctx->vo))
@@ -54,10 +53,10 @@ static bool wayland_vk_init(struct ra_ctx *ctx)
.surface = ctx->vo->wl->surface,
};
- VkResult res = vkCreateWaylandSurfaceKHR(vk->inst, &wlinfo, MPVK_ALLOCATOR,
- &vk->surf);
+ VkInstance inst = vk->vkinst->instance;
+ VkResult res = vkCreateWaylandSurfaceKHR(inst, &wlinfo, NULL, &vk->surface);
if (res != VK_SUCCESS) {
- MP_MSG(ctx, msgl, "Failed creating Wayland surface: %s\n", vk_err(res));
+ MP_MSG(ctx, msgl, "Failed creating Wayland surface\n");
goto error;
}
@@ -77,7 +76,7 @@ error:
return false;
}
-static void resize(struct ra_ctx *ctx)
+static bool resize(struct ra_ctx *ctx)
{
struct vo_wayland_state *wl = ctx->vo->wl;
@@ -87,9 +86,7 @@ static void resize(struct ra_ctx *ctx)
const int32_t height = wl->scaling*mp_rect_h(wl->geometry);
wl_surface_set_buffer_scale(wl->surface, wl->scaling);
-
- wl->vo->dwidth = width;
- wl->vo->dheight = height;
+ return ra_vk_ctx_resize(ctx, width, height);
}
static bool wayland_vk_reconfig(struct ra_ctx *ctx)
@@ -104,8 +101,7 @@ static int wayland_vk_control(struct ra_ctx *ctx, int *events, int request, void
{
int ret = vo_wayland_control(ctx->vo, events, request, arg);
if (*events & VO_EVENT_RESIZE) {
- resize(ctx);
- if (ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight))
+ if (!resize(ctx))
return VO_ERROR;
}
return ret;
diff --git a/video/out/vulkan/context_win.c b/video/out/vulkan/context_win.c
deleted file mode 100644
index cf31586d00..0000000000
--- a/video/out/vulkan/context_win.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * This file is part of mpv.
- *
- * mpv is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * mpv is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "video/out/gpu/context.h"
-#include "video/out/w32_common.h"
-
-#include "common.h"
-#include "context.h"
-#include "utils.h"
-
-EXTERN_C IMAGE_DOS_HEADER __ImageBase;
-#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase)
-
-struct priv {
- struct mpvk_ctx vk;
-};
-
-static void win_uninit(struct ra_ctx *ctx)
-{
- struct priv *p = ctx->priv;
-
- ra_vk_ctx_uninit(ctx);
- mpvk_uninit(&p->vk);
- vo_w32_uninit(ctx->vo);
-}
-
-static bool win_init(struct ra_ctx *ctx)
-{
- struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
- struct mpvk_ctx *vk = &p->vk;
- int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
-
- if (!mpvk_instance_init(vk, ctx->log, VK_KHR_WIN32_SURFACE_EXTENSION_NAME,
- ctx->opts.debug))
- goto error;
-
- if (!vo_w32_init(ctx->vo))
- goto error;
-
- VkWin32SurfaceCreateInfoKHR wininfo = {
- .sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
- .hinstance = HINST_THISCOMPONENT,
- .hwnd = vo_w32_hwnd(ctx->vo),
- };
-
- VkResult res = vkCreateWin32SurfaceKHR(vk->inst, &wininfo, MPVK_ALLOCATOR,
- &vk->surf);
- if (res != VK_SUCCESS) {
- MP_MSG(ctx, msgl, "Failed creating Windows surface: %s\n", vk_err(res));
- goto error;
- }
-
- if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR))
- goto error;
-
- return true;
-
-error:
- win_uninit(ctx);
- return false;
-}
-
-static bool resize(struct ra_ctx *ctx)
-{
- return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight);
-}
-
-static bool win_reconfig(struct ra_ctx *ctx)
-{
- vo_w32_config(ctx->vo);
- return resize(ctx);
-}
-
-static int win_control(struct ra_ctx *ctx, int *events, int request, void *arg)
-{
- int ret = vo_w32_control(ctx->vo, events, request, arg);
- if (*events & VO_EVENT_RESIZE) {
- if (!resize(ctx))
- return VO_ERROR;
- }
- return ret;
-}
-
-const struct ra_ctx_fns ra_ctx_vulkan_win = {
- .type = "vulkan",
- .name = "winvk",
- .reconfig = win_reconfig,
- .control = win_control,
- .init = win_init,
- .uninit = win_uninit,
-};
diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c
index c3bd49f4fb..9baa7c4dc4 100644
--- a/video/out/vulkan/context_xlib.c
+++ b/video/out/vulkan/context_xlib.c
@@ -41,8 +41,7 @@ static bool xlib_init(struct ra_ctx *ctx)
struct mpvk_ctx *vk = &p->vk;
int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
- if (!mpvk_instance_init(vk, ctx->log, VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
- ctx->opts.debug))
+ if (!mpvk_init(vk, ctx, VK_KHR_XLIB_SURFACE_EXTENSION_NAME))
goto error;
if (!vo_x11_init(ctx->vo))
@@ -57,10 +56,10 @@ static bool xlib_init(struct ra_ctx *ctx)
.window = ctx->vo->x11->window,
};
- VkResult res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR,
- &vk->surf);
+ VkInstance inst = vk->vkinst->instance;
+ VkResult res = vkCreateXlibSurfaceKHR(inst, &xinfo, NULL, &vk->surface);
if (res != VK_SUCCESS) {
- MP_MSG(ctx, msgl, "Failed creating Xlib surface: %s\n", vk_err(res));
+ MP_MSG(ctx, msgl, "Failed creating Xlib surface\n");
goto error;
}
@@ -76,7 +75,7 @@ error:
static bool resize(struct ra_ctx *ctx)
{
- return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight);
+ return ra_vk_ctx_resize(ctx, ctx->vo->dwidth, ctx->vo->dheight);
}
static bool xlib_reconfig(struct ra_ctx *ctx)
diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c
deleted file mode 100644
index 327a7ac809..0000000000
--- a/video/out/vulkan/formats.c
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "formats.h"
-
-const struct vk_format vk_formats[] = {
- // Regular, byte-aligned integer formats
- {"r8", VK_FORMAT_R8_UNORM, 1, 1, {8 }, RA_CTYPE_UNORM },
- {"rg8", VK_FORMAT_R8G8_UNORM, 2, 2, {8, 8 }, RA_CTYPE_UNORM },
- {"rgb8", VK_FORMAT_R8G8B8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM },
- {"rgba8", VK_FORMAT_R8G8B8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM },
- {"r16", VK_FORMAT_R16_UNORM, 1, 2, {16 }, RA_CTYPE_UNORM },
- {"rg16", VK_FORMAT_R16G16_UNORM, 2, 4, {16, 16 }, RA_CTYPE_UNORM },
- {"rgb16", VK_FORMAT_R16G16B16_UNORM, 3, 6, {16, 16, 16 }, RA_CTYPE_UNORM },
- {"rgba16", VK_FORMAT_R16G16B16A16_UNORM, 4, 8, {16, 16, 16, 16}, RA_CTYPE_UNORM },
-
- // Special, integer-only formats
- {"r32ui", VK_FORMAT_R32_UINT, 1, 4, {32 }, RA_CTYPE_UINT },
- {"rg32ui", VK_FORMAT_R32G32_UINT, 2, 8, {32, 32 }, RA_CTYPE_UINT },
- {"rgb32ui", VK_FORMAT_R32G32B32_UINT, 3, 12, {32, 32, 32 }, RA_CTYPE_UINT },
- {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_UINT },
- {"r64ui", VK_FORMAT_R64_UINT, 1, 8, {64 }, RA_CTYPE_UINT },
- {"rg64ui", VK_FORMAT_R64G64_UINT, 2, 16, {64, 64 }, RA_CTYPE_UINT },
- {"rgb64ui", VK_FORMAT_R64G64B64_UINT, 3, 24, {64, 64, 64 }, RA_CTYPE_UINT },
- {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_UINT },
-
- // Packed integer formats
- {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM },
- {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM },
- {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM },
- {"rgb5a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM },
-
- // Float formats (native formats, hf = half float, df = double float)
- {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT },
- {"rg16hf", VK_FORMAT_R16G16_SFLOAT, 2, 4, {16, 16 }, RA_CTYPE_FLOAT },
- {"rgb16hf", VK_FORMAT_R16G16B16_SFLOAT, 3, 6, {16, 16, 16 }, RA_CTYPE_FLOAT },
- {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT, 4, 8, {16, 16, 16, 16}, RA_CTYPE_FLOAT },
- {"r32f", VK_FORMAT_R32_SFLOAT, 1, 4, {32 }, RA_CTYPE_FLOAT },
- {"rg32f", VK_FORMAT_R32G32_SFLOAT, 2, 8, {32, 32 }, RA_CTYPE_FLOAT },
- {"rgb32f", VK_FORMAT_R32G32B32_SFLOAT, 3, 12, {32, 32, 32 }, RA_CTYPE_FLOAT },
- {"rgba32f", VK_FORMAT_R32G32B32A32_SFLOAT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_FLOAT },
- {"r64df", VK_FORMAT_R64_SFLOAT, 1, 8, {64 }, RA_CTYPE_FLOAT },
- {"rg64df", VK_FORMAT_R64G64_SFLOAT, 2, 16, {64, 64 }, RA_CTYPE_FLOAT },
- {"rgb64df", VK_FORMAT_R64G64B64_SFLOAT, 3, 24, {64, 64, 64 }, RA_CTYPE_FLOAT },
- {"rgba64df", VK_FORMAT_R64G64B64A64_SFLOAT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_FLOAT },
-
- // "Swapped" component order images
- {"bgr8", VK_FORMAT_B8G8R8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM, true },
- {"bgra8", VK_FORMAT_B8G8R8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
- {"bgra4", VK_FORMAT_B4G4R4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM, true },
- {"bgr565", VK_FORMAT_B5G6R5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM, true },
- {"bgr5a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true },
- {"a1rgb5", VK_FORMAT_A1R5G5B5_UNORM_PACK16, 4, 2, {1, 5, 5, 5 }, RA_CTYPE_UNORM, true },
- {"a2rgb10", VK_FORMAT_A2R10G10B10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
- {"a2bgr10", VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
- {"abgr8", VK_FORMAT_A8B8G8R8_UNORM_PACK32, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
- {0}
-};
diff --git a/video/out/vulkan/formats.h b/video/out/vulkan/formats.h
deleted file mode 100644
index 22782a6958..0000000000
--- a/video/out/vulkan/formats.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-#include "video/out/gpu/ra.h"
-#include "common.h"
-
-struct vk_format {
- const char *name;
- VkFormat iformat; // vulkan format enum
- int components; // how many components are there
- int bytes; // how many bytes is a texel
- int bits[4]; // how many bits per component
- enum ra_ctype ctype; // format representation type
- bool fucked_order; // used for formats which are not simply rgba
-};
-
-extern const struct vk_format vk_formats[];
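With the static vk_formats[] table gone, format negotiation happens through libplacebo: ra_create_pl() wraps every sampleable entry of gpu->formats, and point lookups use queries such as the one it already performs for its blit heuristic. A minimal sketch of that lookup:

    const struct pl_fmt *fmt = pl_find_named_fmt(gpu, "r8");
    if (fmt && (fmt->caps & PL_FMT_CAP_BLITTABLE))
        ;  // the format exists and supports blits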
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
deleted file mode 100644
index e1e7ae28e6..0000000000
--- a/video/out/vulkan/malloc.c
+++ /dev/null
@@ -1,471 +0,0 @@
-#include "malloc.h"
-#include "utils.h"
-#include "osdep/timer.h"
-
-#if HAVE_WIN32_DESKTOP
-#include <versionhelpers.h>
-#endif
-
-// Controls the multiplication factor for new slab allocations. The new slab
-// will always be allocated such that the size of the slab is this factor times
-// the previous slab. Higher values make it grow faster.
-#define MPVK_HEAP_SLAB_GROWTH_RATE 4
-
-// Controls the minimum slab size, to reduce the frequency at which very small
-// slabs would need to get allocated when allocating the first few buffers.
-// (Default: 1 MB)
-#define MPVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20)
-
-// Controls the maximum slab size, to reduce the effect of unbounded slab
-// growth exhausting memory. If the application needs a single allocation
-// that's bigger than this value, it will be allocated directly from the
-// device. (Default: 512 MB)
-#define MPVK_HEAP_MAXIMUM_SLAB_SIZE (1 << 29)
-
-// Controls the minimum free region size, to reduce thrashing the free space
-// map with lots of small buffers during uninit. (Default: 1 KB)
-#define MPVK_HEAP_MINIMUM_REGION_SIZE (1 << 10)
-
-// Represents a region of available memory
-struct vk_region {
- size_t start; // first offset in region
- size_t end; // first offset *not* in region
-};
-
-static inline size_t region_len(struct vk_region r)
-{
- return r.end - r.start;
-}
-
-// A single slab represents a contiguous region of allocated memory. Actual
-// allocations are served as slices of this. Slabs are organized into linked
-// lists, which represent individual heaps.
-struct vk_slab {
- VkDeviceMemory mem; // underlying device allocation
- size_t size; // total size of `slab`
- size_t used; // number of bytes actually in use (for GC accounting)
- bool dedicated; // slab is allocated specifically for one object
- // free space map: a sorted list of memory regions that are available
- struct vk_region *regions;
- int num_regions;
- // optional, depends on the memory type:
- VkBuffer buffer; // buffer spanning the entire slab
- void *data; // mapped memory corresponding to `mem`
-};
-
-// Represents a single memory heap. We keep track of a vk_heap for each
-// combination of buffer type and memory selection parameters. This shouldn't
-// actually be that many in practice, because some combinations simply never
-// occur, and others will generally be the same for the same objects.
-struct vk_heap {
- VkBufferUsageFlags usage; // the buffer usage type (or 0)
- VkMemoryPropertyFlags flags; // the memory type flags (or 0)
- uint32_t typeBits; // the memory type index requirements (or 0)
- bool exportable; // whether memory is exportable to other APIs
- struct vk_slab **slabs; // array of slabs sorted by size
- int num_slabs;
-};
-
-// The overall state of the allocator, which keeps track of a vk_heap for each
-// memory type.
-struct vk_malloc {
- VkPhysicalDeviceMemoryProperties props;
- struct vk_heap *heaps;
- int num_heaps;
-};
-
-static void slab_free(struct mpvk_ctx *vk, struct vk_slab *slab)
-{
- if (!slab)
- return;
-
- assert(slab->used == 0);
-
- int64_t start = mp_time_us();
- vkDestroyBuffer(vk->dev, slab->buffer, MPVK_ALLOCATOR);
- // also implicitly unmaps the memory if needed
- vkFreeMemory(vk->dev, slab->mem, MPVK_ALLOCATOR);
- int64_t stop = mp_time_us();
-
- MP_VERBOSE(vk, "Freeing slab of size %zu took %lld μs.\n",
- slab->size, (long long)(stop - start));
-
- talloc_free(slab);
-}
-
-static bool find_best_memtype(struct mpvk_ctx *vk, uint32_t typeBits,
- VkMemoryPropertyFlags flags,
- VkMemoryType *out_type, int *out_index)
-{
- struct vk_malloc *ma = vk->alloc;
-
- // The vulkan spec requires memory types to be sorted in the "optimal"
- // order, so the first matching type we find will be the best/fastest one.
- for (int i = 0; i < ma->props.memoryTypeCount; i++) {
- // The memory type flags must include our properties
- if ((ma->props.memoryTypes[i].propertyFlags & flags) != flags)
- continue;
- // The memory type must be supported by the requirements (bitfield)
- if (typeBits && !(typeBits & (1 << i)))
- continue;
- *out_type = ma->props.memoryTypes[i];
- *out_index = i;
- return true;
- }
-
- MP_ERR(vk, "Found no memory type matching property flags 0x%x and type "
- "bits 0x%x!\n", (unsigned)flags, (unsigned)typeBits);
- return false;
-}
-
-static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
- size_t size)
-{
- struct vk_slab *slab = talloc_ptrtype(NULL, slab);
- *slab = (struct vk_slab) {
- .size = size,
- };
-
- MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, (struct vk_region) {
- .start = 0,
- .end = slab->size,
- });
-
- VkExportMemoryAllocateInfoKHR eminfo = {
- .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR,
-#if HAVE_WIN32_DESKTOP
- .handleTypes = IsWindows8OrGreater()
- ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
- : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
-#else
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
-#endif
- };
-
- VkMemoryAllocateInfo minfo = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = heap->exportable ? &eminfo : NULL,
- .allocationSize = slab->size,
- };
-
- uint32_t typeBits = heap->typeBits ? heap->typeBits : UINT32_MAX;
- if (heap->usage) {
- // FIXME: Since we can't keep track of queue family ownership properly,
- // and we don't know in advance what types of queue families this buffer
- // will belong to, we're forced to share all of our buffers between all
- // command pools.
- uint32_t qfs[3] = {0};
- for (int i = 0; i < vk->num_pools; i++)
- qfs[i] = vk->pools[i]->qf;
-
- VkExternalMemoryBufferCreateInfoKHR ebinfo = {
- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR,
- .handleTypes = eminfo.handleTypes,
- };
-
- VkBufferCreateInfo binfo = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = heap->exportable ? &ebinfo : NULL,
- .size = slab->size,
- .usage = heap->usage,
- .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
- : VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = vk->num_pools,
- .pQueueFamilyIndices = qfs,
- };
-
- VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer));
-
- VkMemoryRequirements reqs;
- vkGetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs);
- minfo.allocationSize = reqs.size; // this can be larger than slab->size
- typeBits &= reqs.memoryTypeBits; // this can restrict the types
- }
-
- VkMemoryType type;
- int index;
- if (!find_best_memtype(vk, typeBits, heap->flags, &type, &index))
- goto error;
-
- MP_VERBOSE(vk, "Allocating %zu memory of type 0x%x (id %d) in heap %d.\n",
- slab->size, (unsigned)type.propertyFlags, index, (int)type.heapIndex);
-
- minfo.memoryTypeIndex = index;
- VK(vkAllocateMemory(vk->dev, &minfo, MPVK_ALLOCATOR, &slab->mem));
-
- if (heap->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
- VK(vkMapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data));
-
- if (slab->buffer)
- VK(vkBindBufferMemory(vk->dev, slab->buffer, slab->mem, 0));
-
- return slab;
-
-error:
- slab_free(vk, slab);
- return NULL;
-}
-
-static void insert_region(struct vk_slab *slab, struct vk_region region)
-{
- if (region.start == region.end)
- return;
-
- bool big_enough = region_len(region) >= MPVK_HEAP_MINIMUM_REGION_SIZE;
-
- // Find the index of the first region that comes after this
- for (int i = 0; i < slab->num_regions; i++) {
- struct vk_region *r = &slab->regions[i];
-
- // Check for a few special cases which can be coalesced
- if (r->end == region.start) {
- // The new region is at the tail of this region. In addition to
- // modifying this region, we also need to coalesce all the following
- // regions for as long as possible
- r->end = region.end;
-
- struct vk_region *next = &slab->regions[i+1];
- while (i+1 < slab->num_regions && r->end == next->start) {
- r->end = next->end;
- MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, i+1);
- }
- return;
- }
-
- if (r->start == region.end) {
- // The new region is at the head of this region. We don't need to
- // do anything special here - because if this could be further
- // coalesced backwards, the previous loop iteration would already
- // have caught it.
- r->start = region.start;
- return;
- }
-
- if (r->start > region.start) {
- // The new region comes somewhere before this region, so insert
- // it into this index in the array.
- if (big_enough) {
- MP_TARRAY_INSERT_AT(slab, slab->regions, slab->num_regions,
- i, region);
- }
- return;
- }
- }
-
- // If we've reached the end of this loop, then all of the regions
- // come before the new region, and are disconnected - so append it
- if (big_enough)
- MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, region);
-}
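// Editor's illustration of the coalescing above: if the free-space map holds
// [0,4096) and [8192,12288) and the slice [4096,8192) is returned, the first
// branch grows the leading region to [0,8192), and the inner loop then merges
// it with its neighbour, leaving a single free region [0,12288).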
-
-static void heap_uninit(struct mpvk_ctx *vk, struct vk_heap *heap)
-{
- for (int i = 0; i < heap->num_slabs; i++)
- slab_free(vk, heap->slabs[i]);
-
- talloc_free(heap->slabs);
- *heap = (struct vk_heap){0};
-}
-
-void vk_malloc_init(struct mpvk_ctx *vk)
-{
- assert(vk->physd);
- vk->alloc = talloc_zero(NULL, struct vk_malloc);
- vkGetPhysicalDeviceMemoryProperties(vk->physd, &vk->alloc->props);
-}
-
-void vk_malloc_uninit(struct mpvk_ctx *vk)
-{
- struct vk_malloc *ma = vk->alloc;
- if (!ma)
- return;
-
- for (int i = 0; i < ma->num_heaps; i++)
- heap_uninit(vk, &ma->heaps[i]);
-
- talloc_free(ma);
- vk->alloc = NULL;
-}
-
-void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice)
-{
- struct vk_slab *slab = slice.priv;
- if (!slab)
- return;
-
- assert(slab->used >= slice.size);
- slab->used -= slice.size;
-
- MP_DBG(vk, "Freeing slice %zu + %zu from slab with size %zu\n",
- slice.offset, slice.size, slab->size);
-
- if (slab->dedicated) {
- // If the slab was purpose-allocated for this memslice, we can just
- // free it here
- slab_free(vk, slab);
- } else {
- // Return the allocation to the free space map
- insert_region(slab, (struct vk_region) {
- .start = slice.offset,
- .end = slice.offset + slice.size,
- });
- }
-}
-
-// reqs: can be NULL
-static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage,
- VkMemoryPropertyFlags flags,
- VkMemoryRequirements *reqs,
- bool exportable)
-{
- struct vk_malloc *ma = vk->alloc;
- int typeBits = reqs ? reqs->memoryTypeBits : 0;
-
- for (int i = 0; i < ma->num_heaps; i++) {
- if (ma->heaps[i].usage != usage)
- continue;
- if (ma->heaps[i].flags != flags)
- continue;
- if (ma->heaps[i].typeBits != typeBits)
- continue;
- if (ma->heaps[i].exportable != exportable)
- continue;
- return &ma->heaps[i];
- }
-
- // Not found => add it
- MP_TARRAY_GROW(ma, ma->heaps, ma->num_heaps + 1);
- struct vk_heap *heap = &ma->heaps[ma->num_heaps++];
- *heap = (struct vk_heap) {
- .usage = usage,
- .flags = flags,
- .typeBits = typeBits,
- .exportable = exportable,
- };
- return heap;
-}
-
-static inline bool region_fits(struct vk_region r, size_t size, size_t align)
-{
- return MP_ALIGN_UP(r.start, align) + size <= r.end;
-}
-
-// Finds the best-fitting region in a heap. If the heap is too small or too
-// fragmented, a new slab will be allocated under the hood.
-static bool heap_get_region(struct mpvk_ctx *vk, struct vk_heap *heap,
- size_t size, size_t align,
- struct vk_slab **out_slab, int *out_index)
-{
- struct vk_slab *slab = NULL;
-
- // If the allocation is very big, serve it directly instead of bothering
- // with the heap
- if (size > MPVK_HEAP_MAXIMUM_SLAB_SIZE) {
- slab = slab_alloc(vk, heap, size);
- *out_slab = slab;
- *out_index = 0;
- return !!slab;
- }
-
- for (int i = 0; i < heap->num_slabs; i++) {
- slab = heap->slabs[i];
- if (slab->size < size)
- continue;
-
- // Attempt a best fit search
- int best = -1;
- for (int n = 0; n < slab->num_regions; n++) {
- struct vk_region r = slab->regions[n];
- if (!region_fits(r, size, align))
- continue;
- if (best >= 0 && region_len(r) > region_len(slab->regions[best]))
- continue;
- best = n;
- }
-
- if (best >= 0) {
- *out_slab = slab;
- *out_index = best;
- return true;
- }
- }
-
- // Otherwise, allocate a new vk_slab and append it to the list.
- size_t cur_size = MPMAX(size, slab ? slab->size : 0);
- size_t slab_size = MPVK_HEAP_SLAB_GROWTH_RATE * cur_size;
- slab_size = MPMAX(MPVK_HEAP_MINIMUM_SLAB_SIZE, slab_size);
- slab_size = MPMIN(MPVK_HEAP_MAXIMUM_SLAB_SIZE, slab_size);
- assert(slab_size >= size);
- slab = slab_alloc(vk, heap, slab_size);
- if (!slab)
- return false;
- MP_TARRAY_APPEND(NULL, heap->slabs, heap->num_slabs, slab);
-
- // Return the only region there is in a newly allocated slab
- assert(slab->num_regions == 1);
- *out_slab = slab;
- *out_index = 0;
- return true;
-}
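// Editor's illustration of the sizing policy above, using the constants from
// the top of this file: a first 100 KiB request asks for 4 * 100 KiB = 400 KiB,
// which the 1 MiB floor rounds up to a 1 MiB slab. Later slabs grow from the
// largest existing one (1 MiB -> 4 MiB -> 16 MiB -> ...) until the 512 MiB
// ceiling; anything larger than the ceiling is allocated directly from the
// device, as handled at the start of this function.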
-
-static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size,
- size_t alignment, struct vk_memslice *out)
-{
- struct vk_slab *slab;
- int index;
- alignment = MP_ALIGN_UP(alignment, vk->limits.bufferImageGranularity);
- if (!heap_get_region(vk, heap, size, alignment, &slab, &index))
- return false;
-
- struct vk_region reg = slab->regions[index];
- MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, index);
- *out = (struct vk_memslice) {
- .vkmem = slab->mem,
- .offset = MP_ALIGN_UP(reg.start, alignment),
- .size = size,
- .slab_size = slab->size,
- .priv = slab,
- };
-
- MP_DBG(vk, "Sub-allocating slice %zu + %zu from slab with size %zu\n",
- out->offset, out->size, slab->size);
-
- size_t out_end = out->offset + out->size;
- insert_region(slab, (struct vk_region) { reg.start, out->offset });
- insert_region(slab, (struct vk_region) { out_end, reg.end });
-
- slab->used += size;
- return true;
-}
-
-bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
- VkMemoryPropertyFlags flags, struct vk_memslice *out)
-{
- struct vk_heap *heap = find_heap(vk, 0, flags, &reqs, false);
- return slice_heap(vk, heap, reqs.size, reqs.alignment, out);
-}
-
-bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags,
- VkMemoryPropertyFlags memFlags, VkDeviceSize size,
- VkDeviceSize alignment, bool exportable,
- struct vk_bufslice *out)
-{
- if (exportable) {
- if (!vk->has_ext_external_memory_export) {
- MP_ERR(vk, "Exportable memory requires the %s extension\n",
- MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
- return false;
- }
- }
-
- struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL, exportable);
- if (!slice_heap(vk, heap, size, alignment, &out->mem))
- return false;
-
- struct vk_slab *slab = out->mem.priv;
- out->buf = slab->buffer;
- if (slab->data)
- out->data = (void *)((uintptr_t)slab->data + (ptrdiff_t)out->mem.offset);
-
- return true;
-}
diff --git a/video/out/vulkan/malloc.h b/video/out/vulkan/malloc.h
deleted file mode 100644
index 9b311ce311..0000000000
--- a/video/out/vulkan/malloc.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#include "common.h"
-
-void vk_malloc_init(struct mpvk_ctx *vk);
-void vk_malloc_uninit(struct mpvk_ctx *vk);
-
-// Represents a single "slice" of generic (non-buffer) memory, plus some
-// metadata for accounting. This struct is essentially read-only.
-struct vk_memslice {
- VkDeviceMemory vkmem;
- size_t offset;
- size_t size;
- size_t slab_size;
- void *priv;
-};
-
-void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice);
-bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
- VkMemoryPropertyFlags flags, struct vk_memslice *out);
-
-// Represents a single "slice" of a larger buffer
-struct vk_bufslice {
- struct vk_memslice mem; // must be freed by the user when done
- VkBuffer buf; // the buffer this memory was sliced from
- // For persistently mapped buffers, this points to the first usable byte of
- // this slice.
- void *data;
-};
-
-// Allocate a buffer slice. This is more efficient than vk_malloc_generic for
-// when the user needs lots of buffers, since it doesn't require
-// creating/destroying lots of (little) VkBuffers.
-bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags,
- VkMemoryPropertyFlags memFlags, VkDeviceSize size,
- VkDeviceSize alignment, bool exportable,
- struct vk_bufslice *out);
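// Editor's sketch of how this API was typically used (it mirrors the
// host-visible staging path in the removed ra_vk.c; `vk` is assumed to be an
// initialized struct mpvk_ctx with vk_malloc_init() already called):
struct vk_bufslice slice;
if (vk_malloc_buffer(vk, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                     4096, 4, false, &slice))
{
    if (slice.data)
        memset(slice.data, 0, 4096); // written through the persistent mapping
    vk_free_memslice(vk, slice.mem); // the slice must be freed by the caller
}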
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
deleted file mode 100644
index 3e4ba28ac4..0000000000
--- a/video/out/vulkan/ra_vk.c
+++ /dev/null
@@ -1,1982 +0,0 @@
-#include "video/out/gpu/utils.h"
-#include "video/out/gpu/spirv.h"
-
-#include "ra_vk.h"
-#include "malloc.h"
-
-#if HAVE_WIN32_DESKTOP
-#include <versionhelpers.h>
-#endif
-
-static struct ra_fns ra_fns_vk;
-
-enum queue_type {
- GRAPHICS,
- COMPUTE,
- TRANSFER,
-};
-
-// For ra.priv
-struct ra_vk {
- struct mpvk_ctx *vk;
- struct ra_tex *clear_tex; // stupid hack for clear()
- struct vk_cmd *cmd; // currently recording cmd
-};
-
-struct mpvk_ctx *ra_vk_get(struct ra *ra)
-{
- if (ra->fns != &ra_fns_vk)
- return NULL;
-
- struct ra_vk *p = ra->priv;
- return p->vk;
-}
-
-static void vk_submit(struct ra *ra)
-{
- struct ra_vk *p = ra->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- if (p->cmd) {
- vk_cmd_queue(vk, p->cmd);
- p->cmd = NULL;
- }
-}
-
-// Returns a command buffer, or NULL on error
-static struct vk_cmd *vk_require_cmd(struct ra *ra, enum queue_type type)
-{
- struct ra_vk *p = ra->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- struct vk_cmdpool *pool;
- switch (type) {
- case GRAPHICS: pool = vk->pool_graphics; break;
- case COMPUTE: pool = vk->pool_compute; break;
-
- // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec)
- case TRANSFER:
- pool = vk->pool_transfer;
- if (!pool)
- pool = vk->pool_compute;
- if (!pool)
- pool = vk->pool_graphics;
- break;
- default: abort();
- }
-
- assert(pool);
- if (p->cmd && p->cmd->pool == pool)
- return p->cmd;
-
- vk_submit(ra);
- p->cmd = vk_cmd_begin(vk, pool);
- return p->cmd;
-}
-
-#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \
- static void fun##_lazy(struct ra *ra, argtype *arg) { \
- struct ra_vk *p = ra->priv; \
- struct mpvk_ctx *vk = ra_vk_get(ra); \
- if (p->cmd) { \
- vk_cmd_callback(p->cmd, (vk_cb) fun, ra, arg); \
- } else { \
- vk_dev_callback(vk, (vk_cb) fun, ra, arg); \
- } \
- }
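// Editor's note: for example, MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex)
// further down expands to roughly the following, deferring the real destructor
// until the command buffer (or device) that may still reference the object has
// run its callbacks:
//
//     static void vk_tex_destroy_lazy(struct ra *ra, struct ra_tex *arg) {
//         struct ra_vk *p = ra->priv;
//         struct mpvk_ctx *vk = ra_vk_get(ra);
//         if (p->cmd) {
//             vk_cmd_callback(p->cmd, (vk_cb) vk_tex_destroy, ra, arg);
//         } else {
//             vk_dev_callback(vk, (vk_cb) vk_tex_destroy, ra, arg);
//         }
//     }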
-
-static void vk_destroy_ra(struct ra *ra)
-{
- struct ra_vk *p = ra->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- vk_submit(ra);
- mpvk_flush_commands(vk);
- mpvk_poll_commands(vk, UINT64_MAX);
- ra_tex_free(ra, &p->clear_tex);
-
- talloc_free(ra);
-}
-
-static bool vk_setup_formats(struct ra *ra)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) {
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop);
-
- // As a bare minimum, we need to sample from an allocated image
- VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
- if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
- continue;
-
- VkFormatFeatureFlags linear_bits, render_bits;
- linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
- render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
- VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
-
- struct ra_format *fmt = talloc_zero(ra, struct ra_format);
- *fmt = (struct ra_format) {
- .name = vk_fmt->name,
- .priv = (void *)vk_fmt,
- .ctype = vk_fmt->ctype,
- .ordered = !vk_fmt->fucked_order,
- .num_components = vk_fmt->components,
- .pixel_size = vk_fmt->bytes,
- .linear_filter = !!(flags & linear_bits),
- .renderable = !!(flags & render_bits),
- };
-
- for (int i = 0; i < 4; i++)
- fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];
-
- fmt->glsl_format = ra_fmt_glsl_format(fmt);
-
- MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
- }
-
- // Populate some other capabilities related to formats while we're at it
- VkImageType imgType[3] = {
- VK_IMAGE_TYPE_1D,
- VK_IMAGE_TYPE_2D,
- VK_IMAGE_TYPE_3D
- };
-
- // R8_UNORM is supported on literally every single vulkan implementation
- const VkFormat testfmt = VK_FORMAT_R8_UNORM;
-
- for (int d = 0; d < 3; d++) {
- VkImageFormatProperties iprop;
- VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
- testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL,
- VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop);
-
- switch (imgType[d]) {
- case VK_IMAGE_TYPE_1D:
- if (res == VK_SUCCESS)
- ra->caps |= RA_CAP_TEX_1D;
- break;
- case VK_IMAGE_TYPE_2D:
- // 2D formats must be supported by RA, so ensure this is the case
- VK_ASSERT(res, "Querying 2D format limits");
- ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height);
- break;
- case VK_IMAGE_TYPE_3D:
- if (res == VK_SUCCESS)
- ra->caps |= RA_CAP_TEX_3D;
- break;
- }
- }
-
- // RA_CAP_BLIT implies both blitting between images as well as blitting
- // directly to the swapchain image, so check for all three operations
- bool blittable = true;
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop);
- if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
- blittable = false;
- if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
- blittable = false;
-
- vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop);
- if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
- blittable = false;
-
- if (blittable)
- ra->caps |= RA_CAP_BLIT;
-
- return true;
-
-error:
- return false;
-}
-
-static struct ra_fns ra_fns_vk;
-
-struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
-{
- assert(vk->dev);
- assert(vk->alloc);
-
- struct ra *ra = talloc_zero(NULL, struct ra);
- ra->log = log;
- ra->fns = &ra_fns_vk;
-
- struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk);
- p->vk = vk;
-
- ra->caps |= vk->spirv->ra_caps;
- ra->glsl_version = vk->spirv->glsl_version;
- ra->glsl_vulkan = true;
- ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
- ra->max_pushc_size = vk->limits.maxPushConstantsSize;
-
- if (vk->pool_compute) {
- ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS;
- // If we have more compute queues than graphics queues, we probably
- // want to be using them. (This seems mostly relevant for AMD)
- if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues)
- ra->caps |= RA_CAP_PARALLEL_COMPUTE;
- }
-
- if (!vk_setup_formats(ra))
- goto error;
-
- // UBO support is required
- ra->caps |= RA_CAP_BUF_RO | RA_CAP_FRAGCOORD;
-
- // textureGather requires the ImageGatherExtended capability
- if (vk->features.shaderImageGatherExtended)
- ra->caps |= RA_CAP_GATHER;
-
- // Try creating a shader storage buffer
- struct ra_buf_params ssbo_params = {
- .type = RA_BUF_TYPE_SHADER_STORAGE,
- .size = 16,
- };
-
- struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params);
- if (ssbo) {
- ra->caps |= RA_CAP_BUF_RW;
- ra_buf_free(ra, &ssbo);
- }
-
- // To support clear() by region, we need to allocate a dummy 1x1 image that
- // will be used as the source of blit operations
- struct ra_tex_params clear_params = {
- .dimensions = 1, // no point in using a 2D image if height = 1
- .w = 1,
- .h = 1,
- .d = 1,
- .format = ra_find_float16_format(ra, 4),
- .blit_src = 1,
- .host_mutable = 1,
- };
-
- p->clear_tex = ra_tex_create(ra, &clear_params);
- if (!p->clear_tex) {
- MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n");
- goto error;
- }
-
- return ra;
-
-error:
- vk_destroy_ra(ra);
- return NULL;
-}
-
-// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain
-// compatible. The renderpass will automatically transition the image out of
-// initialLayout and into finalLayout.
-static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
- VkAttachmentLoadOp loadOp,
- VkImageLayout initialLayout,
- VkImageLayout finalLayout,
- VkRenderPass *out)
-{
- struct vk_format *vk_fmt = fmt->priv;
- assert(fmt->renderable);
-
- VkRenderPassCreateInfo rinfo = {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription) {
- .format = vk_fmt->iformat,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .loadOp = loadOp,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = initialLayout,
- .finalLayout = finalLayout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription) {
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference) {
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- },
- },
- };
-
- return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out);
-}
-
-// For ra_tex.priv
-struct ra_tex_vk {
- bool external_img;
- enum queue_type upload_queue;
- VkImageType type;
- VkImage img;
- struct vk_memslice mem;
- // for sampling
- VkImageView view;
- VkSampler sampler;
- // for rendering
- VkFramebuffer framebuffer;
- VkRenderPass dummyPass;
- // for uploading
- struct ra_buf_pool pbo;
- // "current" metadata, can change during the course of execution
- VkImageLayout current_layout;
- VkAccessFlags current_access;
- // the signal guards reuse, and can be NULL
- struct vk_signal *sig;
- VkPipelineStageFlags sig_stage;
- VkSemaphore ext_dep; // external semaphore, not owned by the ra_tex
-};
-
-void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep)
-{
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(!tex_vk->ext_dep);
- tex_vk->ext_dep = dep;
-}
-
-// Small helper to ease image barrier creation. If `discard` is set, the contents
-// of the image will be undefined after the barrier
-static void tex_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex,
- VkPipelineStageFlags stage, VkAccessFlags newAccess,
- VkImageLayout newLayout, bool discard)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_tex_vk *tex_vk = tex->priv;
-
- if (tex_vk->ext_dep) {
- vk_cmd_dep(cmd, tex_vk->ext_dep, stage);
- tex_vk->ext_dep = NULL;
- }
-
- VkImageMemoryBarrier imgBarrier = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .oldLayout = tex_vk->current_layout,
- .newLayout = newLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .srcAccessMask = tex_vk->current_access,
- .dstAccessMask = newAccess,
- .image = tex_vk->img,
- .subresourceRange = vk_range,
- };
-
- if (discard) {
- imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
- imgBarrier.srcAccessMask = 0;
- }
-
- VkEvent event = NULL;
- vk_cmd_wait(vk, cmd, &tex_vk->sig, stage, &event);
-
- bool need_trans = tex_vk->current_layout != newLayout ||
- tex_vk->current_access != newAccess;
-
- // Transitioning to VK_IMAGE_LAYOUT_UNDEFINED is a pseudo-operation
- // that for us means we don't need to perform the actual transition
- if (need_trans && newLayout != VK_IMAGE_LAYOUT_UNDEFINED) {
- if (event) {
- vkCmdWaitEvents(cmd->buf, 1, &event, tex_vk->sig_stage,
- stage, 0, NULL, 0, NULL, 1, &imgBarrier);
- } else {
- // If we're not using an event, then the source stage is irrelevant
- // because we're coming from a different queue anyway, so we can
- // safely set it to TOP_OF_PIPE.
- imgBarrier.srcAccessMask = 0;
- vkCmdPipelineBarrier(cmd->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- stage, 0, 0, NULL, 0, NULL, 1, &imgBarrier);
- }
- }
-
- tex_vk->current_layout = newLayout;
- tex_vk->current_access = newAccess;
-}
-
-static void tex_signal(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex,
- VkPipelineStageFlags stage)
-{
- struct ra_tex_vk *tex_vk = tex->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
- assert(!tex_vk->sig);
-
- tex_vk->sig = vk_cmd_signal(vk, cmd, stage);
- tex_vk->sig_stage = stage;
-}
-
-static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex)
-{
- if (!tex)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_tex_vk *tex_vk = tex->priv;
-
- ra_buf_pool_uninit(ra, &tex_vk->pbo);
- vk_signal_destroy(vk, &tex_vk->sig);
- vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR);
- vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR);
- vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR);
- vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR);
- if (!tex_vk->external_img) {
- vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR);
- vk_free_memslice(vk, tex_vk->mem);
- }
-
- talloc_free(tex);
-}
-
-MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex);
-
-// Initializes non-VkImage values like the image view, samplers, etc.
-static bool vk_init_image(struct ra *ra, struct ra_tex *tex)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- struct ra_tex_params *params = &tex->params;
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(tex_vk->img);
-
- tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
- tex_vk->current_access = 0;
-
- if (params->render_src || params->render_dst) {
- static const VkImageViewType viewType[] = {
- [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
- [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
- [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
- };
-
- const struct vk_format *fmt = params->format->priv;
- VkImageViewCreateInfo vinfo = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = tex_vk->img,
- .viewType = viewType[tex_vk->type],
- .format = fmt->iformat,
- .subresourceRange = vk_range,
- };
-
- VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view));
- }
-
- if (params->render_src) {
- assert(params->format->linear_filter || !params->src_linear);
- VkFilter filter = params->src_linear
- ? VK_FILTER_LINEAR
- : VK_FILTER_NEAREST;
- VkSamplerAddressMode wrap = params->src_repeat
- ? VK_SAMPLER_ADDRESS_MODE_REPEAT
- : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
- VkSamplerCreateInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
- .magFilter = filter,
- .minFilter = filter,
- .addressModeU = wrap,
- .addressModeV = wrap,
- .addressModeW = wrap,
- .maxAnisotropy = 1.0,
- };
-
- VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler));
- }
-
- if (params->render_dst) {
- // Framebuffers need to be created against a specific render pass
- // layout, so we need to temporarily create a skeleton/dummy render
- // pass for vulkan to figure out the compatibility
- VK(vk_create_render_pass(vk->dev, params->format,
- VK_ATTACHMENT_LOAD_OP_DONT_CARE,
- VK_IMAGE_LAYOUT_UNDEFINED,
- VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- &tex_vk->dummyPass));
-
- VkFramebufferCreateInfo finfo = {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .renderPass = tex_vk->dummyPass,
- .attachmentCount = 1,
- .pAttachments = &tex_vk->view,
- .width = tex->params.w,
- .height = tex->params.h,
- .layers = 1,
- };
-
- VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR,
- &tex_vk->framebuffer));
-
- // NOTE: Normally we would free the dummyPass again here, but a bug
- // in the nvidia vulkan driver causes a segfault if you do.
- }
-
- return true;
-
-error:
- return false;
-}
-
-static struct ra_tex *vk_tex_create(struct ra *ra,
- const struct ra_tex_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- assert(!params->format->dummy_format);
-
- struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
- tex->params = *params;
- tex->params.initial_data = NULL;
-
- struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
- tex_vk->upload_queue = GRAPHICS;
-
- const struct vk_format *fmt = params->format->priv;
- switch (params->dimensions) {
- case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
- case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
- case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
- default: abort();
- }
-
- VkImageUsageFlags usage = 0;
- if (params->render_src)
- usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
- if (params->render_dst)
- usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
- if (params->storage_dst)
- usage |= VK_IMAGE_USAGE_STORAGE_BIT;
- if (params->blit_src)
- usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
- if (params->host_mutable || params->blit_dst || params->initial_data)
- usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
-
- // Always use the transfer pool if available, for efficiency
- if (params->host_mutable && vk->pool_transfer)
- tex_vk->upload_queue = TRANSFER;
-
- // Double-check image usage support and fail immediately if invalid
- VkImageFormatProperties iprop;
- VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
- fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0,
- &iprop);
- if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
- return NULL;
- } else {
- VK_ASSERT(res, "Querying image format properties");
- }
-
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
- VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
-
- bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT,
- has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
-
- if (params->w > iprop.maxExtent.width ||
- params->h > iprop.maxExtent.height ||
- params->d > iprop.maxExtent.depth ||
- (params->blit_src && !has_blit_src) ||
- (params->src_linear && !has_src_linear))
- {
- return NULL;
- }
-
- // FIXME: Since we can't keep track of queue family ownership properly,
- // and we don't know in advance what types of queue families this image
- // will belong to, we're forced to share all of our images between all
- // command pools.
- uint32_t qfs[3] = {0};
- for (int i = 0; i < vk->num_pools; i++)
- qfs[i] = vk->pools[i]->qf;
-
- VkImageCreateInfo iinfo = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .imageType = tex_vk->type,
- .format = fmt->iformat,
- .extent = (VkExtent3D) { params->w, params->h, params->d },
- .mipLevels = 1,
- .arrayLayers = 1,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = usage,
- .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
- : VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = vk->num_pools,
- .pQueueFamilyIndices = qfs,
- };
-
- VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
-
- VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- VkMemoryRequirements reqs;
- vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs);
-
- struct vk_memslice *mem = &tex_vk->mem;
- if (!vk_malloc_generic(vk, reqs, memFlags, mem))
- goto error;
-
- VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
-
- if (!vk_init_image(ra, tex))
- goto error;
-
- if (params->initial_data) {
- struct ra_tex_upload_params ul_params = {
- .tex = tex,
- .invalidate = true,
- .src = params->initial_data,
- .stride = params->w * fmt->bytes,
- };
- if (!ra->fns->tex_upload(ra, &ul_params))
- goto error;
- }
-
- return tex;
-
-error:
- vk_tex_destroy(ra, tex);
- return NULL;
-}
-
-struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
- VkSwapchainCreateInfoKHR info)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_tex *tex = NULL;
-
- const struct ra_format *format = NULL;
- for (int i = 0; i < ra->num_formats; i++) {
- const struct vk_format *fmt = ra->formats[i]->priv;
- if (fmt->iformat == vk->surf_format.format) {
- format = ra->formats[i];
- break;
- }
- }
-
- if (!format) {
- MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image "
- "with surface format 0x%x\n", vk->surf_format.format);
- goto error;
- }
-
- tex = talloc_zero(NULL, struct ra_tex);
- tex->params = (struct ra_tex_params) {
- .format = format,
- .dimensions = 2,
- .w = info.imageExtent.width,
- .h = info.imageExtent.height,
- .d = 1,
- .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
- .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
- .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT),
- .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
- .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT),
- };
-
- struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
- tex_vk->type = VK_IMAGE_TYPE_2D;
- tex_vk->external_img = true;
- tex_vk->img = vkimg;
-
- if (!vk_init_image(ra, tex))
- goto error;
-
- return tex;
-
-error:
- vk_tex_destroy(ra, tex);
- return NULL;
-}
-
-// For ra_buf.priv
-struct ra_buf_vk {
- struct vk_bufslice slice;
- int refcount; // 1 = object allocated but not in use, > 1 = in use
- bool needsflush;
- enum queue_type update_queue;
- // "current" metadata, can change during course of execution
- VkPipelineStageFlags current_stage;
- VkAccessFlags current_access;
- // Arbitrary user data for the creator of a buffer
- void *user_data;
-};
-
-void ra_vk_buf_set_user_data(struct ra_buf *buf, void *user_data) {
- struct ra_buf_vk *vk_priv = buf->priv;
- vk_priv->user_data = user_data;
-}
-
-void *ra_vk_buf_get_user_data(struct ra_buf *buf) {
- struct ra_buf_vk *vk_priv = buf->priv;
- return vk_priv->user_data;
-}
-
-static void vk_buf_deref(struct ra *ra, struct ra_buf *buf)
-{
- if (!buf)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_buf_vk *buf_vk = buf->priv;
-
- if (--buf_vk->refcount == 0) {
- vk_free_memslice(vk, buf_vk->slice.mem);
- talloc_free(buf);
- }
-}
-
-static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf,
- VkPipelineStageFlags newStage,
- VkAccessFlags newAccess, int offset, size_t size)
-{
- struct ra_buf_vk *buf_vk = buf->priv;
-
- VkBufferMemoryBarrier buffBarrier = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
- .srcAccessMask = buf_vk->current_access,
- .dstAccessMask = newAccess,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = buf_vk->slice.buf,
- .offset = offset,
- .size = size,
- };
-
- if (buf_vk->needsflush || buf->params.host_mapped) {
- buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
- buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT;
- buf_vk->needsflush = false;
- }
-
- if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) {
- vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0,
- 0, NULL, 1, &buffBarrier, 0, NULL);
- }
-
- buf_vk->current_stage = newStage;
- buf_vk->current_access = newAccess;
- buf_vk->refcount++;
- vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf);
-}
-
-#define vk_buf_destroy vk_buf_deref
-MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf);
-
-static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
- const void *data, size_t size)
-{
- assert(buf->params.host_mutable || buf->params.initial_data);
- struct ra_buf_vk *buf_vk = buf->priv;
-
- // For host-mapped buffers, we can just directly memcpy the buffer contents.
- // Otherwise, we can update the buffer from the GPU using a command buffer.
- if (buf_vk->slice.data) {
- assert(offset + size <= buf->params.size);
- uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset;
- memcpy((void *)addr, data, size);
- buf_vk->needsflush = true;
- } else {
- struct vk_cmd *cmd = vk_require_cmd(ra, buf_vk->update_queue);
- if (!cmd) {
- MP_ERR(ra, "Failed updating buffer!\n");
- return;
- }
-
- buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT, offset, size);
-
- VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset;
- assert(bufOffset == MP_ALIGN_UP(bufOffset, 4));
- vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data);
- }
-}
-
-static struct ra_buf *vk_buf_create(struct ra *ra,
- const struct ra_buf_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
- buf->params = *params;
-
- struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk);
- buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
- buf_vk->current_access = 0;
- buf_vk->refcount = 1;
-
- VkBufferUsageFlags bufFlags = 0;
- VkMemoryPropertyFlags memFlags = 0;
- VkDeviceSize align = 4; // alignment 4 is needed for buf_update
- bool exportable = false;
-
- switch (params->type) {
- case RA_BUF_TYPE_TEX_UPLOAD:
- bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
- memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
- // Use TRANSFER-style updates for large enough buffers for efficiency
- if (params->size > 1024*1024) // 1 MB
- buf_vk->update_queue = TRANSFER;
- break;
- case RA_BUF_TYPE_UNIFORM:
- bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
- memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment);
- break;
- case RA_BUF_TYPE_SHADER_STORAGE:
- bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
- memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
- buf_vk->update_queue = COMPUTE;
- break;
- case RA_BUF_TYPE_VERTEX:
- bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
- memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- break;
- case RA_BUF_TYPE_SHARED_MEMORY:
- bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
- memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- exportable = true;
- break;
- default: abort();
- }
-
- if (params->host_mutable || params->initial_data) {
- bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment);
- }
-
- if (params->host_mapped) {
- memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
- }
-
- if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
- exportable, &buf_vk->slice))
- {
- goto error;
- }
-
- if (params->host_mapped)
- buf->data = buf_vk->slice.data;
-
- if (params->initial_data)
- vk_buf_update(ra, buf, 0, params->initial_data, params->size);
-
- buf->params.initial_data = NULL; // do this after vk_buf_update
- return buf;
-
-error:
- vk_buf_destroy(ra, buf);
- return NULL;
-}
-
-static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf)
-{
- struct ra_buf_vk *buf_vk = buf->priv;
- return buf_vk->refcount == 1;
-}
-
-static bool vk_tex_upload(struct ra *ra,
- const struct ra_tex_upload_params *params)
-{
- struct ra_tex *tex = params->tex;
- struct ra_tex_vk *tex_vk = tex->priv;
-
- if (!params->buf)
- return ra_tex_upload_pbo(ra, &tex_vk->pbo, params);
-
- assert(!params->src);
- assert(params->buf);
- struct ra_buf *buf = params->buf;
- struct ra_buf_vk *buf_vk = buf->priv;
-
- VkBufferImageCopy region = {
- .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset,
- .bufferRowLength = tex->params.w,
- .bufferImageHeight = tex->params.h,
- .imageSubresource = vk_layers,
- .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d},
- };
-
- if (tex->params.dimensions == 2) {
- int pix_size = tex->params.format->pixel_size;
- region.bufferRowLength = params->stride / pix_size;
- if (region.bufferRowLength * pix_size != params->stride) {
- MP_ERR(ra, "Texture upload strides must be a multiple of the texel "
- "size!\n");
- goto error;
- }
-
- if (params->rc) {
- struct mp_rect *rc = params->rc;
- region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0};
- region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1};
- region.bufferImageHeight = region.imageExtent.height;
- }
- }
-
- uint64_t size = region.bufferRowLength * region.bufferImageHeight *
- region.imageExtent.depth;
-
- struct vk_cmd *cmd = vk_require_cmd(ra, tex_vk->upload_queue);
- if (!cmd)
- goto error;
-
- buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size);
-
- tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- params->invalidate);
-
- vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img,
- tex_vk->current_layout, 1, &region);
-
- tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT);
-
- return true;
-
-error:
- return false;
-}
-
-static bool ra_vk_mem_get_external_info(struct ra *ra, struct vk_memslice *mem, struct vk_external_mem *ret)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
-#if HAVE_WIN32_DESKTOP
- HANDLE mem_handle;
-
- VkMemoryGetWin32HandleInfoKHR info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
- .pNext = NULL,
- .memory = mem->vkmem,
- .handleType = IsWindows8OrGreater()
- ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
- : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
- };
-
- VK_LOAD_PFN(vkGetMemoryWin32HandleKHR);
- VK(pfn_vkGetMemoryWin32HandleKHR(vk->dev, &info, &mem_handle));
-
- ret->mem_handle = mem_handle;
-#else
- int mem_fd;
-
- VkMemoryGetFdInfoKHR info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
- .pNext = NULL,
- .memory = mem->vkmem,
- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
- };
-
- VK_LOAD_PFN(vkGetMemoryFdKHR);
- VK(pfn_vkGetMemoryFdKHR(vk->dev, &info, &mem_fd));
-
- ret->mem_fd = mem_fd;
-#endif
- ret->size = mem->size;
- ret->offset = mem->offset;
- ret->mem_size = mem->slab_size;
-
- return true;
-
-error:
- return false;
-}
-
-bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret)
-{
- if (buf->params.type != RA_BUF_TYPE_SHARED_MEMORY) {
- MP_ERR(ra, "Buffer must be of TYPE_SHARED_MEMORY to be able to export it...");
- return false;
- }
-
- struct ra_buf_vk *buf_vk = buf->priv;
- struct vk_memslice *mem = &buf_vk->slice.mem;
-
- return ra_vk_mem_get_external_info(ra, mem, ret);
-}
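// Editor's sketch of the kind of export this function provided to consumers
// such as the CUDA interop (hedged illustration; `ra` is assumed to be a
// Vulkan-backed ra instance):
struct ra_buf_params ext_params = {
    .type = RA_BUF_TYPE_SHARED_MEMORY,
    .size = 1 << 20,
};
struct ra_buf *exp_buf = ra_buf_create(ra, &ext_params);
struct vk_external_mem ext_info = {0};
if (exp_buf && ra_vk_buf_get_external_info(ra, exp_buf, &ext_info)) {
    // ext_info.mem_fd (POSIX) or ext_info.mem_handle (Windows) refers to the
    // whole backing slab of ext_info.mem_size bytes; the buffer itself starts
    // at ext_info.offset and spans ext_info.size bytes of it.
}
ra_buf_free(ra, &exp_buf);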
-
-#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH
-
-// For ra_renderpass.priv
-struct ra_renderpass_vk {
- // Pipeline / render pass
- VkPipeline pipe;
- VkPipelineLayout pipeLayout;
- VkRenderPass renderPass;
- VkImageLayout initialLayout;
- VkImageLayout finalLayout;
- // Descriptor set (bindings)
- VkDescriptorSetLayout dsLayout;
- VkDescriptorPool dsPool;
- VkDescriptorSet dss[MPVK_NUM_DS];
- int dindex;
- // Vertex buffers (vertices)
- struct ra_buf_pool vbo;
-
- // For updating
- VkWriteDescriptorSet *dswrite;
- VkDescriptorImageInfo *dsiinfo;
- VkDescriptorBufferInfo *dsbinfo;
-};
-
-static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
-{
- if (!pass)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_renderpass_vk *pass_vk = pass->priv;
-
- ra_buf_pool_uninit(ra, &pass_vk->vbo);
- vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR);
- vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR);
- vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR);
- vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR);
- vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR);
-
- talloc_free(pass);
-}
-
-MAKE_LAZY_DESTRUCTOR(vk_renderpass_destroy, struct ra_renderpass);
-
-static const VkDescriptorType dsType[] = {
- [RA_VARTYPE_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- [RA_VARTYPE_IMG_W] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- [RA_VARTYPE_BUF_RO] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
- [RA_VARTYPE_BUF_RW] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-};
-
-static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp,
- VkFormat *out_fmt)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- enum ra_ctype ctype;
- switch (inp->type) {
- case RA_VARTYPE_FLOAT: ctype = RA_CTYPE_FLOAT; break;
- case RA_VARTYPE_BYTE_UNORM: ctype = RA_CTYPE_UNORM; break;
- default: abort();
- }
-
- assert(inp->dim_m == 1);
- for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) {
- if (fmt->ctype != ctype)
- continue;
- if (fmt->components != inp->dim_v)
- continue;
- if (fmt->bytes != ra_renderpass_input_layout(inp).size)
- continue;
-
- // Ensure this format is valid for vertex attributes
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
- if (!(prop.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
- continue;
-
- *out_fmt = fmt->iformat;
- return true;
- }
-
- return false;
-}
-
-static const char vk_cache_magic[4] = {'R','A','V','K'};
-static const int vk_cache_version = 2;
-
-struct vk_cache_header {
- char magic[sizeof(vk_cache_magic)];
- int cache_version;
- char compiler[SPIRV_NAME_MAX_LEN];
- int compiler_version;
- size_t vert_spirv_len;
- size_t frag_spirv_len;
- size_t comp_spirv_len;
- size_t pipecache_len;
-};
-
-static bool vk_use_cached_program(const struct ra_renderpass_params *params,
- const struct spirv_compiler *spirv,
- struct bstr *vert_spirv,
- struct bstr *frag_spirv,
- struct bstr *comp_spirv,
- struct bstr *pipecache)
-{
- struct bstr cache = params->cached_program;
- if (cache.len < sizeof(struct vk_cache_header))
- return false;
-
- struct vk_cache_header *header = (struct vk_cache_header *)cache.start;
- cache = bstr_cut(cache, sizeof(*header));
-
- if (strncmp(header->magic, vk_cache_magic, sizeof(vk_cache_magic)) != 0)
- return false;
- if (header->cache_version != vk_cache_version)
- return false;
- if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0)
- return false;
- if (header->compiler_version != spirv->compiler_version)
- return false;
-
-#define GET(ptr) \
- if (cache.len < header->ptr##_len) \
- return false; \
- *ptr = bstr_splice(cache, 0, header->ptr##_len); \
- cache = bstr_cut(cache, ptr->len);
-
- GET(vert_spirv);
- GET(frag_spirv);
- GET(comp_spirv);
- GET(pipecache);
- return true;
-}
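// Editor's note: the cached_program blob parsed above is laid out exactly as
// it is written back near the end of vk_renderpass_create() below: a
// struct vk_cache_header immediately followed by the vertex, fragment and
// compute SPIR-V blobs and the VkPipelineCache data, each sized by the
// corresponding *_len field in the header.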
-
-static VkResult vk_compile_glsl(struct ra *ra, void *tactx,
- enum glsl_shader type, const char *glsl,
- struct bstr *spirv)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- VkResult ret = VK_SUCCESS;
- int msgl = MSGL_DEBUG;
-
- if (!vk->spirv->fns->compile_glsl(vk->spirv, tactx, type, glsl, spirv)) {
- ret = VK_ERROR_INVALID_SHADER_NV;
- msgl = MSGL_ERR;
- }
-
- static const char *shader_names[] = {
- [GLSL_SHADER_VERTEX] = "vertex",
- [GLSL_SHADER_FRAGMENT] = "fragment",
- [GLSL_SHADER_COMPUTE] = "compute",
- };
-
- if (mp_msg_test(ra->log, msgl)) {
- MP_MSG(ra, msgl, "%s shader source:\n", shader_names[type]);
- mp_log_source(ra->log, msgl, glsl);
- }
- return ret;
-}
-
-static const VkShaderStageFlags stageFlags[] = {
- [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT,
- [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
-};
-
-static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
- const struct ra_renderpass_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- bool success = false;
- assert(vk->spirv);
-
- struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
- pass->params = *ra_renderpass_params_copy(pass, params);
- pass->params.cached_program = (bstr){0};
- struct ra_renderpass_vk *pass_vk = pass->priv =
- talloc_zero(pass, struct ra_renderpass_vk);
-
- // temporary allocations/objects
- void *tmp = talloc_new(NULL);
- VkPipelineCache pipeCache = NULL;
- VkShaderModule vert_shader = NULL;
- VkShaderModule frag_shader = NULL;
- VkShaderModule comp_shader = NULL;
-
- static int dsCount[RA_VARTYPE_COUNT] = {0};
- VkDescriptorSetLayoutBinding *bindings = NULL;
- int num_bindings = 0;
-
- for (int i = 0; i < params->num_inputs; i++) {
- struct ra_renderpass_input *inp = &params->inputs[i];
- switch (inp->type) {
- case RA_VARTYPE_TEX:
- case RA_VARTYPE_IMG_W:
- case RA_VARTYPE_BUF_RO:
- case RA_VARTYPE_BUF_RW: {
- VkDescriptorSetLayoutBinding desc = {
- .binding = inp->binding,
- .descriptorType = dsType[inp->type],
- .descriptorCount = 1,
- .stageFlags = stageFlags[params->type],
- };
-
- MP_TARRAY_APPEND(tmp, bindings, num_bindings, desc);
- dsCount[inp->type]++;
- break;
- }
- default: abort();
- }
- }
-
- VkDescriptorPoolSize *dsPoolSizes = NULL;
- int poolSizeCount = 0;
-
- for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) {
- if (dsCount[t] > 0) {
- VkDescriptorPoolSize dssize = {
- .type = dsType[t],
- .descriptorCount = dsCount[t] * MPVK_NUM_DS,
- };
-
- MP_TARRAY_APPEND(tmp, dsPoolSizes, poolSizeCount, dssize);
- }
- }
-
- VkDescriptorPoolCreateInfo pinfo = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
- .maxSets = MPVK_NUM_DS,
- .pPoolSizes = dsPoolSizes,
- .poolSizeCount = poolSizeCount,
- };
-
- VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool));
-
- pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings);
- pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings);
- pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_bindings);
-
- VkDescriptorSetLayoutCreateInfo dinfo = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .pBindings = bindings,
- .bindingCount = num_bindings,
- };
-
- VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, MPVK_ALLOCATOR,
- &pass_vk->dsLayout));
-
- VkDescriptorSetLayout layouts[MPVK_NUM_DS];
- for (int i = 0; i < MPVK_NUM_DS; i++)
- layouts[i] = pass_vk->dsLayout;
-
- VkDescriptorSetAllocateInfo ainfo = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
- .descriptorPool = pass_vk->dsPool,
- .descriptorSetCount = MPVK_NUM_DS,
- .pSetLayouts = layouts,
- };
-
- VK(vkAllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss));
-
- VkPipelineLayoutCreateInfo linfo = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &pass_vk->dsLayout,
- .pushConstantRangeCount = params->push_constants_size ? 1 : 0,
- .pPushConstantRanges = &(VkPushConstantRange){
- .stageFlags = stageFlags[params->type],
- .offset = 0,
- .size = params->push_constants_size,
- },
- };
-
- VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
- &pass_vk->pipeLayout));
-
- struct bstr vert = {0}, frag = {0}, comp = {0}, pipecache = {0};
- if (vk_use_cached_program(params, vk->spirv, &vert, &frag, &comp, &pipecache)) {
- MP_VERBOSE(ra, "Using cached SPIR-V and VkPipeline.\n");
- } else {
- pipecache.len = 0;
- switch (params->type) {
- case RA_RENDERPASS_TYPE_RASTER:
- VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_VERTEX,
- params->vertex_shader, &vert));
- VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_FRAGMENT,
- params->frag_shader, &frag));
- comp.len = 0;
- break;
- case RA_RENDERPASS_TYPE_COMPUTE:
- VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_COMPUTE,
- params->compute_shader, &comp));
- frag.len = 0;
- vert.len = 0;
- break;
- }
- }
-
- VkPipelineCacheCreateInfo pcinfo = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
- .pInitialData = pipecache.start,
- .initialDataSize = pipecache.len,
- };
-
- VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pipeCache));
-
- VkShaderModuleCreateInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
- };
-
- switch (params->type) {
- case RA_RENDERPASS_TYPE_RASTER: {
- sinfo.pCode = (uint32_t *)vert.start;
- sinfo.codeSize = vert.len;
- VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &vert_shader));
-
- sinfo.pCode = (uint32_t *)frag.start;
- sinfo.codeSize = frag.len;
- VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &frag_shader));
-
- VkVertexInputAttributeDescription *attrs = talloc_array(tmp,
- VkVertexInputAttributeDescription, params->num_vertex_attribs);
-
- for (int i = 0; i < params->num_vertex_attribs; i++) {
- struct ra_renderpass_input *inp = &params->vertex_attribs[i];
- attrs[i] = (VkVertexInputAttributeDescription) {
- .location = i,
- .binding = 0,
- .offset = inp->offset,
- };
-
- if (!vk_get_input_format(ra, inp, &attrs[i].format)) {
- MP_ERR(ra, "No suitable VkFormat for vertex attrib '%s'!\n",
- inp->name);
- goto error;
- }
- }
-
- // This is the most common case, so optimize towards it. In this case,
- // the renderpass will take care of almost all layout transitions
- pass_vk->initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- pass_vk->finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- VkAttachmentLoadOp loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
-
- // If we're blending, then we need to explicitly load the previous
- // contents of the color attachment
- if (pass->params.enable_blend)
- loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
-
- // If we're invalidating the target, we don't need to load or transition
- if (pass->params.invalidate_target) {
- pass_vk->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
- loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
- }
-
- VK(vk_create_render_pass(vk->dev, params->target_format, loadOp,
- pass_vk->initialLayout, pass_vk->finalLayout,
- &pass_vk->renderPass));
-
- static const VkBlendFactor blendFactors[] = {
- [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO,
- [RA_BLEND_ONE] = VK_BLEND_FACTOR_ONE,
- [RA_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA,
- [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
- };
-
- VkGraphicsPipelineCreateInfo cinfo = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = (VkPipelineShaderStageCreateInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vert_shader,
- .pName = "main",
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = frag_shader,
- .pName = "main",
- }
- },
- .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 1,
- .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) {
- .binding = 0,
- .stride = params->vertex_stride,
- .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
- },
- .vertexAttributeDescriptionCount = params->num_vertex_attribs,
- .pVertexAttributeDescriptions = attrs,
- },
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .lineWidth = 1.0f,
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = &(VkPipelineColorBlendAttachmentState) {
- .blendEnable = params->enable_blend,
- .colorBlendOp = VK_BLEND_OP_ADD,
- .srcColorBlendFactor = blendFactors[params->blend_src_rgb],
- .dstColorBlendFactor = blendFactors[params->blend_dst_rgb],
- .alphaBlendOp = VK_BLEND_OP_ADD,
- .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha],
- .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha],
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT |
- VK_COLOR_COMPONENT_A_BIT,
- },
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]){
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = pass_vk->pipeLayout,
- .renderPass = pass_vk->renderPass,
- };
-
- VK(vkCreateGraphicsPipelines(vk->dev, pipeCache, 1, &cinfo,
- MPVK_ALLOCATOR, &pass_vk->pipe));
- break;
- }
- case RA_RENDERPASS_TYPE_COMPUTE: {
- sinfo.pCode = (uint32_t *)comp.start;
- sinfo.codeSize = comp.len;
- VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &comp_shader));
-
- VkComputePipelineCreateInfo cinfo = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = comp_shader,
- .pName = "main",
- },
- .layout = pass_vk->pipeLayout,
- };
-
- VK(vkCreateComputePipelines(vk->dev, pipeCache, 1, &cinfo,
- MPVK_ALLOCATOR, &pass_vk->pipe));
- break;
- }
- }
-
- // Update params->cached_program
- struct bstr cache = {0};
- VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, NULL));
- cache.start = talloc_size(tmp, cache.len);
- VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, cache.start));
-
- struct vk_cache_header header = {
- .cache_version = vk_cache_version,
- .compiler_version = vk->spirv->compiler_version,
- .vert_spirv_len = vert.len,
- .frag_spirv_len = frag.len,
- .comp_spirv_len = comp.len,
- .pipecache_len = cache.len,
- };
-
- for (int i = 0; i < MP_ARRAY_SIZE(header.magic); i++)
- header.magic[i] = vk_cache_magic[i];
- for (int i = 0; i < sizeof(vk->spirv->name); i++)
- header.compiler[i] = vk->spirv->name[i];
-
- struct bstr *prog = &pass->params.cached_program;
- bstr_xappend(pass, prog, (struct bstr){ (char *) &header, sizeof(header) });
- bstr_xappend(pass, prog, vert);
- bstr_xappend(pass, prog, frag);
- bstr_xappend(pass, prog, comp);
- bstr_xappend(pass, prog, cache);
-
- success = true;
-
-error:
- if (!success) {
- vk_renderpass_destroy(ra, pass);
- pass = NULL;
- }
-
- vkDestroyShaderModule(vk->dev, vert_shader, MPVK_ALLOCATOR);
- vkDestroyShaderModule(vk->dev, frag_shader, MPVK_ALLOCATOR);
- vkDestroyShaderModule(vk->dev, comp_shader, MPVK_ALLOCATOR);
- vkDestroyPipelineCache(vk->dev, pipeCache, MPVK_ALLOCATOR);
- talloc_free(tmp);
- return pass;
-}
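(Aside, for illustration only: the cached_program blob assembled above is just a vk_cache_header followed by the raw vertex/fragment/compute SPIR-V and the pipeline cache data. A loader wanting to reuse it would validate the header before trusting any of the lengths; the sketch below shows such a check under the assumption that the expected compiler_version comes from vk->spirv. It is not the actual loading code used elsewhere in ra_vk.c.)

static bool vk_cache_header_ok(struct bstr prog, int compiler_version)
{
    if (prog.len < sizeof(struct vk_cache_header))
        return false;

    struct vk_cache_header *hdr = (struct vk_cache_header *)prog.start;
    if (memcmp(hdr->magic, vk_cache_magic, sizeof(hdr->magic)) != 0)
        return false;
    if (hdr->cache_version != vk_cache_version)
        return false;
    if (hdr->compiler_version != compiler_version)
        return false;

    // A full check would also compare hdr->compiler against the compiler name.
    // The payload must be exactly: header + SPIR-V blobs + pipeline cache data.
    return prog.len == sizeof(*hdr) + hdr->vert_spirv_len +
           hdr->frag_spirv_len + hdr->comp_spirv_len + hdr->pipecache_len;
}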
-
-static const VkPipelineStageFlags passStages[] = {
- [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
- [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-};
-
-static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd,
- struct ra_renderpass *pass,
- struct ra_renderpass_input_val val,
- VkDescriptorSet ds, int idx)
-{
- struct ra_renderpass_vk *pass_vk = pass->priv;
- struct ra_renderpass_input *inp = &pass->params.inputs[val.index];
-
- VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx];
- *wds = (VkWriteDescriptorSet) {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstSet = ds,
- .dstBinding = inp->binding,
- .descriptorCount = 1,
- .descriptorType = dsType[inp->type],
- };
-
- switch (inp->type) {
- case RA_VARTYPE_TEX: {
- struct ra_tex *tex = *(struct ra_tex **)val.data;
- struct ra_tex_vk *tex_vk = tex->priv;
-
- assert(tex->params.render_src);
- tex_barrier(ra, cmd, tex, passStages[pass->params.type],
- VK_ACCESS_SHADER_READ_BIT,
- VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false);
-
- VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
- *iinfo = (VkDescriptorImageInfo) {
- .sampler = tex_vk->sampler,
- .imageView = tex_vk->view,
- .imageLayout = tex_vk->current_layout,
- };
-
- wds->pImageInfo = iinfo;
- break;
- }
- case RA_VARTYPE_IMG_W: {
- struct ra_tex *tex = *(struct ra_tex **)val.data;
- struct ra_tex_vk *tex_vk = tex->priv;
-
- assert(tex->params.storage_dst);
- tex_barrier(ra, cmd, tex, passStages[pass->params.type],
- VK_ACCESS_SHADER_WRITE_BIT,
- VK_IMAGE_LAYOUT_GENERAL, false);
-
- VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
- *iinfo = (VkDescriptorImageInfo) {
- .imageView = tex_vk->view,
- .imageLayout = tex_vk->current_layout,
- };
-
- wds->pImageInfo = iinfo;
- break;
- }
- case RA_VARTYPE_BUF_RO:
- case RA_VARTYPE_BUF_RW: {
- struct ra_buf *buf = *(struct ra_buf **)val.data;
- struct ra_buf_vk *buf_vk = buf->priv;
-
- VkAccessFlags access = VK_ACCESS_SHADER_READ_BIT;
- if (inp->type == RA_VARTYPE_BUF_RW)
- access |= VK_ACCESS_SHADER_WRITE_BIT;
-
- buf_barrier(ra, cmd, buf, passStages[pass->params.type],
- access, buf_vk->slice.mem.offset, buf->params.size);
-
- VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
- *binfo = (VkDescriptorBufferInfo) {
- .buffer = buf_vk->slice.buf,
- .offset = buf_vk->slice.mem.offset,
- .range = buf->params.size,
- };
-
- wds->pBufferInfo = binfo;
- break;
- }
- }
-}
-
-static void vk_release_descriptor(struct ra *ra, struct vk_cmd *cmd,
- struct ra_renderpass *pass,
- struct ra_renderpass_input_val val)
-{
- struct ra_renderpass_input *inp = &pass->params.inputs[val.index];
-
- switch (inp->type) {
- case RA_VARTYPE_IMG_W:
- case RA_VARTYPE_TEX: {
- struct ra_tex *tex = *(struct ra_tex **)val.data;
- tex_signal(ra, cmd, tex, passStages[pass->params.type]);
- break;
- }
- }
-}
-
-static void vk_renderpass_run(struct ra *ra,
- const struct ra_renderpass_run_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_renderpass *pass = params->pass;
- struct ra_renderpass_vk *pass_vk = pass->priv;
-
- static const enum queue_type types[] = {
- [RA_RENDERPASS_TYPE_RASTER] = GRAPHICS,
- [RA_RENDERPASS_TYPE_COMPUTE] = COMPUTE,
- };
-
- struct vk_cmd *cmd = vk_require_cmd(ra, types[pass->params.type]);
- if (!cmd)
- goto error;
-
- static const VkPipelineBindPoint bindPoint[] = {
- [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS,
- [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE,
- };
-
- vkCmdBindPipeline(cmd->buf, bindPoint[pass->params.type], pass_vk->pipe);
-
- VkDescriptorSet ds = pass_vk->dss[pass_vk->dindex++];
- pass_vk->dindex %= MPVK_NUM_DS;
-
- for (int i = 0; i < params->num_values; i++)
- vk_update_descriptor(ra, cmd, pass, params->values[i], ds, i);
-
- if (params->num_values > 0) {
- vkUpdateDescriptorSets(vk->dev, params->num_values, pass_vk->dswrite,
- 0, NULL);
- }
-
- vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
- pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
-
- if (pass->params.push_constants_size) {
- vkCmdPushConstants(cmd->buf, pass_vk->pipeLayout,
- stageFlags[pass->params.type], 0,
- pass->params.push_constants_size,
- params->push_constants);
- }
-
- switch (pass->params.type) {
- case RA_RENDERPASS_TYPE_COMPUTE:
- vkCmdDispatch(cmd->buf, params->compute_groups[0],
- params->compute_groups[1],
- params->compute_groups[2]);
- break;
- case RA_RENDERPASS_TYPE_RASTER: {
- struct ra_tex *tex = params->target;
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(tex->params.render_dst);
-
- struct ra_buf_params buf_params = {
- .type = RA_BUF_TYPE_VERTEX,
- .size = params->vertex_count * pass->params.vertex_stride,
- .host_mutable = true,
- };
-
- struct ra_buf *buf = ra_buf_pool_get(ra, &pass_vk->vbo, &buf_params);
- if (!buf) {
- MP_ERR(ra, "Failed allocating vertex buffer!\n");
- goto error;
- }
- struct ra_buf_vk *buf_vk = buf->priv;
-
- vk_buf_update(ra, buf, 0, params->vertex_data, buf_params.size);
-
- buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
- VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
- buf_vk->slice.mem.offset, buf_params.size);
-
- vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf,
- &buf_vk->slice.mem.offset);
-
- tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, pass_vk->initialLayout,
- pass->params.invalidate_target);
-
- VkViewport viewport = {
- .x = params->viewport.x0,
- .y = params->viewport.y0,
- .width = mp_rect_w(params->viewport),
- .height = mp_rect_h(params->viewport),
- };
-
- VkRect2D scissor = {
- .offset = {params->scissors.x0, params->scissors.y0},
- .extent = {mp_rect_w(params->scissors), mp_rect_h(params->scissors)},
- };
-
- vkCmdSetViewport(cmd->buf, 0, 1, &viewport);
- vkCmdSetScissor(cmd->buf, 0, 1, &scissor);
-
- VkRenderPassBeginInfo binfo = {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = pass_vk->renderPass,
- .framebuffer = tex_vk->framebuffer,
- .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}},
- };
-
- vkCmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE);
- vkCmdDraw(cmd->buf, params->vertex_count, 1, 0, 0);
- vkCmdEndRenderPass(cmd->buf);
-
- // The renderPass implicitly transitions the texture to this layout
- tex_vk->current_layout = pass_vk->finalLayout;
- tex_vk->current_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
- break;
- }
- default: abort();
- };
-
- for (int i = 0; i < params->num_values; i++)
- vk_release_descriptor(ra, cmd, pass, params->values[i]);
-
- // flush the work so far into its own command buffer, for better cross-frame
- // granularity
- vk_submit(ra);
-
-error:
- return;
-}
-
-static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
- struct mp_rect *dst_rc, struct mp_rect *src_rc)
-{
- assert(src->params.blit_src);
- assert(dst->params.blit_dst);
-
- struct ra_tex_vk *src_vk = src->priv;
- struct ra_tex_vk *dst_vk = dst->priv;
-
- struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
- if (!cmd)
- return;
-
- tex_barrier(ra, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_READ_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
- false);
-
- bool discard = dst_rc->x0 == 0 &&
- dst_rc->y0 == 0 &&
- dst_rc->x1 == dst->params.w &&
- dst_rc->y1 == dst->params.h;
-
- tex_barrier(ra, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
- discard);
-
- // Under certain conditions we can use vkCmdCopyImage instead of
- // vkCmdBlitImage, namely when the blit operation does not require
- // scaling, and the formats are compatible.
- if (src->params.format->pixel_size == dst->params.format->pixel_size &&
- mp_rect_w(*src_rc) == mp_rect_w(*dst_rc) &&
- mp_rect_h(*src_rc) == mp_rect_h(*dst_rc) &&
- mp_rect_w(*src_rc) >= 0 && mp_rect_h(*src_rc) >= 0)
- {
- VkImageCopy region = {
- .srcSubresource = vk_layers,
- .dstSubresource = vk_layers,
- .srcOffset = {src_rc->x0, src_rc->y0, 0},
- .dstOffset = {dst_rc->x0, dst_rc->y0, 0},
- .extent = {mp_rect_w(*src_rc), mp_rect_h(*src_rc), 1},
- };
-
- vkCmdCopyImage(cmd->buf, src_vk->img, src_vk->current_layout,
- dst_vk->img, dst_vk->current_layout, 1, &region);
- } else {
- VkImageBlit region = {
- .srcSubresource = vk_layers,
- .dstSubresource = vk_layers,
- .srcOffsets = {{src_rc->x0, src_rc->y0, 0},
- {src_rc->x1, src_rc->y1, 1}},
- .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0},
- {dst_rc->x1, dst_rc->y1, 1}},
- };
-
- vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout,
- dst_vk->img, dst_vk->current_layout, 1, &region,
- VK_FILTER_NEAREST);
- }
-
- tex_signal(ra, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT);
- tex_signal(ra, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT);
-}
-
-static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4],
- struct mp_rect *rc)
-{
- struct ra_vk *p = ra->priv;
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(tex->params.blit_dst);
-
- struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
- if (!cmd)
- return;
-
- struct mp_rect full = {0, 0, tex->params.w, tex->params.h};
- if (!rc || mp_rect_equals(rc, &full)) {
- // To clear the entire image, we can use the efficient clear command
- tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
- VK_ACCESS_TRANSFER_WRITE_BIT,
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true);
-
- VkClearColorValue clearColor = {0};
- for (int c = 0; c < 4; c++)
- clearColor.float32[c] = color[c];
-
- vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout,
- &clearColor, 1, &vk_range);
-
- tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT);
- } else {
- // To simulate per-region clearing, we blit from a 1x1 texture instead
- struct ra_tex_upload_params ul_params = {
- .tex = p->clear_tex,
- .invalidate = true,
- .src = &color[0],
- };
- vk_tex_upload(ra, &ul_params);
- vk_blit(ra, tex, p->clear_tex, rc, &(struct mp_rect){0, 0, 1, 1});
- }
-}
-
-static int vk_desc_namespace(struct ra *ra, enum ra_vartype type)
-{
- return 0;
-}
-
-#define VK_QUERY_POOL_SIZE (MPVK_MAX_STREAMING_DEPTH * 4)
-
-struct vk_timer {
- VkQueryPool pool;
- int index_seen; // keeps track of which indices have been used at least once
- int index;
- uint64_t result;
-};
-
-static void vk_timer_destroy(struct ra *ra, ra_timer *ratimer)
-{
- if (!ratimer)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct vk_timer *timer = ratimer;
-
- vkDestroyQueryPool(vk->dev, timer->pool, MPVK_ALLOCATOR);
-
- talloc_free(timer);
-}
-
-MAKE_LAZY_DESTRUCTOR(vk_timer_destroy, ra_timer);
-
-static ra_timer *vk_timer_create(struct ra *ra)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- struct vk_timer *timer = talloc_zero(NULL, struct vk_timer);
- timer->index_seen = -1;
-
- struct VkQueryPoolCreateInfo qinfo = {
- .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
- .queryType = VK_QUERY_TYPE_TIMESTAMP,
- .queryCount = VK_QUERY_POOL_SIZE,
- };
-
- VK(vkCreateQueryPool(vk->dev, &qinfo, MPVK_ALLOCATOR, &timer->pool));
-
- return (ra_timer *)timer;
-
-error:
- vk_timer_destroy(ra, timer);
- return NULL;
-}
-
-static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index,
- VkPipelineStageFlags stage)
-{
- struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
- if (!cmd)
- return;
-
- vkCmdWriteTimestamp(cmd->buf, stage, pool, index);
-}
-
-static void vk_timer_start(struct ra *ra, ra_timer *ratimer)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct vk_timer *timer = ratimer;
-
- VkResult res = VK_NOT_READY;
- uint64_t out[2];
-
- if (timer->index <= timer->index_seen) {
- res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2,
- sizeof(out), &out[0], sizeof(uint64_t),
- VK_QUERY_RESULT_64_BIT);
- }
-
- switch (res) {
- case VK_SUCCESS:
- timer->result = (out[1] - out[0]) * vk->limits.timestampPeriod;
- break;
- case VK_NOT_READY:
- timer->result = 0;
- break;
- default:
- MP_WARN(vk, "Failed reading timer query result: %s\n", vk_err(res));
- return;
- };
-
- vk_timer_record(ra, timer->pool, timer->index,
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
-}
-
-static uint64_t vk_timer_stop(struct ra *ra, ra_timer *ratimer)
-{
- struct vk_timer *timer = ratimer;
- vk_timer_record(ra, timer->pool, timer->index + 1,
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
-
- timer->index_seen = MPMAX(timer->index_seen, timer->index);
- timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE;
-
- return timer->result;
-}
-
-static struct ra_fns ra_fns_vk = {
- .destroy = vk_destroy_ra,
- .tex_create = vk_tex_create,
- .tex_destroy = vk_tex_destroy_lazy,
- .tex_upload = vk_tex_upload,
- .buf_create = vk_buf_create,
- .buf_destroy = vk_buf_destroy_lazy,
- .buf_update = vk_buf_update,
- .buf_poll = vk_buf_poll,
- .clear = vk_clear,
- .blit = vk_blit,
- .uniform_layout = std140_layout,
- .push_constant_layout = std430_layout,
- .desc_namespace = vk_desc_namespace,
- .renderpass_create = vk_renderpass_create,
- .renderpass_destroy = vk_renderpass_destroy_lazy,
- .renderpass_run = vk_renderpass_run,
- .timer_create = vk_timer_create,
- .timer_destroy = vk_timer_destroy_lazy,
- .timer_start = vk_timer_start,
- .timer_stop = vk_timer_stop,
-};
-
-struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex)
-{
- struct ra_vk *p = ra->priv;
- struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
- if (!cmd)
- return NULL;
-
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(tex_vk->external_img);
- tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- VK_ACCESS_MEMORY_READ_BIT, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
- false);
-
- // Return this directly instead of going through vk_submit
- p->cmd = NULL;
- return cmd;
-}
diff --git a/video/out/vulkan/ra_vk.h b/video/out/vulkan/ra_vk.h
deleted file mode 100644
index 393c01a3c1..0000000000
--- a/video/out/vulkan/ra_vk.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-
-#include "video/out/gpu/ra.h"
-
-#include "common.h"
-#include "utils.h"
-
-struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log);
-
-// Access to the VkDevice is needed for swapchain creation
-VkDevice ra_vk_get_dev(struct ra *ra);
-
-// Allocates a ra_tex that wraps a swapchain image. The contents of the image
-// will be invalidated, and access to it will only be internally synchronized.
-// So the calling code should not do anything else with the VkImage.
-struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
- VkSwapchainCreateInfoKHR info);
-
-// Associates an external semaphore (dependency) with a ra_tex, such that this
-// ra_tex will not be used by the ra_vk until the external semaphore fires.
-void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep);
-
-// This function finalizes rendering, transitions `tex` (which must be a
-// wrapped swapchain image) into a format suitable for presentation, and returns
-// the resulting command buffer (or NULL on error). The caller may add their
-// own semaphores to this command buffer, and must submit it afterwards.
-struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex);
-
-// May be called on a struct ra of any type. Returns NULL if the ra is not
-// a vulkan ra.
-struct mpvk_ctx *ra_vk_get(struct ra *ra);
-
-struct vk_external_mem {
-#if HAVE_WIN32_DESKTOP
- HANDLE mem_handle;
-#else
- int mem_fd;
-#endif
- size_t mem_size;
- size_t size;
- size_t offset;
-};
-
-// Export an ra_buf for importing by another api.
-bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret);
-
-// Set the buffer user data
-void ra_vk_buf_set_user_data(struct ra_buf *buf, void *priv);
-
-// Get the buffer user data
-void *ra_vk_buf_get_user_data(struct ra_buf *buf);
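(Usage illustration only, not code from context.c: the header above is typically driven once per frame roughly as follows. `acquire_sem`/`present_sem` stand in for the swapchain acquire/present semaphores, the rendering step is elided, and a real context would cache the wrapped ra_tex per swapchain image instead of recreating it every frame.)

static bool present_one_frame(struct ra *ra, struct mpvk_ctx *vk,
                              VkImage vkimg, VkSwapchainCreateInfoKHR info,
                              VkSemaphore acquire_sem, VkSemaphore present_sem)
{
    struct ra_tex *tex = ra_vk_wrap_swapchain_img(ra, vkimg, info);
    if (!tex)
        return false;

    // Don't touch the image until vkAcquireNextImageKHR's semaphore fires
    ra_tex_vk_external_dep(ra, tex, acquire_sem);

    // ... render into `tex` using the normal ra drawing entrypoints ...

    // Transition to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR and get the final cmd
    struct vk_cmd *cmd = ra_vk_submit(ra, tex);
    if (!cmd)
        return false;

    vk_cmd_sig(cmd, present_sem); // vkQueuePresentKHR will wait on this
    vk_cmd_queue(vk, cmd);        // takes ownership of cmd
    return mpvk_flush_commands(vk);
}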
diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c
index 62ac3e87f9..7d9c519c12 100644
--- a/video/out/vulkan/utils.c
+++ b/video/out/vulkan/utils.c
@@ -1,986 +1,46 @@
-#include <libavutil/macros.h>
-
-#include "video/out/gpu/spirv.h"
+#include "video/out/placebo/utils.h"
#include "utils.h"
-#include "malloc.h"
-
-const char* vk_err(VkResult res)
-{
- switch (res) {
- // These are technically success codes, but include them nonetheless
- case VK_SUCCESS: return "VK_SUCCESS";
- case VK_NOT_READY: return "VK_NOT_READY";
- case VK_TIMEOUT: return "VK_TIMEOUT";
- case VK_EVENT_SET: return "VK_EVENT_SET";
- case VK_EVENT_RESET: return "VK_EVENT_RESET";
- case VK_INCOMPLETE: return "VK_INCOMPLETE";
- case VK_SUBOPTIMAL_KHR: return "VK_SUBOPTIMAL_KHR";
-
- // Actual error codes
- case VK_ERROR_OUT_OF_HOST_MEMORY: return "VK_ERROR_OUT_OF_HOST_MEMORY";
- case VK_ERROR_OUT_OF_DEVICE_MEMORY: return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
- case VK_ERROR_INITIALIZATION_FAILED: return "VK_ERROR_INITIALIZATION_FAILED";
- case VK_ERROR_DEVICE_LOST: return "VK_ERROR_DEVICE_LOST";
- case VK_ERROR_MEMORY_MAP_FAILED: return "VK_ERROR_MEMORY_MAP_FAILED";
- case VK_ERROR_LAYER_NOT_PRESENT: return "VK_ERROR_LAYER_NOT_PRESENT";
- case VK_ERROR_EXTENSION_NOT_PRESENT: return "VK_ERROR_EXTENSION_NOT_PRESENT";
- case VK_ERROR_FEATURE_NOT_PRESENT: return "VK_ERROR_FEATURE_NOT_PRESENT";
- case VK_ERROR_INCOMPATIBLE_DRIVER: return "VK_ERROR_INCOMPATIBLE_DRIVER";
- case VK_ERROR_TOO_MANY_OBJECTS: return "VK_ERROR_TOO_MANY_OBJECTS";
- case VK_ERROR_FORMAT_NOT_SUPPORTED: return "VK_ERROR_FORMAT_NOT_SUPPORTED";
- case VK_ERROR_FRAGMENTED_POOL: return "VK_ERROR_FRAGMENTED_POOL";
- case VK_ERROR_INVALID_SHADER_NV: return "VK_ERROR_INVALID_SHADER_NV";
- case VK_ERROR_OUT_OF_DATE_KHR: return "VK_ERROR_OUT_OF_DATE_KHR";
- case VK_ERROR_SURFACE_LOST_KHR: return "VK_ERROR_SURFACE_LOST_KHR";
- }
-
- return "Unknown error!";
-}
-
-static const char* vk_dbg_type(VkDebugReportObjectTypeEXT type)
-{
- switch (type) {
- case VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT:
- return "VkInstance";
- case VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT:
- return "VkPhysicalDevice";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT:
- return "VkDevice";
- case VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT:
- return "VkQueue";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT:
- return "VkSemaphore";
- case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT:
- return "VkCommandBuffer";
- case VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT:
- return "VkFence";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT:
- return "VkDeviceMemory";
- case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT:
- return "VkBuffer";
- case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT:
- return "VkImage";
- case VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT:
- return "VkEvent";
- case VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT:
- return "VkQueryPool";
- case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT:
- return "VkBufferView";
- case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT:
- return "VkImageView";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT:
- return "VkShaderModule";
- case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT:
- return "VkPipelineCache";
- case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT:
- return "VkPipelineLayout";
- case VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT:
- return "VkRenderPass";
- case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT:
- return "VkPipeline";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT:
- return "VkDescriptorSetLayout";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT:
- return "VkSampler";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT:
- return "VkDescriptorPool";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT:
- return "VkDescriptorSet";
- case VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT:
- return "VkFramebuffer";
- case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT:
- return "VkCommandPool";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT:
- return "VkSurfaceKHR";
- case VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT:
- return "VkSwapchainKHR";
- case VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT:
- return "VkDebugReportCallbackEXT";
- case VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT:
- default:
- return "unknown object";
- }
-}
-
-static VkBool32 vk_dbg_callback(VkDebugReportFlagsEXT flags,
- VkDebugReportObjectTypeEXT objType,
- uint64_t obj, size_t loc, int32_t msgCode,
- const char *layer, const char *msg, void *priv)
-{
- struct mpvk_ctx *vk = priv;
- int lev = MSGL_V;
-
- switch (flags) {
- case VK_DEBUG_REPORT_ERROR_BIT_EXT: lev = MSGL_ERR; break;
- case VK_DEBUG_REPORT_WARNING_BIT_EXT: lev = MSGL_WARN; break;
- case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: lev = MSGL_TRACE; break;
- case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: lev = MSGL_WARN; break;
- case VK_DEBUG_REPORT_DEBUG_BIT_EXT: lev = MSGL_DEBUG; break;
- };
-
- MP_MSG(vk, lev, "vk [%s] %d: %s (obj 0x%llx (%s), loc 0x%zx)\n",
- layer, (int)msgCode, msg, (unsigned long long)obj,
- vk_dbg_type(objType), loc);
-
- // The return value of this function determines whether the call will
- // be explicitly aborted (to prevent GPU errors) or not. In our case,
- // we only want to abort on actual errors.
- return (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT);
-}
-static void vk_cmdpool_destroy(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
-static struct vk_cmdpool *vk_cmdpool_create(struct mpvk_ctx *vk,
- VkDeviceQueueCreateInfo qinfo,
- VkQueueFamilyProperties props);
-
-void mpvk_uninit(struct mpvk_ctx *vk)
-{
- if (!vk->inst)
- return;
-
- if (vk->dev) {
- mpvk_flush_commands(vk);
- mpvk_poll_commands(vk, UINT64_MAX);
- assert(vk->num_cmds_queued == 0);
- assert(vk->num_cmds_pending == 0);
- talloc_free(vk->cmds_queued);
- talloc_free(vk->cmds_pending);
- for (int i = 0; i < vk->num_pools; i++)
- vk_cmdpool_destroy(vk, vk->pools[i]);
- talloc_free(vk->pools);
- for (int i = 0; i < vk->num_signals; i++)
- vk_signal_destroy(vk, &vk->signals[i]);
- talloc_free(vk->signals);
- vk_malloc_uninit(vk);
- vkDestroyDevice(vk->dev, MPVK_ALLOCATOR);
- }
-
- if (vk->dbg) {
- // As with creating the debug callback, we need to load this function
- // pointer first.
- VK_LOAD_PFN(vkDestroyDebugReportCallbackEXT)
- pfn_vkDestroyDebugReportCallbackEXT(vk->inst, vk->dbg, MPVK_ALLOCATOR);
- }
-
- vkDestroySurfaceKHR(vk->inst, vk->surf, MPVK_ALLOCATOR);
- vkDestroyInstance(vk->inst, MPVK_ALLOCATOR);
-
- *vk = (struct mpvk_ctx){0};
-}
-
-bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log,
- const char *surf_ext_name, bool debug)
+bool mpvk_init(struct mpvk_ctx *vk, struct ra_ctx *ctx, const char *surface_ext)
{
- *vk = (struct mpvk_ctx) {
- .log = log,
- };
-
- VkInstanceCreateInfo info = {
- .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
- };
-
- if (debug) {
- // Enables the LunarG standard validation layer, which
- // is a meta-layer that loads lots of other validators
- static const char* layers[] = {
- "VK_LAYER_LUNARG_standard_validation",
- };
-
- info.ppEnabledLayerNames = layers;
- info.enabledLayerCount = MP_ARRAY_SIZE(layers);
- }
-
- // Enable whatever extensions were compiled in.
- const char *extensions[] = {
- VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
- VK_KHR_SURFACE_EXTENSION_NAME,
- surf_ext_name,
-
- // Extra extensions only used for debugging. These are toggled by
- // decreasing the enabledExtensionCount, so the number needs to be
- // synchronized with the code below.
- VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
- };
-
- const int debugExtensionCount = 1;
-
- info.ppEnabledExtensionNames = extensions;
- info.enabledExtensionCount = MP_ARRAY_SIZE(extensions);
-
- if (!debug)
- info.enabledExtensionCount -= debugExtensionCount;
-
- MP_VERBOSE(vk, "Creating instance with extensions:\n");
- for (int i = 0; i < info.enabledExtensionCount; i++)
- MP_VERBOSE(vk, " %s\n", info.ppEnabledExtensionNames[i]);
-
- VkResult res = vkCreateInstance(&info, MPVK_ALLOCATOR, &vk->inst);
- if (res != VK_SUCCESS) {
- MP_VERBOSE(vk, "Failed creating instance: %s\n", vk_err(res));
- return false;
- }
-
- if (debug) {
- // Set up a debug callback to catch validation messages
- VkDebugReportCallbackCreateInfoEXT dinfo = {
- .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
- .flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT |
- VK_DEBUG_REPORT_WARNING_BIT_EXT |
- VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT |
- VK_DEBUG_REPORT_ERROR_BIT_EXT |
- VK_DEBUG_REPORT_DEBUG_BIT_EXT,
- .pfnCallback = vk_dbg_callback,
- .pUserData = vk,
- };
-
- // Since this is not part of the core spec, we need to load it. This
- // can't fail because we've already successfully created an instance
- // with this extension enabled.
- VK_LOAD_PFN(vkCreateDebugReportCallbackEXT)
- pfn_vkCreateDebugReportCallbackEXT(vk->inst, &dinfo, MPVK_ALLOCATOR,
- &vk->dbg);
- }
-
- return true;
-}
-
-#define MPVK_MAX_DEVICES 16
-
-static bool physd_supports_surface(struct mpvk_ctx *vk, VkPhysicalDevice physd)
-{
- uint32_t qfnum;
- vkGetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL);
-
- for (int i = 0; i < qfnum; i++) {
- VkBool32 sup;
- VK(vkGetPhysicalDeviceSurfaceSupportKHR(physd, i, vk->surf, &sup));
- if (sup)
- return true;
- }
-
-error:
- return false;
-}
-
-bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw)
-{
- assert(vk->surf);
-
- MP_VERBOSE(vk, "Probing for vulkan devices:\n");
-
- VkPhysicalDevice *devices = NULL;
- uint32_t num = 0;
- VK(vkEnumeratePhysicalDevices(vk->inst, &num, NULL));
- devices = talloc_array(NULL, VkPhysicalDevice, num);
- VK(vkEnumeratePhysicalDevices(vk->inst, &num, devices));
-
- // Sorted by "priority". Reuses some m_opt code for convenience
- static const struct m_opt_choice_alternatives types[] = {
- {"discrete", VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU},
- {"integrated", VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU},
- {"virtual", VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU},
- {"software", VK_PHYSICAL_DEVICE_TYPE_CPU},
- {"unknown", VK_PHYSICAL_DEVICE_TYPE_OTHER},
- {0}
- };
-
- VkPhysicalDeviceProperties props[MPVK_MAX_DEVICES];
- for (int i = 0; i < num; i++) {
- vkGetPhysicalDeviceProperties(devices[i], &props[i]);
- MP_VERBOSE(vk, " GPU %d: %s (%s)\n", i, props[i].deviceName,
- m_opt_choice_str(types, props[i].deviceType));
- }
-
- // Iterate through each type in order of decreasing preference
- for (int t = 0; types[t].name; t++) {
- // Disallow SW rendering unless explicitly enabled
- if (types[t].value == VK_PHYSICAL_DEVICE_TYPE_CPU && !sw)
- continue;
-
- for (int i = 0; i < num; i++) {
- VkPhysicalDeviceProperties prop = props[i];
- if (prop.deviceType != types[t].value)
- continue;
- if (name && strcmp(name, prop.deviceName) != 0)
- continue;
- if (!physd_supports_surface(vk, devices[i]))
- continue;
-
- MP_VERBOSE(vk, "Chose device:\n");
- MP_VERBOSE(vk, " Device Name: %s\n", prop.deviceName);
- MP_VERBOSE(vk, " Device ID: %x:%x\n",
- (unsigned)prop.vendorID, (unsigned)prop.deviceID);
- MP_VERBOSE(vk, " Driver version: %d\n", (int)prop.driverVersion);
- MP_VERBOSE(vk, " API version: %d.%d.%d\n",
- (int)VK_VERSION_MAJOR(prop.apiVersion),
- (int)VK_VERSION_MINOR(prop.apiVersion),
- (int)VK_VERSION_PATCH(prop.apiVersion));
- vk->physd = devices[i];
- vk->limits = prop.limits;
- vkGetPhysicalDeviceFeatures(vk->physd, &vk->features);
- talloc_free(devices);
- return true;
- }
- }
-
-error:
- MP_VERBOSE(vk, "Found no suitable device, giving up.\n");
- talloc_free(devices);
- return false;
-}
-
-bool mpvk_get_phys_device_uuid(struct mpvk_ctx *vk, uint8_t uuid_out[VK_UUID_SIZE])
-{
- assert(vk->physd);
-
- VkPhysicalDeviceIDPropertiesKHR idprops = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR,
- };
-
- VkPhysicalDeviceProperties2KHR props = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
- .pNext = &idprops,
- };
-
- VK_LOAD_PFN(vkGetPhysicalDeviceProperties2KHR);
- pfn_vkGetPhysicalDeviceProperties2KHR(vk->physd, &props);
-
- memcpy(uuid_out, idprops.deviceUUID, VK_UUID_SIZE);
-
- return true;
-}
-
-bool mpvk_pick_surface_format(struct mpvk_ctx *vk)
-{
- assert(vk->physd);
-
- VkSurfaceFormatKHR *formats = NULL;
- int num;
-
- // Enumerate through the surface formats and find one that we can map to
- // a ra_format
- VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, NULL));
- formats = talloc_array(NULL, VkSurfaceFormatKHR, num);
- VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, formats));
-
- for (int i = 0; i < num; i++) {
- // A value of VK_FORMAT_UNDEFINED means we can pick anything we want
- if (formats[i].format == VK_FORMAT_UNDEFINED) {
- vk->surf_format = (VkSurfaceFormatKHR) {
- .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
- .format = VK_FORMAT_R16G16B16A16_UNORM,
- };
- break;
- }
-
- if (formats[i].colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR)
- continue;
-
- // Format whitelist, since we want only >= 8 bit _UNORM formats
- switch (formats[i].format) {
- case VK_FORMAT_R8G8B8_UNORM:
- case VK_FORMAT_B8G8R8_UNORM:
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
- case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
- case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
- case VK_FORMAT_R16G16B16_UNORM:
- case VK_FORMAT_R16G16B16A16_UNORM:
- break; // accept
- default: continue;
- }
-
- vk->surf_format = formats[i];
- break;
- }
-
- talloc_free(formats);
-
- if (!vk->surf_format.format)
- goto error;
-
- return true;
-
-error:
- MP_ERR(vk, "Failed picking surface format!\n");
- talloc_free(formats);
- return false;
-}
-
-// Find the most specialized queue supporting a combination of flags. In cases
-// where there are multiple queue families at the same specialization level,
-// this finds the one with the most queues. Returns -1 if no queue was found.
-static int find_qf(VkQueueFamilyProperties *qfs, int qfnum, VkQueueFlags flags)
-{
- int idx = -1;
- for (int i = 0; i < qfnum; i++) {
- if (!(qfs[i].queueFlags & flags))
- continue;
-
- // QF is more specialized. Since we don't care about other bits like
- // SPARSE_BIT, mask the ones we're interested in
- const VkQueueFlags mask = VK_QUEUE_GRAPHICS_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_COMPUTE_BIT;
-
- if (idx < 0 || (qfs[i].queueFlags & mask) < (qfs[idx].queueFlags & mask))
- idx = i;
-
- // QF has more queues (at the same specialization level)
- if (qfs[i].queueFlags == qfs[idx].queueFlags &&
- qfs[i].queueCount > qfs[idx].queueCount)
- idx = i;
- }
-
- return idx;
-}
-
-static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos,
- int *num_qinfos, VkQueueFamilyProperties *qfs, int idx,
- int qcount)
-{
- if (idx < 0)
- return;
-
- // Check to see if we've already added this queue family
- for (int i = 0; i < *num_qinfos; i++) {
- if ((*qinfos)[i].queueFamilyIndex == idx)
- return;
- }
-
- float *priorities = talloc_zero_array(tactx, float, qcount);
- VkDeviceQueueCreateInfo qinfo = {
- .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
- .queueFamilyIndex = idx,
- .queueCount = MPMIN(qcount, qfs[idx].queueCount),
- .pQueuePriorities = priorities,
- };
-
- MP_TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo);
-}
-
-static bool detect_device_extensions(struct mpvk_ctx *vk)
-{
- bool ret = false;
- VkExtensionProperties *props = NULL;
-
- uint32_t num_exts;
- VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL,
- &num_exts, NULL));
-
- props = talloc_array(NULL, VkExtensionProperties, num_exts);
- VK(vkEnumerateDeviceExtensionProperties(vk->physd,
- NULL, &num_exts, props));
-
- for (uint32_t i = 0; i < num_exts; i++) {
- if (!strcmp(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
- props[i].extensionName)) {
- vk->has_ext_external_memory = true;
- continue;
- }
- if (!strcmp(MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME,
- props[i].extensionName)) {
- vk->has_ext_external_memory_export = true;
- continue;
- }
- }
-
- ret = true;
-error:
- talloc_free(props);
- return ret;
-}
-
-bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
-{
- assert(vk->physd);
- void *tmp = talloc_new(NULL);
-
- // Enumerate the queue families and find suitable families for each task
- int qfnum;
- vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL);
- VkQueueFamilyProperties *qfs = talloc_array(tmp, VkQueueFamilyProperties, qfnum);
- vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs);
-
- MP_VERBOSE(vk, "Queue families supported by device:\n");
-
- for (int i = 0; i < qfnum; i++) {
- MP_VERBOSE(vk, " QF %d: flags 0x%x num %d\n", i,
- (unsigned)qfs[i].queueFlags, (int)qfs[i].queueCount);
- }
-
- int idx_gfx = -1, idx_comp = -1, idx_tf = -1;
- idx_gfx = find_qf(qfs, qfnum, VK_QUEUE_GRAPHICS_BIT);
- if (opts.async_compute)
- idx_comp = find_qf(qfs, qfnum, VK_QUEUE_COMPUTE_BIT);
- if (opts.async_transfer)
- idx_tf = find_qf(qfs, qfnum, VK_QUEUE_TRANSFER_BIT);
-
- // Vulkan requires at least one GRAPHICS queue, so if this fails something
- // is horribly wrong.
- assert(idx_gfx >= 0);
- MP_VERBOSE(vk, "Using graphics queue (QF %d)\n", idx_gfx);
-
- // Ensure we can actually present to the surface using this queue
- VkBool32 sup;
- VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx_gfx, vk->surf, &sup));
- if (!sup) {
- MP_ERR(vk, "Queue family does not support surface presentation!\n");
+ vk->ctx = pl_context_create(PL_API_VER, NULL);
+ if (!vk->ctx)
goto error;
- }
-
- if (idx_tf >= 0 && idx_tf != idx_gfx)
- MP_VERBOSE(vk, "Using async transfer (QF %d)\n", idx_tf);
- if (idx_comp >= 0 && idx_comp != idx_gfx)
- MP_VERBOSE(vk, "Using async compute (QF %d)\n", idx_comp);
- // Fall back to supporting compute shaders via the graphics pool for
- // devices which support compute shaders but not async compute.
- if (idx_comp < 0 && qfs[idx_gfx].queueFlags & VK_QUEUE_COMPUTE_BIT)
- idx_comp = idx_gfx;
+ vk->pl_log = mp_log_new(ctx, ctx->log, "libplacebo");
+ mppl_ctx_set_log(vk->ctx, vk->pl_log, true);
- // Now that we know which QFs we want, we can create the logical device
- VkDeviceQueueCreateInfo *qinfos = NULL;
- int num_qinfos = 0;
- add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_gfx, opts.queue_count);
- add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, opts.queue_count);
- add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, opts.queue_count);
-
- if (!detect_device_extensions(vk)) {
- MP_WARN(vk, "Failed to enumerate device extensions. "
- "Some features may be disabled.\n");
- }
-
- const char **exts = NULL;
- int num_exts = 0;
- MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME);
- if (vk->has_ext_external_memory)
- MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
- if (vk->has_ext_external_memory_export)
- MP_TARRAY_APPEND(tmp, exts, num_exts, MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME);
- if (vk->spirv->required_ext)
- MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext);
-
- // Enable all features we optionally use
-#define FEATURE(name) .name = vk->features.name
- VkPhysicalDeviceFeatures feats = {
- FEATURE(shaderImageGatherExtended),
- FEATURE(shaderStorageImageExtendedFormats),
- };
-#undef FEATURE
-
- VkDeviceCreateInfo dinfo = {
- .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
- .pQueueCreateInfos = qinfos,
- .queueCreateInfoCount = num_qinfos,
- .ppEnabledExtensionNames = exts,
- .enabledExtensionCount = num_exts,
- .pEnabledFeatures = &feats,
- };
-
- MP_VERBOSE(vk, "Creating vulkan device with extensions:\n");
- for (int i = 0; i < num_exts; i++)
- MP_VERBOSE(vk, " %s\n", exts[i]);
-
- VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev));
-
- // Create the command pools and memory allocator
- for (int i = 0; i < num_qinfos; i++) {
- int qf = qinfos[i].queueFamilyIndex;
- struct vk_cmdpool *pool = vk_cmdpool_create(vk, qinfos[i], qfs[qf]);
- if (!pool)
- goto error;
- MP_TARRAY_APPEND(NULL, vk->pools, vk->num_pools, pool);
-
- // Update the pool_* pointers based on the corresponding QF index
- if (qf == idx_gfx)
- vk->pool_graphics = pool;
- if (qf == idx_comp)
- vk->pool_compute = pool;
- if (qf == idx_tf)
- vk->pool_transfer = pool;
- }
-
- vk_malloc_init(vk);
- talloc_free(tmp);
- return true;
-
-error:
- MP_ERR(vk, "Failed creating logical device!\n");
- talloc_free(tmp);
- return false;
-}
-
-// returns VK_SUCCESS (completed), VK_TIMEOUT (not yet completed) or an error
-static VkResult vk_cmd_poll(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- uint64_t timeout)
-{
- return vkWaitForFences(vk->dev, 1, &cmd->fence, false, timeout);
-}
-
-static void vk_cmd_reset(struct mpvk_ctx *vk, struct vk_cmd *cmd)
-{
- for (int i = 0; i < cmd->num_callbacks; i++) {
- struct vk_callback *cb = &cmd->callbacks[i];
- cb->run(cb->priv, cb->arg);
- }
-
- cmd->num_callbacks = 0;
- cmd->num_deps = 0;
- cmd->num_sigs = 0;
-
- // also make sure to reset vk->last_cmd in case this was the last command
- if (vk->last_cmd == cmd)
- vk->last_cmd = NULL;
-}
-
-static void vk_cmd_destroy(struct mpvk_ctx *vk, struct vk_cmd *cmd)
-{
- if (!cmd)
- return;
-
- vk_cmd_poll(vk, cmd, UINT64_MAX);
- vk_cmd_reset(vk, cmd);
- vkDestroyFence(vk->dev, cmd->fence, MPVK_ALLOCATOR);
- vkFreeCommandBuffers(vk->dev, cmd->pool->pool, 1, &cmd->buf);
-
- talloc_free(cmd);
-}
-
-static struct vk_cmd *vk_cmd_create(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
-{
- struct vk_cmd *cmd = talloc_zero(NULL, struct vk_cmd);
- cmd->pool = pool;
-
- VkCommandBufferAllocateInfo ainfo = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .commandPool = pool->pool,
- .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
- .commandBufferCount = 1,
- };
-
- VK(vkAllocateCommandBuffers(vk->dev, &ainfo, &cmd->buf));
-
- VkFenceCreateInfo finfo = {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+ const char *exts[] = {
+ VK_KHR_SURFACE_EXTENSION_NAME,
+ surface_ext,
};
- VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, &cmd->fence));
-
- return cmd;
-
-error:
- vk_cmd_destroy(vk, cmd);
- return NULL;
-}
-
-void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg)
-{
- MP_TARRAY_APPEND(cmd, cmd->callbacks, cmd->num_callbacks, (struct vk_callback) {
- .run = callback,
- .priv = p,
- .arg = arg,
+ vk->vkinst = pl_vk_inst_create(vk->ctx, &(struct pl_vk_inst_params) {
+ .debug = ctx->opts.debug,
+ .extensions = exts,
+ .num_extensions = MP_ARRAY_SIZE(exts),
});
-}
-
-void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage)
-{
- int idx = cmd->num_deps++;
- MP_TARRAY_GROW(cmd, cmd->deps, idx);
- MP_TARRAY_GROW(cmd, cmd->depstages, idx);
- cmd->deps[idx] = dep;
- cmd->depstages[idx] = stage;
-}
-
-void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig)
-{
- MP_TARRAY_APPEND(cmd, cmd->sigs, cmd->num_sigs, sig);
-}
-
-static void vk_cmdpool_destroy(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
-{
- if (!pool)
- return;
- for (int i = 0; i < pool->num_cmds; i++)
- vk_cmd_destroy(vk, pool->cmds[i]);
-
- vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR);
- talloc_free(pool);
-}
-
-static struct vk_cmdpool *vk_cmdpool_create(struct mpvk_ctx *vk,
- VkDeviceQueueCreateInfo qinfo,
- VkQueueFamilyProperties props)
-{
- struct vk_cmdpool *pool = talloc_ptrtype(NULL, pool);
- *pool = (struct vk_cmdpool) {
- .props = props,
- .qf = qinfo.queueFamilyIndex,
- .queues = talloc_array(pool, VkQueue, qinfo.queueCount),
- .num_queues = qinfo.queueCount,
- };
-
- for (int n = 0; n < pool->num_queues; n++)
- vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]);
-
- VkCommandPoolCreateInfo cinfo = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
- .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
- VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
- .queueFamilyIndex = pool->qf,
- };
-
- VK(vkCreateCommandPool(vk->dev, &cinfo, MPVK_ALLOCATOR, &pool->pool));
-
- return pool;
-
-error:
- vk_cmdpool_destroy(vk, pool);
- return NULL;
-}
-
-void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout)
-{
- while (vk->num_cmds_pending > 0) {
- struct vk_cmd *cmd = vk->cmds_pending[0];
- struct vk_cmdpool *pool = cmd->pool;
- VkResult res = vk_cmd_poll(vk, cmd, timeout);
- if (res == VK_TIMEOUT)
- break;
- vk_cmd_reset(vk, cmd);
- MP_TARRAY_REMOVE_AT(vk->cmds_pending, vk->num_cmds_pending, 0);
- MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
- }
-}
-
-bool mpvk_flush_commands(struct mpvk_ctx *vk)
-{
- bool ret = true;
-
- for (int i = 0; i < vk->num_cmds_queued; i++) {
- struct vk_cmd *cmd = vk->cmds_queued[i];
- struct vk_cmdpool *pool = cmd->pool;
-
- VkSubmitInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .commandBufferCount = 1,
- .pCommandBuffers = &cmd->buf,
- .waitSemaphoreCount = cmd->num_deps,
- .pWaitSemaphores = cmd->deps,
- .pWaitDstStageMask = cmd->depstages,
- .signalSemaphoreCount = cmd->num_sigs,
- .pSignalSemaphores = cmd->sigs,
- };
-
- VK(vkQueueSubmit(cmd->queue, 1, &sinfo, cmd->fence));
- MP_TARRAY_APPEND(NULL, vk->cmds_pending, vk->num_cmds_pending, cmd);
-
- if (mp_msg_test(vk->log, MSGL_TRACE)) {
- MP_TRACE(vk, "Submitted command on queue %p (QF %d):\n",
- (void *)cmd->queue, pool->qf);
- for (int n = 0; n < cmd->num_deps; n++)
- MP_TRACE(vk, " waits on semaphore %p\n", (void *)cmd->deps[n]);
- for (int n = 0; n < cmd->num_sigs; n++)
- MP_TRACE(vk, " signals semaphore %p\n", (void *)cmd->sigs[n]);
- }
- continue;
-
-error:
- vk_cmd_reset(vk, cmd);
- MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
- ret = false;
- }
-
- vk->num_cmds_queued = 0;
-
- return ret;
-}
-
-void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg)
-{
- if (vk->last_cmd) {
- vk_cmd_callback(vk->last_cmd, callback, p, arg);
- } else {
- // The device was already idle, so we can just immediately call it
- callback(p, arg);
- }
-}
-
-struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
-{
- // garbage collect the cmdpool first, to increase the chances of getting
- // an already-available command buffer
- mpvk_poll_commands(vk, 0);
-
- struct vk_cmd *cmd = NULL;
- if (MP_TARRAY_POP(pool->cmds, pool->num_cmds, &cmd))
- goto done;
-
- // No free command buffers => allocate another one
- cmd = vk_cmd_create(vk, pool);
- if (!cmd)
+ if (!vk->vkinst)
goto error;
-done: ;
-
- VkCommandBufferBeginInfo binfo = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
- };
-
- VK(vkBeginCommandBuffer(cmd->buf, &binfo));
-
- cmd->queue = pool->queues[pool->idx_queues];
- return cmd;
-
-error:
- // Something has to be seriously messed up if we get to this point
- vk_cmd_destroy(vk, cmd);
- return NULL;
-}
-
-void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd)
-{
- struct vk_cmdpool *pool = cmd->pool;
-
- VK(vkEndCommandBuffer(cmd->buf));
-
- VK(vkResetFences(vk->dev, 1, &cmd->fence));
- MP_TARRAY_APPEND(NULL, vk->cmds_queued, vk->num_cmds_queued, cmd);
- vk->last_cmd = cmd;
- return;
-
-error:
- vk_cmd_reset(vk, cmd);
- MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
-}
-
-void vk_signal_destroy(struct mpvk_ctx *vk, struct vk_signal **sig)
-{
- if (!*sig)
- return;
-
- vkDestroySemaphore(vk->dev, (*sig)->semaphore, MPVK_ALLOCATOR);
- vkDestroyEvent(vk->dev, (*sig)->event, MPVK_ALLOCATOR);
- talloc_free(*sig);
- *sig = NULL;
-}
-
-struct vk_signal *vk_cmd_signal(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- VkPipelineStageFlags stage)
-{
- struct vk_signal *sig = NULL;
- if (MP_TARRAY_POP(vk->signals, vk->num_signals, &sig))
- goto done;
-
- // no available signal => initialize a new one
- sig = talloc_zero(NULL, struct vk_signal);
- static const VkSemaphoreCreateInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- };
-
- VK(vkCreateSemaphore(vk->dev, &sinfo, MPVK_ALLOCATOR, &sig->semaphore));
-
- static const VkEventCreateInfo einfo = {
- .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
- };
-
- VK(vkCreateEvent(vk->dev, &einfo, MPVK_ALLOCATOR, &sig->event));
-
-done:
- // Signal both the semaphore and the event if possible. (We will only
- // end up using one or the other)
- vk_cmd_sig(cmd, sig->semaphore);
-
- VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
- if (cmd->pool->props.queueFlags & req) {
- vkCmdSetEvent(cmd->buf, sig->event, stage);
- sig->event_source = cmd->queue;
- }
-
- return sig;
+ mppl_ctx_set_log(vk->ctx, vk->pl_log, false); // disable probing
+ return true;
error:
- vk_signal_destroy(vk, &sig);
- return NULL;
-}
-
-static bool unsignal_cmd(struct vk_cmd *cmd, VkSemaphore sem)
-{
- for (int n = 0; n < cmd->num_sigs; n++) {
- if (cmd->sigs[n] == sem) {
- MP_TARRAY_REMOVE_AT(cmd->sigs, cmd->num_sigs, n);
- return true;
- }
- }
-
+ mpvk_uninit(vk);
return false;
}
-// Attempts to remove a queued signal operation. Returns true if successful,
-// i.e. the signal could be removed before it ever got fired.
-static bool unsignal(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore sem)
-{
- if (unsignal_cmd(cmd, sem))
- return true;
-
- // Attempt to remove it from any queued commands
- for (int i = 0; i < vk->num_cmds_queued; i++) {
- if (unsignal_cmd(vk->cmds_queued[i], sem))
- return true;
- }
-
- return false;
-}
-
-static void release_signal(struct mpvk_ctx *vk, struct vk_signal *sig)
-{
- // The semaphore never needs to be recreated, because it's either
- // unsignaled while still queued, or unsignaled as a result of a device
- // wait. But the event *may* need to be reset, so just always reset it.
- if (sig->event_source)
- vkResetEvent(vk->dev, sig->event);
- sig->event_source = NULL;
- MP_TARRAY_APPEND(NULL, vk->signals, vk->num_signals, sig);
-}
-
-void vk_cmd_wait(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- struct vk_signal **sigptr, VkPipelineStageFlags stage,
- VkEvent *out_event)
+void mpvk_uninit(struct mpvk_ctx *vk)
{
- struct vk_signal *sig = *sigptr;
- if (!sig)
- return;
-
- if (out_event && sig->event && sig->event_source == cmd->queue &&
- unsignal(vk, cmd, sig->semaphore))
- {
- // If we can remove the semaphore signal operation from the history and
- // pretend it never happened, then we get to use the VkEvent. This also
- // requires that the VkEvent was signalled from the same VkQueue.
- *out_event = sig->event;
- } else if (sig->semaphore) {
- // Otherwise, we use the semaphore. (This also unsignals it as a result
- // of the command execution)
- vk_cmd_dep(cmd, sig->semaphore, stage);
+ if (vk->surface) {
+ assert(vk->vkinst);
+ vkDestroySurfaceKHR(vk->vkinst->instance, vk->surface, NULL);
+ vk->surface = NULL;
}
- // In either case, once the command completes, we can release the signal
- // resource back to the pool.
- vk_cmd_callback(cmd, (vk_cb) release_signal, vk, sig);
- *sigptr = NULL;
+ pl_vk_inst_destroy(&vk->vkinst);
+ pl_context_destroy(&vk->ctx);
+ TA_FREEP(&vk->pl_log);
}
-
-const VkImageSubresourceRange vk_range = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .levelCount = 1,
- .layerCount = 1,
-};
-
-const VkImageSubresourceLayers vk_layers = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .layerCount = 1,
-};
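(Illustrative sketch only; the real callers are the per-platform context files. With the libplacebo-based mpvk_init() above, a backend passes its WSI surface extension and then creates vk->surface from the pl_vk_inst instance. The Xlib variant below is a rough approximation, not a copy of context_xlib.c, and assumes VK_USE_PLATFORM_XLIB_KHR / vulkan_xlib.h are available.)

static bool example_init_xlib(struct ra_ctx *ctx, struct mpvk_ctx *vk,
                              Display *dpy, Window win)
{
    if (!mpvk_init(vk, ctx, VK_KHR_XLIB_SURFACE_EXTENSION_NAME))
        return false;

    VkXlibSurfaceCreateInfoKHR sinfo = {
        .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
        .dpy = dpy,
        .window = win,
    };

    VkResult res = vkCreateXlibSurfaceKHR(vk->vkinst->instance, &sinfo,
                                          NULL, &vk->surface);
    if (res != VK_SUCCESS) {
        mpvk_uninit(vk);
        return false;
    }
    return true;
}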
diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h
index 9af59e4b50..a98e1477b6 100644
--- a/video/out/vulkan/utils.h
+++ b/video/out/vulkan/utils.h
@@ -1,192 +1,6 @@
#pragma once
-
-#include "video/out/vo.h"
-#include "video/out/gpu/context.h"
-#include "video/mp_image.h"
-
#include "common.h"
-#include "formats.h"
-
-#define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \
- vkGetInstanceProcAddr(vk->inst, #name);
-
-#if HAVE_WIN32_DESKTOP
- #define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME
-#else
- #define MP_VK_EXTERNAL_MEMORY_EXPORT_EXTENSION_NAME VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME
-#endif
-
-// Return a human-readable name for a VkResult error or status code
-const char* vk_err(VkResult res);
-
-// Convenience macros to simplify a lot of common boilerplate
-#define VK_ASSERT(res, str) \
- do { \
- if (res != VK_SUCCESS) { \
- MP_ERR(vk, str ": %s\n", vk_err(res)); \
- goto error; \
- } \
- } while (0)
-
-#define VK(cmd) \
- do { \
- MP_TRACE(vk, #cmd "\n"); \
- VkResult res ## __LINE__ = (cmd); \
- VK_ASSERT(res ## __LINE__, #cmd); \
- } while (0)
+#include "video/out/gpu/context.h"
-// Uninits everything in the correct order
+bool mpvk_init(struct mpvk_ctx *vk, struct ra_ctx *ctx, const char *surface_ext);
void mpvk_uninit(struct mpvk_ctx *vk);
-
-// Initialization functions: As a rule of thumb, these need to be called in
-// this order, followed by vk_malloc_init, followed by RA initialization, and
-// finally followed by swapchain initialization.
-
-// Create a vulkan instance. Returns false on failure.
-bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log,
- const char *surf_ext_name, bool debug);
-
-// Generate a VkSurfaceKHR usable for video output. Returns false on
-// failure. Must be called after mpvk_instance_init.
-bool mpvk_surface_init(struct vo *vo, struct mpvk_ctx *vk);
-
-// Find a suitable physical device for use with rendering and which supports
-// the surface.
-// name: only match a device with this name
-// sw: also allow software/virtual devices
-bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw);
-
-// Get the UUID for the selected physical device
-bool mpvk_get_phys_device_uuid(struct mpvk_ctx *vk, uint8_t uuid_out[VK_UUID_SIZE]);
-
-// Pick a suitable surface format that's supported by this physical device.
-bool mpvk_pick_surface_format(struct mpvk_ctx *vk);
-
-struct mpvk_device_opts {
- int queue_count; // number of queues to use
- int async_transfer; // enable async transfer
- int async_compute; // enable async compute
-};
-
-// Create a logical device and initialize the vk_cmdpools
-bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts);
-
-// Wait for all currently pending commands to have completed. This is the only
-// function that actually processes the callbacks. Will wait at most `timeout`
-// nanoseconds for the completion of each command. Using it with a value of
-// UINT64_MAX effectively means waiting until the pool/device is idle. The
-// timeout may also be passed as 0, in which case this function will not block,
-// but only poll for completed commands.
-void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout);
-
-// Flush all currently queued commands. Call this once per frame, after
-// submitting all of the command buffers for that frame. Calling this more
-// often than that is possible but bad for performance.
-// Returns whether successful. Failed commands will be implicitly dropped.
-bool mpvk_flush_commands(struct mpvk_ctx *vk);
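(A minimal sketch of the per-frame pattern the two comments above describe: flush once after queuing the frame's command buffers, then poll without blocking so completion callbacks from earlier frames get run; passing UINT64_MAX is the teardown/idle case. The function names below are hypothetical.)

static void example_end_frame(struct mpvk_ctx *vk)
{
    mpvk_flush_commands(vk);   // submit everything queued during this frame
    mpvk_poll_commands(vk, 0); // non-blocking: reclaim finished commands
}

static void example_wait_idle(struct mpvk_ctx *vk)
{
    mpvk_flush_commands(vk);
    mpvk_poll_commands(vk, UINT64_MAX); // waits until the device is idle
}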
-
-// Since lots of vulkan operations need to be done lazily once the affected
-// resources are no longer in use, provide an abstraction for tracking these.
-// In practice, these are only checked and run when submitting new commands, so
-// the actual execution may be delayed by a frame.
-typedef void (*vk_cb)(void *priv, void *arg);
-
-struct vk_callback {
- vk_cb run;
- void *priv;
- void *arg; // as a convenience, you also get to pass an arg for "free"
-};
-
-// Associate a callback with the completion of all currently pending commands.
-// This will essentially run once the device is completely idle.
-void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg);
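(Sketch of the typical use of this mechanism for lazy cleanup: host memory that may still be referenced by in-flight commands is only freed once the GPU is done with it. `staging` is a hypothetical talloc allocation.)

static void free_on_idle(void *priv, void *arg)
{
    (void)priv;       // unused in this example
    talloc_free(arg); // safe now: no pending command can still reference it
}

// Free `staging` only once every currently pending command has completed:
//     vk_dev_callback(vk, free_on_idle, NULL, staging);
// Or tie the cleanup to one specific command instead:
//     vk_cmd_callback(cmd, free_on_idle, NULL, staging);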
-
-// Helper wrapper around command buffers that also track dependencies,
-// callbacks and synchronization primitives
-struct vk_cmd {
- struct vk_cmdpool *pool; // pool it was allocated from
- VkQueue queue; // the submission queue (for recording/pending)
- VkCommandBuffer buf; // the command buffer itself
- VkFence fence; // the fence guards cmd buffer reuse
- // The semaphores represent dependencies that need to complete before
- // this command can be executed. These are *not* owned by the vk_cmd
- VkSemaphore *deps;
- VkPipelineStageFlags *depstages;
- int num_deps;
- // The signals represent semaphores that fire once the command finishes
- // executing. These are also not owned by the vk_cmd
- VkSemaphore *sigs;
- int num_sigs;
- // Since VkFences are useless, we have to manually track "callbacks"
- // to fire once the VkFence completes. These are used for multiple purposes,
- // ranging from garbage collection (resource deallocation) to fencing.
- struct vk_callback *callbacks;
- int num_callbacks;
-};
-
-// Associate a callback with the completion of the current command. The
-// callback will be run once the command completes, or shortly thereafter.
-void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg);
-
-// Associate a raw dependency for the current command. This semaphore must
-// signal by the corresponding stage before the command may execute.
-void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage);
-
-// Associate a raw signal with the current command. This semaphore will signal
-// after the command completes.
-void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig);
-
-// Signal abstraction: represents an abstract synchronization mechanism.
-// Internally, this may either resolve as a semaphore or an event depending
-// on whether the appropriate conditions are met.
-struct vk_signal {
- VkSemaphore semaphore;
- VkEvent event;
- VkQueue event_source;
-};
-
-// Generates a signal after the execution of all previous commands matching
-// the given pipeline stage. The signal is owned by the caller, and must be
-// consumed with vk_cmd_wait or released with vk_signal_destroy in order to
-// free the resources.
-struct vk_signal *vk_cmd_signal(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- VkPipelineStageFlags stage);
-
-// Consumes a previously generated signal. This signal must fire by the
-// indicated stage before the command can run. If out_event is not NULL, then
-// *out_event MAY be set to a VkEvent which the caller MUST manually wait on
-// in the most appropriate way. This function takes over ownership of the
-// signal (the signal will be released/reused automatically).
-void vk_cmd_wait(struct mpvk_ctx *vk, struct vk_cmd *cmd,
- struct vk_signal **sigptr, VkPipelineStageFlags stage,
- VkEvent *out_event);
-
-// Destroys a currently pending signal, for example if the resource is no
-// longer relevant.
-void vk_signal_destroy(struct mpvk_ctx *vk, struct vk_signal **sig);
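A hedged sketch of the producer/consumer pattern these three functions describe, assuming `producer` and `consumer` are valid struct vk_cmd handles and `have_consumer` is a hypothetical flag; it only illustrates the documented ownership rules and is not code from the removed implementation:

    // emit a signal once all commands recorded so far reach the transfer
    // stage; the signal is owned by the caller until consumed or destroyed
    struct vk_signal *sig = vk_cmd_signal(vk, producer,
                                          VK_PIPELINE_STAGE_TRANSFER_BIT);

    if (have_consumer) {
        // the consumer may not start fragment shading before the signal
        // fires; vk_cmd_wait takes over ownership of the signal
        VkEvent event = VK_NULL_HANDLE;
        vk_cmd_wait(vk, consumer, &sig,
                    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, &event);
        if (event) {
            // the signal resolved to a VkEvent, which must be waited on
            // manually, e.g. by recording a wait into consumer->buf
        }
    } else {
        // no consumer ever materialized: release the pending signal
        vk_signal_destroy(vk, &sig);
    }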
-
-// Command pool / queue family hybrid abstraction
-struct vk_cmdpool {
- VkQueueFamilyProperties props;
- int qf; // queue family index
- VkCommandPool pool;
- VkQueue *queues;
- int num_queues;
- int idx_queues;
- // Command buffers associated with this queue. These are available for
- // re-recording
- struct vk_cmd **cmds;
- int num_cmds;
-};
-
-// Fetch a command buffer from a command pool and begin recording to it.
-// Returns NULL on failure.
-struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
-
-// Finish recording a command buffer and queue it for execution. This function
-// takes over ownership of *cmd, i.e. the caller should not touch it again.
-void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd);
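Putting the pieces together, a minimal sketch of one command buffer's lifecycle under this (now removed) API; `pool`, `sem_acquire`, `sem_done`, `free_staging` and `staging_buf` are hypothetical placeholders for whatever the caller actually uses:

    struct vk_cmd *cmd = vk_cmd_begin(vk, pool);
    if (!cmd)
        return; // handle failure: no command buffer could be fetched

    // execution must wait for the acquired image before the transfer stage
    vk_cmd_dep(cmd, sem_acquire, VK_PIPELINE_STAGE_TRANSFER_BIT);

    // ... record vkCmd* calls into cmd->buf here ...

    // signal a semaphore when the command finishes, and free the staging
    // buffer once the underlying fence completes
    vk_cmd_sig(cmd, sem_done);
    vk_cmd_callback(cmd, free_staging, vk, staging_buf);

    // hands over ownership of cmd; the queued command is then flushed to the
    // GPU by the next mpvk_flush_commands()
    vk_cmd_queue(vk, cmd);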
-
-// Predefined structs for a simple non-layered, non-mipped image
-extern const VkImageSubresourceRange vk_range;
-extern const VkImageSubresourceLayers vk_layers;
diff --git a/wscript b/wscript
index d3d49090c8..09eb5da529 100644
--- a/wscript
+++ b/wscript
@@ -805,11 +805,14 @@ video_output_features = [
"Aborting. If you really mean to compile without OpenGL " +
"video outputs use --disable-gl.",
}, {
+ 'name': '--libplacebo',
+ 'desc': 'libplacebo support',
+ 'func': check_pkg_config('libplacebo >= 1.18.0'),
+ }, {
'name': '--vulkan',
- 'desc': 'Vulkan context support',
- 'deps': 'shaderc',
- # Lowest version tested, Ubuntu 16.04's
- 'func': check_pkg_config('vulkan >= 1.0.61'),
+ 'desc': 'Vulkan context support',
+ 'deps': 'libplacebo',
+ 'func': check_pkg_config('vulkan'),
}, {
'name': 'egl-helpers',
'desc': 'EGL helper functions',
diff --git a/wscript_build.py b/wscript_build.py
index 38f704ef34..99f14990ac 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -445,6 +445,8 @@ def build(ctx):
( "video/out/gpu/utils.c" ),
( "video/out/gpu/video.c" ),
( "video/out/gpu/video_shaders.c" ),
+ ( "video/out/placebo/ra_pl.c", "libplacebo" ),
+ ( "video/out/placebo/utils.c", "libplacebo" ),
( "video/out/opengl/angle_dynamic.c", "egl-angle" ),
( "video/out/opengl/common.c", "gl" ),
( "video/out/opengl/context.c", "gl" ),
@@ -495,11 +497,8 @@ def build(ctx):
( "video/out/vo_xv.c", "xv" ),
( "video/out/vulkan/context.c", "vulkan" ),
( "video/out/vulkan/context_wayland.c", "vulkan && wayland" ),
- ( "video/out/vulkan/context_win.c", "vulkan && win32-desktop" ),
+ #( "video/out/vulkan/context_win.c", "vulkan && win32-desktop" ),
( "video/out/vulkan/context_xlib.c", "vulkan && x11" ),
- ( "video/out/vulkan/formats.c", "vulkan" ),
- ( "video/out/vulkan/malloc.c", "vulkan" ),
- ( "video/out/vulkan/ra_vk.c", "vulkan" ),
( "video/out/vulkan/utils.c", "vulkan" ),
( "video/out/w32_common.c", "win32-desktop" ),
( "video/out/wayland/idle-inhibit-v1.c", "wayland" ),