From 7006d6752d7da21870dfdb2b0d7640a3734f748c Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Sat, 10 Nov 2018 12:53:33 +0100 Subject: vo_gpu: vulkan: use libplacebo instead This commit rips out the entire mpv vulkan implementation in favor of exposing lightweight wrappers on top of libplacebo instead, which provides much of the same except in a more up-to-date and polished form. This (finally) unifies the code base between mpv and libplacebo, which is something I've been hoping to do for a long time. Note: The ra_pl wrappers are abstract enough from the actual libplacebo device type that we can in theory re-use them for other devices like d3d11 or even opengl in the future, so I moved them to a separate directory for the time being. However, the rest of the code is still vulkan-specific, so I've kept the "vulkan" naming and file paths, rather than introducing a new `--gpu-api` type. (Which would have ended up with significantly more code duplication) Plus, the code and functionality are similar enough that for most users this should just be a straight-up drop-in replacement. Note: This commit excludes some changes; specifically, the updates to context_win and hwdec_cuda are deferred to separate commits for authorship reasons. --- common/msg.c | 8 +- common/msg.h | 7 +- video/out/gpu/context.c | 4 +- video/out/opengl/hwdec_cuda.c | 11 +- video/out/placebo/ra_pl.c | 628 ++++++++++++ video/out/placebo/ra_pl.h | 10 + video/out/placebo/utils.c | 62 ++ video/out/placebo/utils.h | 18 + video/out/vulkan/common.h | 58 +- video/out/vulkan/context.c | 482 ++------- video/out/vulkan/context.h | 4 +- video/out/vulkan/context_wayland.c | 18 +- video/out/vulkan/context_win.c | 105 -- video/out/vulkan/context_xlib.c | 11 +- video/out/vulkan/formats.c | 55 - video/out/vulkan/formats.h | 16 - video/out/vulkan/malloc.c | 471 --------- video/out/vulkan/malloc.h | 37 - video/out/vulkan/ra_vk.c | 1982 ------------------------------------ video/out/vulkan/ra_vk.h | 51 - video/out/vulkan/utils.c | 990 +----------------- video/out/vulkan/utils.h | 190 +--- wscript | 11 +- wscript_build.py | 7 +- 24 files changed, 868 insertions(+), 4368 deletions(-) create mode 100644 video/out/placebo/ra_pl.c create mode 100644 video/out/placebo/ra_pl.h create mode 100644 video/out/placebo/utils.c create mode 100644 video/out/placebo/utils.h delete mode 100644 video/out/vulkan/context_win.c delete mode 100644 video/out/vulkan/formats.c delete mode 100644 video/out/vulkan/formats.h delete mode 100644 video/out/vulkan/malloc.c delete mode 100644 video/out/vulkan/malloc.h delete mode 100644 video/out/vulkan/ra_vk.c delete mode 100644 video/out/vulkan/ra_vk.h diff --git a/common/msg.c b/common/msg.c index cb41ea4168..e35b953f7d 100644 --- a/common/msg.c +++ b/common/msg.c @@ -127,19 +127,19 @@ static void update_loglevel(struct mp_log *log) pthread_mutex_unlock(&mp_msg_lock); } -// Return whether the message at this verbosity level would be actually printed. +// Get the current effective msg level. // Thread-safety: see mp_msg(). -bool mp_msg_test(struct mp_log *log, int lev) +int mp_msg_level(struct mp_log *log) { struct mp_log_root *root = log->root; if (!root) - return false; + return -1; if (atomic_load_explicit(&log->reload_counter, memory_order_relaxed) != atomic_load_explicit(&root->reload_counter, memory_order_relaxed)) { update_loglevel(log); } - return lev <= log->level; + return log->level; } // Reposition cursor and clear lines for outputting the status line. 
In certain diff --git a/common/msg.h b/common/msg.h index 21228870f4..635a85191f 100644 --- a/common/msg.h +++ b/common/msg.h @@ -52,7 +52,12 @@ void mp_msg(struct mp_log *log, int lev, const char *format, ...) PRINTF_ATTRIBUTE(3, 4); void mp_msg_va(struct mp_log *log, int lev, const char *format, va_list va); -bool mp_msg_test(struct mp_log *log, int lev); +int mp_msg_level(struct mp_log *log); + +static inline bool mp_msg_test(struct mp_log *log, int lev) +{ + return lev <= mp_msg_level(log); +} // Convenience macros. #define mp_fatal(log, ...) mp_msg(log, MSGL_FATAL, __VA_ARGS__) diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c index 85f1aa7667..0a46936708 100644 --- a/video/out/gpu/context.c +++ b/video/out/gpu/context.c @@ -50,7 +50,7 @@ extern const struct ra_ctx_fns ra_ctx_vdpauglx; /* Vulkan */ extern const struct ra_ctx_fns ra_ctx_vulkan_wayland; -extern const struct ra_ctx_fns ra_ctx_vulkan_win; +//extern const struct ra_ctx_fns ra_ctx_vulkan_win; extern const struct ra_ctx_fns ra_ctx_vulkan_xlib; /* Direct3D 11 */ @@ -105,9 +105,11 @@ static const struct ra_ctx_fns *contexts[] = { // Vulkan contexts: #if HAVE_VULKAN +/* #if HAVE_WIN32_DESKTOP &ra_ctx_vulkan_win, #endif +*/ #if HAVE_WAYLAND &ra_ctx_vulkan_wayland, #endif diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c index fee1f83f98..2e87593c78 100644 --- a/video/out/opengl/hwdec_cuda.c +++ b/video/out/opengl/hwdec_cuda.c @@ -39,9 +39,11 @@ #include "ra_gl.h" #endif #if HAVE_VULKAN +/* #include "video/out/vulkan/formats.h" #include "video/out/vulkan/ra_vk.h" #include "video/out/vulkan/utils.h" +*/ #endif #if HAVE_WIN32_DESKTOP @@ -125,6 +127,8 @@ static int cuda_init(struct ra_hwdec *hw) #endif #if HAVE_VULKAN + return -1; // TODO: reimplement + /* p->is_vk = ra_vk_get(hw->ra) != NULL; if (p->is_vk) { if (!ra_vk_get(hw->ra)->has_ext_external_memory_export) { @@ -133,6 +137,7 @@ static int cuda_init(struct ra_hwdec *hw) return -1; } } + */ #endif if (!p->is_gl && !p->is_vk) { @@ -197,6 +202,7 @@ static int cuda_init(struct ra_hwdec *hw) } } else if (p->is_vk) { #if HAVE_VULKAN + /* uint8_t vk_uuid[VK_UUID_SIZE]; struct mpvk_ctx *vk = ra_vk_get(hw->ra); @@ -236,6 +242,7 @@ static int cuda_init(struct ra_hwdec *hw) return -1; p->decode_ctx = p->display_ctx; + */ #endif } @@ -293,6 +300,7 @@ static void cuda_uninit(struct ra_hwdec *hw) #define CHECK_CU(x) check_cu((mapper)->owner, (x), #x) #if HAVE_VULKAN +/* static struct ra_buf *cuda_buf_pool_get(struct ra_hwdec_mapper *mapper, int n) { struct priv_owner *p_owner = mapper->owner->priv; @@ -390,6 +398,7 @@ static void cuda_buf_pool_uninit(struct ra_hwdec_mapper *mapper, int n) } ra_buf_pool_uninit(mapper->ra, pool); } +*/ #endif // HAVE_VULKAN static int mapper_init(struct ra_hwdec_mapper *mapper) @@ -497,7 +506,7 @@ static void mapper_uninit(struct ra_hwdec_mapper *mapper) ra_tex_free(mapper->ra, &mapper->tex[n]); #if HAVE_VULKAN - cuda_buf_pool_uninit(mapper, n); + //cuda_buf_pool_uninit(mapper, n); #endif } CHECK_CU(cu->cuCtxPopCurrent(&dummy)); diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c new file mode 100644 index 0000000000..334f2f135f --- /dev/null +++ b/video/out/placebo/ra_pl.c @@ -0,0 +1,628 @@ +#include "common/common.h" +#include "common/msg.h" + +#include "ra_pl.h" +#include "utils.h" + +struct ra_pl { + const struct pl_gpu *gpu; +}; + +static inline const struct pl_gpu *get_gpu(struct ra *ra) +{ + struct ra_pl *p = ra->priv; + return p->gpu; +} + +static struct ra_fns ra_fns_pl; + +struct ra 
*ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log) +{ + assert(gpu); + + struct ra *ra = talloc_zero(NULL, struct ra); + ra->log = log; + ra->fns = &ra_fns_pl; + + struct ra_pl *p = ra->priv = talloc_zero(ra, struct ra_pl); + p->gpu = gpu; + + ra->glsl_version = gpu->glsl.version; + ra->glsl_vulkan = gpu->glsl.vulkan; + ra->glsl_es = gpu->glsl.gles; + + ra->caps = RA_CAP_DIRECT_UPLOAD | RA_CAP_NESTED_ARRAY | RA_CAP_FRAGCOORD; + + if (gpu->caps & PL_GPU_CAP_COMPUTE) + ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS; + if (gpu->caps & PL_GPU_CAP_PARALLEL_COMPUTE) + ra->caps |= RA_CAP_PARALLEL_COMPUTE; + if (gpu->caps & PL_GPU_CAP_INPUT_VARIABLES) + ra->caps |= RA_CAP_GLOBAL_UNIFORM; + + if (gpu->limits.max_tex_1d_dim) + ra->caps |= RA_CAP_TEX_1D; + if (gpu->limits.max_tex_3d_dim) + ra->caps |= RA_CAP_TEX_3D; + if (gpu->limits.max_ubo_size) + ra->caps |= RA_CAP_BUF_RO; + if (gpu->limits.max_ssbo_size) + ra->caps |= RA_CAP_BUF_RW; + if (gpu->limits.min_gather_offset && gpu->limits.max_gather_offset) + ra->caps |= RA_CAP_GATHER; + + // Semi-hack: assume all textures are blittable if r8 is + const struct pl_fmt *r8 = pl_find_named_fmt(gpu, "r8"); + if (r8->caps & PL_FMT_CAP_BLITTABLE) + ra->caps |= RA_CAP_BLIT; + + ra->max_texture_wh = gpu->limits.max_tex_2d_dim; + ra->max_shmem = gpu->limits.max_shmem_size; + ra->max_pushc_size = gpu->limits.max_pushc_size; + + // Set up format wrappers + for (int i = 0; i < gpu->num_formats; i++) { + const struct pl_fmt *plfmt = gpu->formats[i]; + static const enum ra_ctype fmt_type_map[PL_FMT_TYPE_COUNT] = { + [PL_FMT_UNORM] = RA_CTYPE_UNORM, + [PL_FMT_UINT] = RA_CTYPE_UINT, + [PL_FMT_FLOAT] = RA_CTYPE_FLOAT, + }; + + enum ra_ctype type = fmt_type_map[plfmt->type]; + if (!type || !(plfmt->caps & PL_FMT_CAP_SAMPLEABLE)) + continue; + + struct ra_format *rafmt = talloc_zero(ra, struct ra_format); + *rafmt = (struct ra_format) { + .name = plfmt->name, + .priv = (void *) plfmt, + .ctype = type, + .ordered = pl_fmt_is_ordered(plfmt), + .num_components = plfmt->num_components, + .pixel_size = plfmt->texel_size, + .linear_filter = plfmt->caps & PL_FMT_CAP_LINEAR, + .renderable = plfmt->caps & PL_FMT_CAP_RENDERABLE, + .glsl_format = plfmt->glsl_format, + }; + + for (int c = 0; c < plfmt->num_components; c++) { + rafmt->component_size[c] = plfmt->host_bits[c]; + rafmt->component_depth[c] = plfmt->component_depth[c]; + } + + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, rafmt); + } + + return ra; +} + +static void destroy_ra_pl(struct ra *ra) +{ + talloc_free(ra); +} + +static struct ra_format *map_fmt(struct ra *ra, const struct pl_fmt *plfmt) +{ + for (int i = 0; i < ra->num_formats; i++) { + if (ra->formats[i]->priv == plfmt) + return ra->formats[i]; + } + + MP_ERR(ra, "Failed mapping pl_fmt '%s' to ra_fmt?\n", plfmt->name); + return NULL; +} + +bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex, + struct ra_tex *out_tex) +{ + if (!pltex) + return false; + + *out_tex = (struct ra_tex) { + .params = { + .dimensions = pl_tex_params_dimension(pltex->params), + .w = pltex->params.w, + .h = pltex->params.h, + .d = pltex->params.d, + .format = map_fmt(ra, pltex->params.format), + .render_src = pltex->params.sampleable, + .render_dst = pltex->params.renderable, + .storage_dst = pltex->params.storable, + .blit_src = pltex->params.blit_src, + .blit_dst = pltex->params.blit_dst, + .host_mutable = pltex->params.host_writable, + .downloadable = pltex->params.host_readable, + .src_linear = pltex->params.sample_mode == PL_TEX_SAMPLE_LINEAR, + .src_repeat = 
pltex->params.address_mode == PL_TEX_ADDRESS_REPEAT, + }, + .priv = (void *) pltex, + }; + + return !!out_tex->params.format; +} + +static struct ra_tex *tex_create_pl(struct ra *ra, + const struct ra_tex_params *params) +{ + const struct pl_gpu *gpu = get_gpu(ra); + + // Check size limits + bool ok = false; + switch (params->dimensions) { + case 1: + ok = params->w <= gpu->limits.max_tex_1d_dim; + break; + + case 2: + ok = params->w <= gpu->limits.max_tex_2d_dim && + params->h <= gpu->limits.max_tex_2d_dim; + break; + + case 3: + ok = params->w <= gpu->limits.max_tex_3d_dim && + params->h <= gpu->limits.max_tex_3d_dim && + params->d <= gpu->limits.max_tex_3d_dim; + break; + }; + + if (!ok) { + MP_ERR(ra, "Texture size %dx%dx%d exceeds dimension limits!\n", + params->w, params->h, params->d); + return NULL; + } + + const struct pl_tex *pltex = pl_tex_create(gpu, &(struct pl_tex_params) { + .w = params->w, + .h = params->dimensions >= 2 ? params->h : 0, + .d = params->dimensions >= 3 ? params->d : 0, + .format = params->format->priv, + .sampleable = params->render_src, + .renderable = params->render_dst, + .storable = params->storage_dst, + .blit_src = params->blit_src, + .blit_dst = params->blit_dst || params->render_dst, + .host_writable = params->host_mutable, + .host_readable = params->downloadable, + .sample_mode = params->src_linear ? PL_TEX_SAMPLE_LINEAR + : PL_TEX_SAMPLE_NEAREST, + .address_mode = params->src_repeat ? PL_TEX_ADDRESS_REPEAT + : PL_TEX_ADDRESS_CLAMP, + .initial_data = params->initial_data, + }); + + struct ra_tex *ratex = talloc_ptrtype(NULL, ratex); + if (!mppl_wrap_tex(ra, pltex, ratex)) { + pl_tex_destroy(gpu, &pltex); + talloc_free(ratex); + return NULL; + } + + return ratex; +} + +static void tex_destroy_pl(struct ra *ra, struct ra_tex *tex) +{ + if (!tex) + return; + + pl_tex_destroy(get_gpu(ra), (const struct pl_tex **) &tex->priv); + talloc_free(tex); +} + +static int texel_stride_w(size_t stride, const struct pl_tex *tex) +{ + size_t texel_size = tex->params.format->texel_size; + int texels = stride / texel_size; + assert(texels * texel_size == stride); + return texels; +} + +static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *params) +{ + const struct pl_tex *tex = params->tex->priv; + struct pl_tex_transfer_params pl_params = { + .tex = tex, + .buf = params->buf ? 
params->buf->priv : NULL, + .buf_offset = params->buf_offset, + .ptr = (void *) params->src, + }; + + if (params->tex->params.dimensions == 2) { + pl_params.stride_w = texel_stride_w(params->stride, tex); + if (params->rc) { + pl_params.rc = (struct pl_rect3d) { + .x0 = params->rc->x0, .x1 = params->rc->x1, + .y0 = params->rc->y0, .y1 = params->rc->y1, + }; + } + } + + return pl_tex_upload(get_gpu(ra), &pl_params); +} + +static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params) +{ + const struct pl_tex *tex = params->tex->priv; + struct pl_tex_transfer_params pl_params = { + .tex = tex, + .ptr = params->dst, + .stride_w = texel_stride_w(params->stride, tex), + }; + + return pl_tex_download(get_gpu(ra), &pl_params); +} + +static struct ra_buf *buf_create_pl(struct ra *ra, + const struct ra_buf_params *params) +{ + static const enum pl_buf_type buf_type[] = { + [RA_BUF_TYPE_TEX_UPLOAD] = PL_BUF_TEX_TRANSFER, + [RA_BUF_TYPE_SHADER_STORAGE] = PL_BUF_STORAGE, + [RA_BUF_TYPE_UNIFORM] = PL_BUF_UNIFORM, + [RA_BUF_TYPE_SHARED_MEMORY] = 0, + }; + + const struct pl_gpu *gpu = get_gpu(ra); + size_t max_size[] = { + [PL_BUF_TEX_TRANSFER] = gpu->limits.max_xfer_size, + [PL_BUF_UNIFORM] = gpu->limits.max_ubo_size, + [PL_BUF_STORAGE] = gpu->limits.max_ssbo_size, + }; + + if (params->size > max_size[buf_type[params->type]]) { + MP_ERR(ra, "Buffer size %zu exceeds size limits!\n", params->size); + return NULL; + } + + const struct pl_buf *plbuf = pl_buf_create(gpu, &(struct pl_buf_params) { + .type = buf_type[params->type], + .size = params->size, + .host_mapped = params->host_mapped, + .host_writable = params->host_mutable, + .initial_data = params->initial_data, + }); + + if (!plbuf) + return NULL; + + struct ra_buf *rabuf = talloc_ptrtype(NULL, rabuf); + *rabuf = (struct ra_buf) { + .params = *params, + .data = plbuf->data, + .priv = (void *) plbuf, + }; + + rabuf->params.initial_data = NULL; + return rabuf; +} + +static void buf_destroy_pl(struct ra *ra, struct ra_buf *buf) +{ + if (!buf) + return; + + pl_buf_destroy(get_gpu(ra), (const struct pl_buf **) &buf->priv); + talloc_free(buf); +} + +static void buf_update_pl(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size) +{ + pl_buf_write(get_gpu(ra), buf->priv, offset, data, size); +} + +static bool buf_poll_pl(struct ra *ra, struct ra_buf *buf) +{ + return !pl_buf_poll(get_gpu(ra), buf->priv, 0); +} + +static void clear_pl(struct ra *ra, struct ra_tex *dst, float color[4], + struct mp_rect *scissor) +{ + // TODO: implement scissor clearing by bltting a 1x1 tex instead + pl_tex_clear(get_gpu(ra), dst->priv, color); +} + +static void blit_pl(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc) +{ + struct pl_rect3d plsrc = {0}, pldst = {0}; + if (src_rc) { + plsrc.x0 = MPMIN(MPMAX(src_rc->x0, 0), src->params.w); + plsrc.y0 = MPMIN(MPMAX(src_rc->y0, 0), src->params.h); + plsrc.x1 = MPMIN(MPMAX(src_rc->x1, 0), src->params.w); + plsrc.y1 = MPMIN(MPMAX(src_rc->y1, 0), src->params.h); + } + + if (dst_rc) { + pldst.x0 = MPMIN(MPMAX(dst_rc->x0, 0), dst->params.w); + pldst.y0 = MPMIN(MPMAX(dst_rc->y0, 0), dst->params.h); + pldst.x1 = MPMIN(MPMAX(dst_rc->x1, 0), dst->params.w); + pldst.y1 = MPMIN(MPMAX(dst_rc->y1, 0), dst->params.h); + } + + pl_tex_blit(get_gpu(ra), dst->priv, src->priv, pldst, plsrc); +} + +static const enum pl_var_type var_type[RA_VARTYPE_COUNT] = { + [RA_VARTYPE_INT] = PL_VAR_SINT, + [RA_VARTYPE_FLOAT] = PL_VAR_FLOAT, +}; + +static 
const enum pl_desc_type desc_type[RA_VARTYPE_COUNT] = { + [RA_VARTYPE_TEX] = PL_DESC_SAMPLED_TEX, + [RA_VARTYPE_IMG_W] = PL_DESC_STORAGE_IMG, + [RA_VARTYPE_BUF_RO] = PL_DESC_BUF_UNIFORM, + [RA_VARTYPE_BUF_RW] = PL_DESC_BUF_STORAGE, +}; + +static const enum pl_fmt_type fmt_type[RA_VARTYPE_COUNT] = { + [RA_VARTYPE_INT] = PL_FMT_SINT, + [RA_VARTYPE_FLOAT] = PL_FMT_FLOAT, + [RA_VARTYPE_BYTE_UNORM] = PL_FMT_UNORM, +}; + +static const size_t var_size[RA_VARTYPE_COUNT] = { + [RA_VARTYPE_INT] = sizeof(int), + [RA_VARTYPE_FLOAT] = sizeof(float), + [RA_VARTYPE_BYTE_UNORM] = sizeof(uint8_t), +}; + +static struct ra_layout uniform_layout_pl(struct ra_renderpass_input *inp) +{ + // To get the alignment requirements, we try laying this out with + // an offset of 1 and then see where it ends up. This will always be + // the minimum alignment requirement. + struct pl_var_layout layout = pl_buf_uniform_layout(1, &(struct pl_var) { + .name = inp->name, + .type = var_type[inp->type], + .dim_v = inp->dim_v, + .dim_m = inp->dim_m, + .dim_a = 1, + }); + + return (struct ra_layout) { + .align = layout.offset, + .stride = layout.stride, + .size = layout.size, + }; +} + +static struct ra_layout push_constant_layout_pl(struct ra_renderpass_input *inp) +{ + struct pl_var_layout layout = pl_push_constant_layout(1, &(struct pl_var) { + .name = inp->name, + .type = var_type[inp->type], + .dim_v = inp->dim_v, + .dim_m = inp->dim_m, + .dim_a = 1, + }); + + return (struct ra_layout) { + .align = layout.offset, + .stride = layout.stride, + .size = layout.size, + }; +} + +static int desc_namespace_pl(struct ra *ra, enum ra_vartype type) +{ + return pl_desc_namespace(get_gpu(ra), desc_type[type]); +} + +struct pass_priv { + const struct pl_pass *pl_pass; + uint16_t *inp_index; // index translation map + // Space to hold the descriptor bindings and variable updates + struct pl_desc_binding *binds; + struct pl_var_update *varups; + int num_varups; +}; + +static struct ra_renderpass *renderpass_create_pl(struct ra *ra, + const struct ra_renderpass_params *params) +{ + void *tmp = talloc_new(NULL); + const struct pl_gpu *gpu = get_gpu(ra); + struct ra_renderpass *pass = NULL; + + static const enum pl_pass_type pass_type[] = { + [RA_RENDERPASS_TYPE_RASTER] = PL_PASS_RASTER, + [RA_RENDERPASS_TYPE_COMPUTE] = PL_PASS_COMPUTE, + }; + + struct pl_var *vars = NULL; + struct pl_desc *descs = NULL; + int num_vars = 0, num_descs = 0; + + struct pass_priv *priv = talloc_ptrtype(tmp, priv); + priv->inp_index = talloc_zero_array(priv, uint16_t, params->num_inputs); + + for (int i = 0; i < params->num_inputs; i++) { + const struct ra_renderpass_input *inp = ¶ms->inputs[i]; + if (var_type[inp->type]) { + priv->inp_index[i] = num_vars; + MP_TARRAY_APPEND(tmp, vars, num_vars, (struct pl_var) { + .name = inp->name, + .type = var_type[inp->type], + .dim_v = inp->dim_v, + .dim_m = inp->dim_m, + .dim_a = 1, + }); + } else if (desc_type[inp->type]) { + priv->inp_index[i] = num_descs; + MP_TARRAY_APPEND(tmp, descs, num_descs, (struct pl_desc) { + .name = inp->name, + .type = desc_type[inp->type], + .binding = inp->binding, + .access = inp->type == RA_VARTYPE_IMG_W ? PL_DESC_ACCESS_WRITEONLY + : inp->type == RA_VARTYPE_BUF_RW ? 
PL_DESC_ACCESS_READWRITE + : PL_DESC_ACCESS_READONLY, + }); + } + } + + // Allocate space to store the bindings map persistently + priv->binds = talloc_zero_array(priv, struct pl_desc_binding, num_descs); + + struct pl_pass_params pl_params = { + .type = pass_type[params->type], + .variables = vars, + .num_variables = num_vars, + .descriptors = descs, + .num_descriptors = num_descs, + .push_constants_size = params->push_constants_size, + .glsl_shader = params->type == RA_RENDERPASS_TYPE_COMPUTE + ? params->compute_shader + : params->frag_shader, + .cached_program = params->cached_program.start, + .cached_program_len = params->cached_program.len, + }; + + struct pl_blend_params blend_params; + + if (params->type == RA_RENDERPASS_TYPE_RASTER) { + pl_params.vertex_shader = params->vertex_shader; + pl_params.vertex_type = PL_PRIM_TRIANGLE_LIST; + pl_params.vertex_stride = params->vertex_stride; + pl_params.target_dummy.params.format = params->target_format->priv; + pl_params.load_target = !params->invalidate_target; + + if (params->enable_blend) { + pl_params.blend_params = &blend_params; + blend_params = (struct pl_blend_params) { + // Same enum order as ra_blend + .src_rgb = (enum ra_blend) params->blend_src_rgb, + .dst_rgb = (enum ra_blend) params->blend_dst_rgb, + .src_alpha = (enum ra_blend) params->blend_src_alpha, + .dst_alpha = (enum ra_blend) params->blend_dst_alpha, + }; + } + + for (int i = 0; i < params->num_vertex_attribs; i++) { + const struct ra_renderpass_input *inp = ¶ms->vertex_attribs[i]; + struct pl_vertex_attrib attrib = { + .name = inp->name, + .offset = inp->offset, + .location = i, + .fmt = pl_find_fmt(gpu, fmt_type[inp->type], inp->dim_v, 0, + var_size[inp->type] * 8, PL_FMT_CAP_VERTEX), + }; + + if (!attrib.fmt) { + MP_ERR(ra, "Failed mapping vertex attrib '%s' to pl_fmt?\n", + inp->name); + goto error; + } + + MP_TARRAY_APPEND(tmp, pl_params.vertex_attribs, + pl_params.num_vertex_attribs, attrib); + } + } + + priv->pl_pass = pl_pass_create(gpu, &pl_params); + if (!priv->pl_pass) + goto error; + + pass = talloc_ptrtype(NULL, pass); + *pass = (struct ra_renderpass) { + .params = *ra_renderpass_params_copy(pass, params), + .priv = talloc_steal(pass, priv), + }; + + pass->params.cached_program = (struct bstr) { + .start = (void *) priv->pl_pass->params.cached_program, + .len = priv->pl_pass->params.cached_program_len, + }; + + // fall through +error: + talloc_free(tmp); + return pass; +} + +static void renderpass_destroy_pl(struct ra *ra, struct ra_renderpass *pass) +{ + if (!pass) + return; + + struct pass_priv *priv = pass->priv; + pl_pass_destroy(get_gpu(ra), (const struct pl_pass **) &priv->pl_pass); + talloc_free(pass); +} + +static void renderpass_run_pl(struct ra *ra, + const struct ra_renderpass_run_params *params) +{ + struct pass_priv *p = params->pass->priv; + p->num_varups = 0; + + for (int i = 0; i < params->num_values; i++) { + const struct ra_renderpass_input_val *val = ¶ms->values[i]; + const struct ra_renderpass_input *inp = ¶ms->pass->params.inputs[i]; + if (var_type[inp->type]) { + MP_TARRAY_APPEND(p, p->varups, p->num_varups, (struct pl_var_update) { + .index = p->inp_index[val->index], + .data = val->data, + }); + } else { + struct pl_desc_binding bind; + switch (inp->type) { + case RA_VARTYPE_TEX: + case RA_VARTYPE_IMG_W: + bind.object = (* (struct ra_tex **) val->data)->priv; + break; + case RA_VARTYPE_BUF_RO: + case RA_VARTYPE_BUF_RW: + bind.object = (* (struct ra_buf **) val->data)->priv; + break; + default: abort(); + }; + + 
p->binds[p->inp_index[val->index]] = bind; + }; + } + + struct pl_pass_run_params pl_params = { + .pass = p->pl_pass, + .var_updates = p->varups, + .num_var_updates = p->num_varups, + .desc_bindings = p->binds, + .push_constants = params->push_constants, + }; + + if (p->pl_pass->params.type == PL_PASS_RASTER) { + pl_params.target = params->target->priv; + pl_params.viewport = mp_rect2d_to_pl(params->viewport); + pl_params.scissors = mp_rect2d_to_pl(params->scissors); + pl_params.vertex_data = params->vertex_data; + pl_params.vertex_count = params->vertex_count; + } else { + for (int i = 0; i < MP_ARRAY_SIZE(pl_params.compute_groups); i++) + pl_params.compute_groups[i] = params->compute_groups[i]; + } + + pl_pass_run(get_gpu(ra), &pl_params); +} + +static struct ra_fns ra_fns_pl = { + .destroy = destroy_ra_pl, + .tex_create = tex_create_pl, + .tex_destroy = tex_destroy_pl, + .tex_upload = tex_upload_pl, + .tex_download = tex_download_pl, + .buf_create = buf_create_pl, + .buf_destroy = buf_destroy_pl, + .buf_update = buf_update_pl, + .buf_poll = buf_poll_pl, + .clear = clear_pl, + .blit = blit_pl, + .uniform_layout = uniform_layout_pl, + .push_constant_layout = push_constant_layout_pl, + .desc_namespace = desc_namespace_pl, + .renderpass_create = renderpass_create_pl, + .renderpass_destroy = renderpass_destroy_pl, + .renderpass_run = renderpass_run_pl, +}; + diff --git a/video/out/placebo/ra_pl.h b/video/out/placebo/ra_pl.h new file mode 100644 index 0000000000..a342892963 --- /dev/null +++ b/video/out/placebo/ra_pl.h @@ -0,0 +1,10 @@ +#pragma once + +#include "video/out/gpu/ra.h" +#include + +struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log); + +// Wrap a pl_tex into a ra_tex struct, returns if successful +bool mppl_wrap_tex(struct ra *ra, const struct pl_tex *pltex, + struct ra_tex *out_tex); diff --git a/video/out/placebo/utils.c b/video/out/placebo/utils.c new file mode 100644 index 0000000000..79f313872c --- /dev/null +++ b/video/out/placebo/utils.c @@ -0,0 +1,62 @@ +#include "common/common.h" +#include "utils.h" + +static const int pl_log_to_msg_lev[PL_LOG_ALL+1] = { + [PL_LOG_FATAL] = MSGL_FATAL, + [PL_LOG_ERR] = MSGL_ERR, + [PL_LOG_WARN] = MSGL_WARN, + [PL_LOG_INFO] = MSGL_V, + [PL_LOG_DEBUG] = MSGL_DEBUG, + [PL_LOG_TRACE] = MSGL_TRACE, +}; + +static const enum pl_log_level msg_lev_to_pl_log[MSGL_MAX+1] = { + [MSGL_FATAL] = PL_LOG_FATAL, + [MSGL_ERR] = PL_LOG_ERR, + [MSGL_WARN] = PL_LOG_WARN, + [MSGL_INFO] = PL_LOG_WARN, + [MSGL_STATUS] = PL_LOG_WARN, + [MSGL_V] = PL_LOG_INFO, + [MSGL_DEBUG] = PL_LOG_DEBUG, + [MSGL_TRACE] = PL_LOG_TRACE, + [MSGL_MAX] = PL_LOG_ALL, +}; + +// translates log levels while probing +static const enum pl_log_level probing_map(enum pl_log_level level) +{ + switch (level) { + case PL_LOG_FATAL: + return PL_LOG_ERR; + + case PL_LOG_ERR: + case PL_LOG_WARN: + return PL_LOG_INFO; + + default: + return level; + } +} + +static void log_cb(void *priv, enum pl_log_level level, const char *msg) +{ + struct mp_log *log = priv; + mp_msg(log, pl_log_to_msg_lev[level], "%s\n", msg); +} + +static void log_cb_probing(void *priv, enum pl_log_level level, const char *msg) +{ + struct mp_log *log = priv; + mp_msg(log, pl_log_to_msg_lev[probing_map(level)], "%s\n", msg); +} + +void mppl_ctx_set_log(struct pl_context *ctx, struct mp_log *log, bool probing) +{ + assert(log); + + pl_context_update(ctx, &(struct pl_context_params) { + .log_cb = probing ? 
log_cb_probing : log_cb, + .log_level = msg_lev_to_pl_log[mp_msg_level(log)], + .log_priv = log, + }); +} diff --git a/video/out/placebo/utils.h b/video/out/placebo/utils.h new file mode 100644 index 0000000000..03bcb0f252 --- /dev/null +++ b/video/out/placebo/utils.h @@ -0,0 +1,18 @@ +#pragma once + +#include "common/common.h" +#include "common/msg.h" + +#include + +void mppl_ctx_set_log(struct pl_context *ctx, struct mp_log *log, bool probing); + +static inline struct pl_rect2d mp_rect2d_to_pl(struct mp_rect rc) +{ + return (struct pl_rect2d) { + .x0 = rc.x0, + .y0 = rc.y0, + .x1 = rc.x1, + .y1 = rc.y1, + }; +} diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h index a4284f9055..b085fb462c 100644 --- a/video/out/vulkan/common.h +++ b/video/out/vulkan/common.h @@ -23,58 +23,14 @@ #define VK_USE_PLATFORM_WIN32_KHR #endif -#include - -// Vulkan allows the optional use of a custom allocator. We don't need one but -// mark this parameter with a better name in case we ever decide to change this -// in the future. (And to make the code more readable) -#define MPVK_ALLOCATOR NULL - -// A lot of things depend on streaming resources across frames. Depending on -// how many frames we render ahead of time, we need to pick enough to avoid -// any conflicts, so make all of these tunable relative to this constant in -// order to centralize them. -#define MPVK_MAX_STREAMING_DEPTH 8 +#include // Shared struct used to hold vulkan context information struct mpvk_ctx { - struct mp_log *log; - VkInstance inst; - VkPhysicalDevice physd; - VkDebugReportCallbackEXT dbg; - VkDevice dev; - - // Surface, must be initialized fter the context itself - VkSurfaceKHR surf; - VkSurfaceFormatKHR surf_format; // picked at surface initialization time - - struct vk_malloc *alloc; // memory allocator for this device - struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler - struct vk_cmdpool **pools; // command pools (one per queue family) - int num_pools; - struct vk_cmd *last_cmd; // most recently submitted command - - // Queued/pending commands. 
These are shared for the entire mpvk_ctx to - // ensure submission and callbacks are FIFO - struct vk_cmd **cmds_queued; // recorded but not yet submitted - struct vk_cmd **cmds_pending; // submitted but not completed - int num_cmds_queued; - int num_cmds_pending; - - // Pointers into *pools - struct vk_cmdpool *pool_graphics; // required - struct vk_cmdpool *pool_compute; // optional - struct vk_cmdpool *pool_transfer; // optional - - // Common pool of signals, to avoid having to re-create these objects often - struct vk_signal **signals; - int num_signals; - - // Cached capabilities - VkPhysicalDeviceLimits limits; - VkPhysicalDeviceFeatures features; - - // Extension availability - bool has_ext_external_memory; - bool has_ext_external_memory_export; + struct mp_log *pl_log; + struct pl_context *ctx; + const struct pl_vk_inst *vkinst; + const struct pl_vulkan *vulkan; + const struct pl_gpu *gpu; // points to vulkan->gpu for convenience + VkSurfaceKHR surface; }; diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c index 29a2c9b727..c05a5ac209 100644 --- a/video/out/vulkan/context.c +++ b/video/out/vulkan/context.c @@ -16,25 +16,17 @@ */ #include "options/m_config.h" -#include "video/out/gpu/spirv.h" +#include "video/out/placebo/ra_pl.h" #include "context.h" -#include "ra_vk.h" #include "utils.h" -enum { - SWAP_AUTO = 0, - SWAP_FIFO, - SWAP_FIFO_RELAXED, - SWAP_MAILBOX, - SWAP_IMMEDIATE, - SWAP_COUNT, -}; - struct vulkan_opts { - struct mpvk_device_opts dev_opts; // logical device options char *device; // force a specific GPU int swap_mode; + int queue_count; + int async_transfer; + int async_compute; }; static int vk_validate_dev(struct mp_log *log, const struct m_option *opt, @@ -52,7 +44,7 @@ static int vk_validate_dev(struct mp_log *log, const struct m_option *opt, VkPhysicalDevice *devices = NULL; uint32_t num = 0; - res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst); + res = vkCreateInstance(&info, NULL, &inst); if (res != VK_SUCCESS) goto done; @@ -97,45 +89,30 @@ const struct m_sub_options vulkan_conf = { .opts = (const struct m_option[]) { OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev), OPT_CHOICE("vulkan-swap-mode", swap_mode, 0, - ({"auto", SWAP_AUTO}, - {"fifo", SWAP_FIFO}, - {"fifo-relaxed", SWAP_FIFO_RELAXED}, - {"mailbox", SWAP_MAILBOX}, - {"immediate", SWAP_IMMEDIATE})), - OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1, 8, - OPTDEF_INT(1)), - OPT_FLAG("vulkan-async-transfer", dev_opts.async_transfer, 0), - OPT_FLAG("vulkan-async-compute", dev_opts.async_compute, 0), + ({"auto", -1}, + {"fifo", VK_PRESENT_MODE_FIFO_KHR}, + {"fifo-relaxed", VK_PRESENT_MODE_FIFO_RELAXED_KHR}, + {"mailbox", VK_PRESENT_MODE_MAILBOX_KHR}, + {"immediate", VK_PRESENT_MODE_IMMEDIATE_KHR})), + OPT_INTRANGE("vulkan-queue-count", queue_count, 0, 1, 8), + OPT_FLAG("vulkan-async-transfer", async_transfer, 0), + OPT_FLAG("vulkan-async-compute", async_compute, 0), {0} }, .size = sizeof(struct vulkan_opts), .defaults = &(struct vulkan_opts) { - .dev_opts = { - .async_transfer = 1, - }, + .swap_mode = -1, + .queue_count = 1, + .async_transfer = true, + .async_compute = true, }, }; struct priv { struct mpvk_ctx *vk; struct vulkan_opts *opts; - // Swapchain metadata: - int w, h; // current size - VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype - VkSwapchainKHR swapchain; - VkSwapchainKHR old_swapchain; - int frames_in_flight; - // state of the images: - struct ra_tex **images; // ra_tex wrappers for the vkimages - int num_images; // size 
of images - VkSemaphore *sems_in; // pool of semaphores used to synchronize images - VkSemaphore *sems_out; // outgoing semaphores (rendering complete) - int num_sems; - int idx_sems; // index of next free semaphore pair - int last_imgidx; // the image index last acquired (for submit) - - // This is used to pre-fetch the next frame at the end of swap_buffers - struct ra_fbo queued_fbo; + const struct pl_swapchain *swapchain; + struct ra_tex proxy_tex; }; static const struct ra_swapchain_fns vulkan_swapchain; @@ -149,133 +126,26 @@ struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx) return p->vk; } -static bool update_swapchain_info(struct priv *p, - VkSwapchainCreateInfoKHR *info) -{ - struct mpvk_ctx *vk = p->vk; - - // Query the supported capabilities and update this struct as needed - VkSurfaceCapabilitiesKHR caps; - VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps)); - - // Sorted by preference - static const VkCompositeAlphaFlagsKHR alphaModes[] = { - VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, - }; - - for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) { - if (caps.supportedCompositeAlpha & alphaModes[i]) { - info->compositeAlpha = alphaModes[i]; - break; - } - } - - if (!info->compositeAlpha) { - MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n", - caps.supportedCompositeAlpha); - goto error; - } - - static const VkSurfaceTransformFlagsKHR rotModes[] = { - VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, - VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR, - }; - - for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) { - if (caps.supportedTransforms & rotModes[i]) { - info->preTransform = rotModes[i]; - break; - } - } - - if (!info->preTransform) { - MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n", - caps.supportedTransforms); - goto error; - } - - // Image count as required - MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n", - (int)info->minImageCount, (int)caps.minImageCount, - (int)caps.maxImageCount); - - info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount); - if (caps.maxImageCount) - info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount); - - // Check the extent against the allowed parameters - if (caps.currentExtent.width != info->imageExtent.width && - caps.currentExtent.width != 0xFFFFFFFF) - { - MP_WARN(vk, "Requested width %d does not match current width %d\n", - (int)info->imageExtent.width, (int)caps.currentExtent.width); - info->imageExtent.width = caps.currentExtent.width; - } - - if (caps.currentExtent.height != info->imageExtent.height && - caps.currentExtent.height != 0xFFFFFFFF) - { - MP_WARN(vk, "Requested height %d does not match current height %d\n", - (int)info->imageExtent.height, (int)caps.currentExtent.height); - info->imageExtent.height = caps.currentExtent.height; - } - - if (caps.minImageExtent.width > info->imageExtent.width || - caps.minImageExtent.height > info->imageExtent.height) - { - MP_ERR(vk, "Requested size %dx%d smaller than device minimum %d%d\n", - (int)info->imageExtent.width, (int)info->imageExtent.height, - (int)caps.minImageExtent.width, (int)caps.minImageExtent.height); - goto error; - } - - if (caps.maxImageExtent.width < info->imageExtent.width || - caps.maxImageExtent.height < info->imageExtent.height) - { - MP_ERR(vk, "Requested size %dx%d larger than device maximum %d%d\n", - (int)info->imageExtent.width, (int)info->imageExtent.height, - (int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height); - goto error; - } - - 
// We just request whatever usage we can, and let the ra_vk decide what - // ra_tex_params that translates to. This makes the images as flexible - // as possible. - info->imageUsage = caps.supportedUsageFlags; - return true; - -error: - return false; -} - void ra_vk_ctx_uninit(struct ra_ctx *ctx) { - if (ctx->ra) { - struct priv *p = ctx->swapchain->priv; - struct mpvk_ctx *vk = p->vk; - - mpvk_flush_commands(vk); - mpvk_poll_commands(vk, UINT64_MAX); + if (!ctx->swapchain) + return; - for (int i = 0; i < p->num_images; i++) - ra_tex_free(ctx->ra, &p->images[i]); - for (int i = 0; i < p->num_sems; i++) { - vkDestroySemaphore(vk->dev, p->sems_in[i], MPVK_ALLOCATOR); - vkDestroySemaphore(vk->dev, p->sems_out[i], MPVK_ALLOCATOR); - } + struct priv *p = ctx->swapchain->priv; + struct mpvk_ctx *vk = p->vk; - vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR); + if (ctx->ra) { + pl_gpu_finish(vk->gpu); + pl_swapchain_destroy(&p->swapchain); ctx->ra->fns->destroy(ctx->ra); ctx->ra = NULL; } - talloc_free(ctx->swapchain); - ctx->swapchain = NULL; + vk->gpu = NULL; + pl_vulkan_destroy(&vk->vulkan); + TA_FREEP(&ctx->swapchain); } -static const struct ra_swapchain_fns vulkan_swapchain; - bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, VkPresentModeKHR preferred_mode) { @@ -287,56 +157,36 @@ bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, p->vk = vk; p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf); - if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw)) - goto error; - if (!spirv_compiler_init(ctx)) - goto error; - vk->spirv = ctx->spirv; - if (!mpvk_pick_surface_format(vk)) - goto error; - if (!mpvk_device_init(vk, p->opts->dev_opts)) + assert(vk->ctx); + assert(vk->vkinst); + vk->vulkan = pl_vulkan_create(vk->ctx, &(struct pl_vulkan_params) { + .instance = vk->vkinst->instance, + .surface = vk->surface, + .async_transfer = p->opts->async_transfer, + .async_compute = p->opts->async_compute, + .queue_count = p->opts->queue_count, + }); + if (!vk->vulkan) goto error; - ctx->ra = ra_create_vk(vk, ctx->log); + vk->gpu = vk->vulkan->gpu; + ctx->ra = ra_create_pl(vk->gpu, ctx->log); if (!ctx->ra) goto error; - static const VkPresentModeKHR present_modes[SWAP_COUNT] = { - [SWAP_FIFO] = VK_PRESENT_MODE_FIFO_KHR, - [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR, - [SWAP_MAILBOX] = VK_PRESENT_MODE_MAILBOX_KHR, - [SWAP_IMMEDIATE] = VK_PRESENT_MODE_IMMEDIATE_KHR, + // Create the swapchain + struct pl_vulkan_swapchain_params params = { + .surface = vk->surface, + .present_mode = preferred_mode, + .swapchain_depth = ctx->opts.swapchain_depth, }; - p->protoInfo = (VkSwapchainCreateInfoKHR) { - .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, - .surface = vk->surf, - .imageFormat = vk->surf_format.format, - .imageColorSpace = vk->surf_format.colorSpace, - .imageArrayLayers = 1, // non-stereoscopic - .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, - .minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB - .presentMode = p->opts->swap_mode ? 
present_modes[p->opts->swap_mode] - : preferred_mode, - .clipped = true, - }; + if (p->opts->swap_mode >= 0) // user override + params.present_mode = p->opts->swap_mode; - // Make sure the swapchain present mode is supported - int num_modes; - VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf, - &num_modes, NULL)); - VkPresentModeKHR *modes = talloc_array(NULL, VkPresentModeKHR, num_modes); - VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf, - &num_modes, modes)); - bool supported = false; - for (int i = 0; i < num_modes; i++) - supported |= (modes[i] == p->protoInfo.presentMode); - talloc_free(modes); - - if (!supported) { - MP_ERR(ctx, "Requested swap mode unsupported by this device!\n"); + p->swapchain = pl_vulkan_create_swapchain(vk->vulkan, ¶ms); + if (!p->swapchain) goto error; - } return true; @@ -345,245 +195,49 @@ error: return false; } -static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p) +bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height) { - assert(p->old_swapchain); - vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR); - p->old_swapchain = NULL; -} - -bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h) -{ - struct priv *p = sw->priv; - if (w == p->w && h == p->h) - return true; - - struct ra *ra = sw->ctx->ra; - struct mpvk_ctx *vk = p->vk; - VkImage *vkimages = NULL; - - // It's invalid to trigger another swapchain recreation while there's - // more than one swapchain already active, so we need to flush any pending - // asynchronous swapchain release operations that may be ongoing. - while (p->old_swapchain) - mpvk_poll_commands(vk, 100000); // 100μs - - VkSwapchainCreateInfoKHR sinfo = p->protoInfo; - sinfo.imageExtent = (VkExtent2D){ w, h }; - sinfo.oldSwapchain = p->swapchain; - - if (!update_swapchain_info(p, &sinfo)) - goto error; - - VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain)); - p->w = w; - p->h = h; - - // Freeing the old swapchain while it's still in use is an error, so do - // it asynchronously once the device is idle. 
- if (sinfo.oldSwapchain) { - p->old_swapchain = sinfo.oldSwapchain; - vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p); - } - - // Get the new swapchain images - int num; - VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL)); - vkimages = talloc_array(NULL, VkImage, num); - VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages)); - - // If needed, allocate some more semaphores - while (num > p->num_sems) { - VkSemaphore sem_in, sem_out; - static const VkSemaphoreCreateInfo seminfo = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - }; - VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_in)); - VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_out)); - - int idx = p->num_sems++; - MP_TARRAY_GROW(p, p->sems_in, idx); - MP_TARRAY_GROW(p, p->sems_out, idx); - p->sems_in[idx] = sem_in; - p->sems_out[idx] = sem_out; - } - - // Invalidate the queued texture - p->queued_fbo = (struct ra_fbo) {0}; - - // Recreate the ra_tex wrappers - for (int i = 0; i < p->num_images; i++) - ra_tex_free(ra, &p->images[i]); - - p->num_images = num; - MP_TARRAY_GROW(p, p->images, p->num_images); - for (int i = 0; i < num; i++) { - p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo); - if (!p->images[i]) - goto error; - } + struct priv *p = ctx->swapchain->priv; - talloc_free(vkimages); - return true; + bool ok = pl_swapchain_resize(p->swapchain, &width, &height); + ctx->vo->dwidth = width; + ctx->vo->dheight = height; -error: - talloc_free(vkimages); - vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR); - p->swapchain = NULL; - return false; + return ok; } static int color_depth(struct ra_swapchain *sw) { - struct priv *p = sw->priv; - int bits = 0; - - if (!p->num_images) - return bits; - - // The channel with the most bits is probably the most authoritative about - // the actual color information (consider e.g. a2bgr10). Slight downside - // in that it results in rounding r/b for e.g. rgb565, but we don't pick - // surfaces with fewer than 8 bits anyway. - const struct ra_format *fmt = p->images[0]->params.format; - for (int i = 0; i < fmt->num_components; i++) { - int depth = fmt->component_depth[i]; - bits = MPMAX(bits, depth ? depth : fmt->component_size[i]); - } - - return bits; + return 0; // TODO: implement this somehow? 
} static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) { struct priv *p = sw->priv; - struct mpvk_ctx *vk = p->vk; - if (!p->swapchain) + struct pl_swapchain_frame frame; + if (!pl_swapchain_start_frame(p->swapchain, &frame)) + return false; + if (!mppl_wrap_tex(sw->ctx->ra, frame.fbo, &p->proxy_tex)) return false; - if (p->queued_fbo.tex) { - assert(out_fbo != &p->queued_fbo); - *out_fbo = p->queued_fbo; - p->queued_fbo = (struct ra_fbo) {0}; - return true; - } - - VkSemaphore sem_in = p->sems_in[p->idx_sems]; - MP_TRACE(vk, "vkAcquireNextImageKHR signals %p\n", (void *)sem_in); - - for (int attempts = 0; attempts < 2; attempts++) { - uint32_t imgidx = 0; - VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX, - sem_in, NULL, &imgidx); - - switch (res) { - case VK_SUCCESS: - p->last_imgidx = imgidx; - *out_fbo = (struct ra_fbo) { - .tex = p->images[imgidx], - .flip = false, - }; - ra_tex_vk_external_dep(sw->ctx->ra, out_fbo->tex, sem_in); - return true; - - case VK_ERROR_OUT_OF_DATE_KHR: { - // In these cases try recreating the swapchain - int w = p->w, h = p->h; - p->w = p->h = 0; // invalidate the current state - if (!ra_vk_ctx_resize(sw, w, h)) - return false; - continue; - } - - default: - MP_ERR(vk, "Failed acquiring swapchain image: %s\n", vk_err(res)); - return false; - } - } - - // If we've exhausted the number of attempts to recreate the swapchain, - // just give up silently. - return false; -} + *out_fbo = (struct ra_fbo) { + .tex = &p->proxy_tex, + .flip = frame.flipped, + }; -static void present_cb(struct priv *p, void *arg) -{ - p->frames_in_flight--; + return true; } static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) { struct priv *p = sw->priv; - struct ra *ra = sw->ctx->ra; - struct mpvk_ctx *vk = p->vk; - if (!p->swapchain) - return false; - - struct vk_cmd *cmd = ra_vk_submit(ra, p->images[p->last_imgidx]); - if (!cmd) - return false; - - VkSemaphore sem_out = p->sems_out[p->idx_sems++]; - p->idx_sems %= p->num_sems; - vk_cmd_sig(cmd, sem_out); - - p->frames_in_flight++; - vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL); - - vk_cmd_queue(vk, cmd); - if (!mpvk_flush_commands(vk)) - return false; - - // Submit to the same queue that we were currently rendering to - struct vk_cmdpool *pool_gfx = vk->pool_graphics; - VkQueue queue = pool_gfx->queues[pool_gfx->idx_queues]; - - // Rotate the queues to ensure good parallelism across frames - for (int i = 0; i < vk->num_pools; i++) { - struct vk_cmdpool *pool = vk->pools[i]; - pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues; - } - - VkPresentInfoKHR pinfo = { - .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &sem_out, - .swapchainCount = 1, - .pSwapchains = &p->swapchain, - .pImageIndices = &p->last_imgidx, - }; - - MP_TRACE(vk, "vkQueuePresentKHR waits on %p\n", (void *)sem_out); - VkResult res = vkQueuePresentKHR(queue, &pinfo); - switch (res) { - case VK_SUCCESS: - case VK_SUBOPTIMAL_KHR: - return true; - - case VK_ERROR_OUT_OF_DATE_KHR: - // We can silently ignore this error, since the next start_frame will - // recreate the swapchain automatically. 
- return true; - - default: - MP_ERR(vk, "Failed presenting to queue %p: %s\n", (void *)queue, - vk_err(res)); - return false; - } + return pl_swapchain_submit_frame(p->swapchain); } static void swap_buffers(struct ra_swapchain *sw) { struct priv *p = sw->priv; - - while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth) - mpvk_poll_commands(p->vk, 100000); // 100μs - - // Also try and block until the next hardware buffer swap early. this - // prevents start_frame from blocking later, thus slightly improving the - // frame timing stats. (since mpv assumes most blocking will happen in - // swap_buffers) - start_frame(sw, &p->queued_fbo); + pl_swapchain_swap_buffers(p->swapchain); } static const struct ra_swapchain_fns vulkan_swapchain = { diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h index a64d39f125..30c97cfb4f 100644 --- a/video/out/vulkan/context.h +++ b/video/out/vulkan/context.h @@ -7,7 +7,9 @@ void ra_vk_ctx_uninit(struct ra_ctx *ctx); bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, VkPresentModeKHR preferred_mode); -bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h); + +// Handles a resize request, and updates ctx->vo->dwidth/dheight +bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height); // May be called on a ra_ctx of any type. struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx); diff --git a/video/out/vulkan/context_wayland.c b/video/out/vulkan/context_wayland.c index 7276775242..f1091a534f 100644 --- a/video/out/vulkan/context_wayland.c +++ b/video/out/vulkan/context_wayland.c @@ -41,8 +41,7 @@ static bool wayland_vk_init(struct ra_ctx *ctx) struct mpvk_ctx *vk = &p->vk; int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR; - if (!mpvk_instance_init(vk, ctx->log, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, - ctx->opts.debug)) + if (!mpvk_init(vk, ctx, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME)) goto error; if (!vo_wayland_init(ctx->vo)) @@ -54,10 +53,10 @@ static bool wayland_vk_init(struct ra_ctx *ctx) .surface = ctx->vo->wl->surface, }; - VkResult res = vkCreateWaylandSurfaceKHR(vk->inst, &wlinfo, MPVK_ALLOCATOR, - &vk->surf); + VkInstance inst = vk->vkinst->instance; + VkResult res = vkCreateWaylandSurfaceKHR(inst, &wlinfo, NULL, &vk->surface); if (res != VK_SUCCESS) { - MP_MSG(ctx, msgl, "Failed creating Wayland surface: %s\n", vk_err(res)); + MP_MSG(ctx, msgl, "Failed creating Wayland surface\n"); goto error; } @@ -77,7 +76,7 @@ error: return false; } -static void resize(struct ra_ctx *ctx) +static bool resize(struct ra_ctx *ctx) { struct vo_wayland_state *wl = ctx->vo->wl; @@ -87,9 +86,7 @@ static void resize(struct ra_ctx *ctx) const int32_t height = wl->scaling*mp_rect_h(wl->geometry); wl_surface_set_buffer_scale(wl->surface, wl->scaling); - - wl->vo->dwidth = width; - wl->vo->dheight = height; + return ra_vk_ctx_resize(ctx, width, height); } static bool wayland_vk_reconfig(struct ra_ctx *ctx) @@ -104,8 +101,7 @@ static int wayland_vk_control(struct ra_ctx *ctx, int *events, int request, void { int ret = vo_wayland_control(ctx->vo, events, request, arg); if (*events & VO_EVENT_RESIZE) { - resize(ctx); - if (ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight)) + if (!resize(ctx)) return VO_ERROR; } return ret; diff --git a/video/out/vulkan/context_win.c b/video/out/vulkan/context_win.c deleted file mode 100644 index cf31586d00..0000000000 --- a/video/out/vulkan/context_win.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * This file is part of mpv. 
- * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see . - */ - -#include "video/out/gpu/context.h" -#include "video/out/w32_common.h" - -#include "common.h" -#include "context.h" -#include "utils.h" - -EXTERN_C IMAGE_DOS_HEADER __ImageBase; -#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) - -struct priv { - struct mpvk_ctx vk; -}; - -static void win_uninit(struct ra_ctx *ctx) -{ - struct priv *p = ctx->priv; - - ra_vk_ctx_uninit(ctx); - mpvk_uninit(&p->vk); - vo_w32_uninit(ctx->vo); -} - -static bool win_init(struct ra_ctx *ctx) -{ - struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); - struct mpvk_ctx *vk = &p->vk; - int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR; - - if (!mpvk_instance_init(vk, ctx->log, VK_KHR_WIN32_SURFACE_EXTENSION_NAME, - ctx->opts.debug)) - goto error; - - if (!vo_w32_init(ctx->vo)) - goto error; - - VkWin32SurfaceCreateInfoKHR wininfo = { - .sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, - .hinstance = HINST_THISCOMPONENT, - .hwnd = vo_w32_hwnd(ctx->vo), - }; - - VkResult res = vkCreateWin32SurfaceKHR(vk->inst, &wininfo, MPVK_ALLOCATOR, - &vk->surf); - if (res != VK_SUCCESS) { - MP_MSG(ctx, msgl, "Failed creating Windows surface: %s\n", vk_err(res)); - goto error; - } - - if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR)) - goto error; - - return true; - -error: - win_uninit(ctx); - return false; -} - -static bool resize(struct ra_ctx *ctx) -{ - return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight); -} - -static bool win_reconfig(struct ra_ctx *ctx) -{ - vo_w32_config(ctx->vo); - return resize(ctx); -} - -static int win_control(struct ra_ctx *ctx, int *events, int request, void *arg) -{ - int ret = vo_w32_control(ctx->vo, events, request, arg); - if (*events & VO_EVENT_RESIZE) { - if (!resize(ctx)) - return VO_ERROR; - } - return ret; -} - -const struct ra_ctx_fns ra_ctx_vulkan_win = { - .type = "vulkan", - .name = "winvk", - .reconfig = win_reconfig, - .control = win_control, - .init = win_init, - .uninit = win_uninit, -}; diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c index c3bd49f4fb..9baa7c4dc4 100644 --- a/video/out/vulkan/context_xlib.c +++ b/video/out/vulkan/context_xlib.c @@ -41,8 +41,7 @@ static bool xlib_init(struct ra_ctx *ctx) struct mpvk_ctx *vk = &p->vk; int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_ERR; - if (!mpvk_instance_init(vk, ctx->log, VK_KHR_XLIB_SURFACE_EXTENSION_NAME, - ctx->opts.debug)) + if (!mpvk_init(vk, ctx, VK_KHR_XLIB_SURFACE_EXTENSION_NAME)) goto error; if (!vo_x11_init(ctx->vo)) @@ -57,10 +56,10 @@ static bool xlib_init(struct ra_ctx *ctx) .window = ctx->vo->x11->window, }; - VkResult res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR, - &vk->surf); + VkInstance inst = vk->vkinst->instance; + VkResult res = vkCreateXlibSurfaceKHR(inst, &xinfo, NULL, &vk->surface); if (res != VK_SUCCESS) { - MP_MSG(ctx, msgl, "Failed creating Xlib surface: %s\n", vk_err(res)); + MP_MSG(ctx, msgl, "Failed creating Xlib surface\n"); goto error; } @@ -76,7 +75,7 @@ error: static bool resize(struct ra_ctx *ctx) { - return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight); + return ra_vk_ctx_resize(ctx, ctx->vo->dwidth, ctx->vo->dheight); } static bool xlib_reconfig(struct ra_ctx *ctx) diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c deleted file mode 100644 index 327a7ac809..0000000000 --- a/video/out/vulkan/formats.c +++ /dev/null @@ -1,55 +0,0 @@ -#include "formats.h" - -const struct vk_format vk_formats[] = { - // Regular, byte-aligned integer formats - {"r8", VK_FORMAT_R8_UNORM, 1, 1, {8 }, RA_CTYPE_UNORM }, - {"rg8", VK_FORMAT_R8G8_UNORM, 2, 2, {8, 8 }, RA_CTYPE_UNORM }, - {"rgb8", VK_FORMAT_R8G8B8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM }, - {"rgba8", VK_FORMAT_R8G8B8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM }, - {"r16", VK_FORMAT_R16_UNORM, 1, 2, {16 }, RA_CTYPE_UNORM }, - {"rg16", VK_FORMAT_R16G16_UNORM, 2, 4, {16, 16 }, RA_CTYPE_UNORM }, - {"rgb16", VK_FORMAT_R16G16B16_UNORM, 3, 6, {16, 16, 16 }, RA_CTYPE_UNORM }, - {"rgba16", VK_FORMAT_R16G16B16A16_UNORM, 4, 8, {16, 16, 16, 16}, RA_CTYPE_UNORM }, - - // Special, integer-only formats - {"r32ui", VK_FORMAT_R32_UINT, 1, 4, {32 }, RA_CTYPE_UINT }, - {"rg32ui", VK_FORMAT_R32G32_UINT, 2, 8, {32, 32 }, RA_CTYPE_UINT }, - {"rgb32ui", VK_FORMAT_R32G32B32_UINT, 3, 12, {32, 32, 32 }, RA_CTYPE_UINT }, - {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_UINT }, - {"r64ui", VK_FORMAT_R64_UINT, 1, 8, {64 }, RA_CTYPE_UINT }, - {"rg64ui", VK_FORMAT_R64G64_UINT, 2, 16, {64, 64 }, RA_CTYPE_UINT }, - {"rgb64ui", VK_FORMAT_R64G64B64_UINT, 3, 24, {64, 64, 64 }, RA_CTYPE_UINT }, - {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_UINT }, - - // Packed integer formats - {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM }, - {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM }, - {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM }, - {"rgb5a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM }, - - // Float formats (native formats, hf = half float, df = double float) - {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT }, - {"rg16hf", VK_FORMAT_R16G16_SFLOAT, 2, 4, {16, 16 }, RA_CTYPE_FLOAT }, - {"rgb16hf", VK_FORMAT_R16G16B16_SFLOAT, 3, 6, {16, 16, 16 }, RA_CTYPE_FLOAT }, - {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT, 4, 8, {16, 16, 16, 16}, RA_CTYPE_FLOAT }, - {"r32f", VK_FORMAT_R32_SFLOAT, 1, 4, {32 }, RA_CTYPE_FLOAT }, - {"rg32f", VK_FORMAT_R32G32_SFLOAT, 2, 8, {32, 32 }, RA_CTYPE_FLOAT }, - {"rgb32f",