Diffstat (limited to 'video')
-rw-r--r--  video/out/vulkan/common.h  |  19
-rw-r--r--  video/out/vulkan/context.c |  11
-rw-r--r--  video/out/vulkan/malloc.c  |  12
-rw-r--r--  video/out/vulkan/ra_vk.c   | 107
-rw-r--r--  video/out/vulkan/utils.c   | 268
-rw-r--r--  video/out/vulkan/utils.h   |  27
6 files changed, 281 insertions(+), 163 deletions(-)
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
index de49c6f1af..b849b6dc0b 100644
--- a/video/out/vulkan/common.h
+++ b/video/out/vulkan/common.h
@@ -48,10 +48,23 @@ struct mpvk_ctx {
     VkSurfaceKHR surf;
     VkSurfaceFormatKHR surf_format; // picked at surface initialization time
 
-    struct vk_malloc *alloc; // memory allocator for this device
-    struct vk_cmdpool *pool; // primary command pool for this device
-    struct vk_cmd *last_cmd; // most recently submitted (pending) command
+    struct vk_malloc *alloc; // memory allocator for this device
     struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler
+    struct vk_cmdpool **pools; // command pools (one per queue family)
+    int num_pools;
+    struct vk_cmd *last_cmd; // most recently submitted command
+
+    // Queued/pending commands. These are shared for the entire mpvk_ctx to
+    // ensure submission and callbacks are FIFO
+    struct vk_cmd **cmds_queued;  // recorded but not yet submitted
+    struct vk_cmd **cmds_pending; // submitted but not completed
+    int num_cmds_queued;
+    int num_cmds_pending;
+
+    // Pointers into *pools
+    struct vk_cmdpool *pool_graphics; // required
+    struct vk_cmdpool *pool_compute;  // optional
+    struct vk_cmdpool *pool_transfer; // optional
 
     // Common pool of signals, to avoid having to re-create these objects often
     struct vk_signal **signals;
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
index ddc80be042..4f96440652 100644
--- a/video/out/vulkan/context.c
+++ b/video/out/vulkan/context.c
@@ -245,7 +245,8 @@ void ra_vk_ctx_uninit(struct ra_ctx *ctx)
         struct priv *p = ctx->swapchain->priv;
         struct mpvk_ctx *vk = p->vk;
 
-        mpvk_dev_wait_cmds(vk, UINT64_MAX);
+        mpvk_flush_commands(vk);
+        mpvk_poll_commands(vk, UINT64_MAX);
 
         for (int i = 0; i < p->num_images; i++)
             ra_tex_free(ctx->ra, &p->images[i]);
@@ -355,7 +356,7 @@ bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
     // more than one swapchain already active, so we need to flush any pending
     // asynchronous swapchain release operations that may be ongoing.
     while (p->old_swapchain)
-        mpvk_dev_wait_cmds(vk, 100000); // 100μs
+        mpvk_poll_commands(vk, 100000); // 100μs
 
     VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
     sinfo.imageExtent = (VkExtent2D){ w, h };
@@ -501,14 +502,14 @@ static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
     vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL);
 
     vk_cmd_queue(vk, cmd);
-    if (!vk_flush_commands(vk))
+    if (!mpvk_flush_commands(vk))
         goto error;
 
     // Older nvidia drivers can spontaneously combust when submitting to the
     // same queue as we're rendering from, in a multi-queue scenario. Safest
     // option is to flush the commands first and then submit to the next queue.
    // We can drop this hack in the future, I suppose.
-    struct vk_cmdpool *pool = vk->pool;
+    struct vk_cmdpool *pool = vk->pool_graphics;
     VkQueue queue = pool->queues[pool->idx_queues];
 
     VkPresentInfoKHR pinfo = {
@@ -533,7 +534,7 @@ static void swap_buffers(struct ra_swapchain *sw)
     struct priv *p = sw->priv;
 
     while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
-        mpvk_dev_wait_cmds(p->vk, 100000); // 100μs
+        mpvk_poll_commands(p->vk, 100000); // 100μs
 }
 
 static const struct ra_swapchain_fns vulkan_swapchain = {
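The context.c changes above split the old mpvk_dev_wait_cmds() into an explicit submit step and a poll step. A minimal sketch of how the two halves are meant to compose over one frame, using only names introduced by this patch (the actual recording is elided):

    // Record work, hand it to the shared FIFO, submit once per frame, then
    // poll with timeout 0 to recycle command buffers that have finished.
    struct vk_cmd *cmd = vk_cmd_begin(vk, vk->pool_graphics);
    if (cmd) {
        // ... record vkCmd* calls into cmd->buf ...
        vk_cmd_queue(vk, cmd);   // ownership moves to vk->cmds_queued
    }
    mpvk_flush_commands(vk);     // one vkQueueSubmit per queued command
    mpvk_poll_commands(vk, 0);   // non-blocking garbage collection

Passing UINT64_MAX instead of 0, as ra_vk_ctx_uninit() now does, drains all pending work before teardown.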
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
index f6cb1143bb..a9aced33d8 100644
--- a/video/out/vulkan/malloc.c
+++ b/video/out/vulkan/malloc.c
@@ -133,11 +133,23 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
     uint32_t typeBits = heap->typeBits ? heap->typeBits : UINT32_MAX;
 
     if (heap->usage) {
+        // FIXME: Since we can't keep track of queue family ownership properly,
+        // and we don't know in advance what types of queue families this buffer
+        // will belong to, we're forced to share all of our buffers between all
+        // command pools.
+        uint32_t qfs[3] = {0};
+        for (int i = 0; i < vk->num_pools; i++)
+            qfs[i] = vk->pools[i]->qf;
+
        VkBufferCreateInfo binfo = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            .size  = slab->size,
            .usage = heap->usage,
-           .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+           .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
+                                            : VK_SHARING_MODE_EXCLUSIVE,
+           .queueFamilyIndexCount = vk->num_pools,
+           .pQueueFamilyIndices = qfs,
        };
 
        VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer));
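The FIXME above sidesteps Vulkan's queue family ownership rules by allocating every buffer as VK_SHARING_MODE_CONCURRENT whenever more than one pool exists. For comparison, a hedged sketch of what exclusive-mode sharing would demand instead: a release barrier on the source queue, paired with a matching acquire barrier on the destination queue (src_qf and dst_qf are hypothetical queue family indices, not variables from this patch):

    VkBufferMemoryBarrier release = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = 0,            // ignored for the release half
        .srcQueueFamilyIndex = src_qf, // family that currently owns the slab
        .dstQueueFamilyIndex = dst_qf, // family taking ownership
        .buffer = slab->buffer,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
    };
    vkCmdPipelineBarrier(cmd->buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0,
                         0, NULL, 1, &release, 0, NULL);

Tracking which family owns each slab at any given moment is exactly the bookkeeping the FIXME declines to do, at the cost of the (usually minor) CONCURRENT-mode performance penalty.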
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
index 42e6c2f64c..905fc89596 100644
--- a/video/out/vulkan/ra_vk.c
+++ b/video/out/vulkan/ra_vk.c
@@ -6,6 +6,12 @@
 
 static struct ra_fns ra_fns_vk;
 
+enum queue_type {
+    GRAPHICS,
+    COMPUTE,
+    TRANSFER,
+};
+
 // For ra.priv
 struct ra_vk {
     struct mpvk_ctx *vk;
@@ -22,18 +28,6 @@ struct mpvk_ctx *ra_vk_get(struct ra *ra)
     return p->vk;
 }
 
-// Returns a command buffer, or NULL on error
-static struct vk_cmd *vk_require_cmd(struct ra *ra)
-{
-    struct ra_vk *p = ra->priv;
-    struct mpvk_ctx *vk = ra_vk_get(ra);
-
-    if (!p->cmd)
-        p->cmd = vk_cmd_begin(vk, vk->pool);
-
-    return p->cmd;
-}
-
 static void vk_submit(struct ra *ra)
 {
     struct ra_vk *p = ra->priv;
@@ -45,22 +39,46 @@ static void vk_submit(struct ra *ra)
     }
 }
 
-// The callback's *priv will always be set to `ra`
-static void vk_callback(struct ra *ra, vk_cb callback, void *arg)
+// Returns a command buffer, or NULL on error
+static struct vk_cmd *vk_require_cmd(struct ra *ra, enum queue_type type)
 {
     struct ra_vk *p = ra->priv;
     struct mpvk_ctx *vk = ra_vk_get(ra);
 
-    if (p->cmd) {
-        vk_cmd_callback(p->cmd, callback, ra, arg);
-    } else {
-        vk_dev_callback(vk, callback, ra, arg);
+    struct vk_cmdpool *pool;
+    switch (type) {
+    case GRAPHICS: pool = vk->pool_graphics; break;
+    case COMPUTE:  pool = vk->pool_compute;  break;
+
+    // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec)
+    case TRANSFER:
+        pool = vk->pool_transfer;
+        if (!pool)
+            pool = vk->pool_compute;
+        if (!pool)
+            pool = vk->pool_graphics;
+        break;
+    default: abort();
     }
+
+    assert(pool);
+    if (p->cmd && p->cmd->pool == pool)
+        return p->cmd;
+
+    vk_submit(ra);
+    p->cmd = vk_cmd_begin(vk, pool);
+    return p->cmd;
 }
 
 #define MAKE_LAZY_DESTRUCTOR(fun, argtype)                  \
     static void fun##_lazy(struct ra *ra, argtype *arg) {   \
-        vk_callback(ra, (vk_cb) fun, arg);                  \
+        struct ra_vk *p = ra->priv;                         \
+        struct mpvk_ctx *vk = ra_vk_get(ra);                \
+        if (p->cmd) {                                       \
+            vk_cmd_callback(p->cmd, (vk_cb) fun, ra, arg);  \
+        } else {                                            \
+            vk_dev_callback(vk, (vk_cb) fun, ra, arg);      \
+        }                                                   \
     }
 
 static void vk_destroy_ra(struct ra *ra)
@@ -69,8 +87,8 @@ static void vk_destroy_ra(struct ra *ra)
     struct mpvk_ctx *vk = ra_vk_get(ra);
 
     vk_submit(ra);
-    vk_flush_commands(vk);
-    mpvk_dev_wait_cmds(vk, UINT64_MAX);
+    mpvk_flush_commands(vk);
+    mpvk_poll_commands(vk, UINT64_MAX);
     ra_tex_free(ra, &p->clear_tex);
 
     talloc_free(ra);
@@ -190,7 +208,7 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
     ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
     ra->max_pushc_size = vk->limits.maxPushConstantsSize;
 
-    if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
+    if (vk->pool_compute)
         ra->caps |= RA_CAP_COMPUTE;
 
     if (!vk_setup_formats(ra))
@@ -280,6 +298,7 @@ static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
 // For ra_tex.priv
 struct ra_tex_vk {
     bool external_img;
+    enum queue_type upload_queue;
     VkImageType type;
     VkImage img;
     struct vk_memslice mem;
@@ -480,6 +499,7 @@ static struct ra_tex *vk_tex_create(struct ra *ra,
     tex->params.initial_data = NULL;
 
     struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+    tex_vk->upload_queue = GRAPHICS;
 
     const struct vk_format *fmt = params->format->priv;
     switch (params->dimensions) {
@@ -501,6 +521,10 @@ static struct ra_tex *vk_tex_create(struct ra *ra,
     if (params->host_mutable || params->blit_dst || params->initial_data)
         usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
 
+    // Always use the transfer pool if available, for efficiency
+    if (params->host_mutable && vk->pool_transfer)
+        tex_vk->upload_queue = TRANSFER;
+
     // Double-check image usage support and fail immediately if invalid
     VkImageFormatProperties iprop;
     VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
@@ -528,6 +552,14 @@ static struct ra_tex *vk_tex_create(struct ra *ra,
         return NULL;
     }
 
+    // FIXME: Since we can't keep track of queue family ownership properly,
+    // and we don't know in advance what types of queue families this image
+    // will belong to, we're forced to share all of our images between all
+    // command pools.
+    uint32_t qfs[3] = {0};
+    for (int i = 0; i < vk->num_pools; i++)
+        qfs[i] = vk->pools[i]->qf;
+
     VkImageCreateInfo iinfo = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = tex_vk->type,
@@ -539,9 +571,10 @@ static struct ra_tex *vk_tex_create(struct ra *ra,
         .tiling = VK_IMAGE_TILING_OPTIMAL,
         .usage = usage,
         .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
-        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-        .queueFamilyIndexCount = 1,
-        .pQueueFamilyIndices = &vk->pool->qf,
+        .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
+                                         : VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = vk->num_pools,
+        .pQueueFamilyIndices = qfs,
     };
 
     VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
@@ -632,6 +665,7 @@ struct ra_buf_vk {
     struct vk_bufslice slice;
     int refcount; // 1 = object allocated but not in use, > 1 = in use
     bool needsflush;
+    enum queue_type update_queue;
     // "current" metadata, can change during course of execution
     VkPipelineStageFlags current_stage;
     VkAccessFlags current_access;
@@ -700,7 +734,7 @@ static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
         memcpy((void *)addr, data, size);
         buf_vk->needsflush = true;
     } else {
-        struct vk_cmd *cmd = vk_require_cmd(ra);
+        struct vk_cmd *cmd = vk_require_cmd(ra, buf_vk->update_queue);
         if (!cmd) {
             MP_ERR(ra, "Failed updating buffer!\n");
             return;
@@ -736,6 +770,9 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
     case RA_BUF_TYPE_TEX_UPLOAD:
         bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
         memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+        // Use TRANSFER-style updates for large enough buffers for efficiency
+        if (params->size > 1024*1024) // 1 MB
+            buf_vk->update_queue = TRANSFER;
         break;
     case RA_BUF_TYPE_UNIFORM:
         bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
@@ -746,6 +783,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
         bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
         memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
         align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
+        buf_vk->update_queue = COMPUTE;
         break;
     case RA_BUF_TYPE_VERTEX:
         bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
@@ -832,7 +870,7 @@ static bool vk_tex_upload(struct ra *ra,
     uint64_t size = region.bufferRowLength * region.bufferImageHeight *
                     region.imageExtent.depth;
 
-    struct vk_cmd *cmd = vk_require_cmd(ra);
+    struct vk_cmd *cmd = vk_require_cmd(ra, tex_vk->upload_queue);
     if (!cmd)
         goto error;
@@ -1478,7 +1516,12 @@ static void vk_renderpass_run(struct ra *ra,
     struct ra_renderpass *pass = params->pass;
     struct ra_renderpass_vk *pass_vk = pass->priv;
 
-    struct vk_cmd *cmd = vk_require_cmd(ra);
+    static const enum queue_type types[] = {
+        [RA_RENDERPASS_TYPE_RASTER]  = GRAPHICS,
+        [RA_RENDERPASS_TYPE_COMPUTE] = COMPUTE,
+    };
+
+    struct vk_cmd *cmd = vk_require_cmd(ra, types[pass->params.type]);
     if (!cmd)
         goto error;
@@ -1603,7 +1646,7 @@ static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
     struct ra_tex_vk *src_vk = src->priv;
     struct ra_tex_vk *dst_vk = dst->priv;
 
-    struct vk_cmd *cmd = vk_require_cmd(ra);
+    struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
     if (!cmd)
         return;
@@ -1643,7 +1686,7 @@ static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4],
     struct ra_tex_vk *tex_vk = tex->priv;
     assert(tex->params.blit_dst);
 
-    struct vk_cmd *cmd = vk_require_cmd(ra);
+    struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
     if (!cmd)
         return;
@@ -1726,7 +1769,7 @@ error:
 static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index,
                             VkPipelineStageFlags stage)
 {
-    struct vk_cmd *cmd = vk_require_cmd(ra);
+    struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
     if (!cmd)
         return;
@@ -1795,7 +1838,7 @@ static struct ra_fns ra_fns_vk = {
 struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex)
 {
     struct ra_vk *p = ra->priv;
-    struct vk_cmd *cmd = vk_require_cmd(ra);
+    struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
     if (!cmd)
         return NULL;
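The rewritten vk_require_cmd() is the linchpin of the multi-queue support in ra_vk.c: requests for the same pool keep batching into one open command buffer, while switching pools forces the open buffer to be queued first. A sketch of the resulting call behaviour, assuming a device where pool_transfer exists:

    struct vk_cmd *a = vk_require_cmd(ra, GRAPHICS); // begins on pool_graphics
    struct vk_cmd *b = vk_require_cmd(ra, GRAPHICS); // same pool, so b == a
    struct vk_cmd *c = vk_require_cmd(ra, TRANSFER); // pool differs: queues a
                                                     // via vk_submit(), then
                                                     // begins a fresh buffer
                                                     // on pool_transfer

If the device exposes no dedicated transfer family, the TRANSFER case falls back to the compute pool and then the graphics pool, which the Vulkan spec guarantees can also perform transfers.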
diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c
index 9d9d8d9820..cb73e7d8ac 100644
--- a/video/out/vulkan/utils.c
+++ b/video/out/vulkan/utils.c
@@ -139,7 +139,15 @@ void mpvk_uninit(struct mpvk_ctx *vk)
         return;
 
     if (vk->dev) {
-        vk_cmdpool_destroy(vk, vk->pool);
+        mpvk_flush_commands(vk);
+        mpvk_poll_commands(vk, UINT64_MAX);
+        assert(vk->num_cmds_queued == 0);
+        assert(vk->num_cmds_pending == 0);
+        talloc_free(vk->cmds_queued);
+        talloc_free(vk->cmds_pending);
+        for (int i = 0; i < vk->num_pools; i++)
+            vk_cmdpool_destroy(vk, vk->pools[i]);
+        talloc_free(vk->pools);
         for (int i = 0; i < vk->num_signals; i++)
             vk_signal_destroy(vk, &vk->signals[i]);
         talloc_free(vk->signals);
@@ -377,6 +385,53 @@ error:
     return false;
 }
 
+// Find the most specialized queue supporting a combination of flags. In cases
+// where there are multiple queue families at the same specialization level,
+// this finds the one with the most queues. Returns -1 if no queue was found.
+static int find_qf(VkQueueFamilyProperties *qfs, int qfnum, VkQueueFlags flags)
+{
+    int idx = -1;
+    for (int i = 0; i < qfnum; i++) {
+        if (!(qfs[i].queueFlags & flags))
+            continue;
+
+        // QF is more specialized
+        if (idx < 0 || qfs[i].queueFlags < qfs[idx].queueFlags)
+            idx = i;
+
+        // QF has more queues (at the same specialization level)
+        if (qfs[i].queueFlags == qfs[idx].queueFlags &&
+            qfs[i].queueCount > qfs[idx].queueCount)
+            idx = i;
+    }
+
+    return idx;
+}
+
+static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos,
+                      int *num_qinfos, VkQueueFamilyProperties *qfs, int idx,
+                      int qcount)
+{
+    if (idx < 0)
+        return;
+
+    // Check to see if we've already added this queue family
+    for (int i = 0; i < *num_qinfos; i++) {
+        if ((*qinfos)[i].queueFamilyIndex == idx)
+            return;
+    }
+
+    float *priorities = talloc_zero_array(tactx, float, qcount);
+    VkDeviceQueueCreateInfo qinfo = {
+        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+        .queueFamilyIndex = idx,
+        .queueCount = MPMIN(qcount, qfs[idx].queueCount),
+        .pQueuePriorities = priorities,
+    };
+
+    MP_TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo);
+}
+
 bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
 {
     assert(vk->physd);
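A worked example of the find_qf() selection rule, with illustrative numbers rather than any real device: "more specialized" means a numerically smaller flag set, so a family that can do less wins for the capability it does have.

    // Illustrative only: two made-up queue families.
    VkQueueFamilyProperties qfs[2] = {
        [0] = { .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT |
                              VK_QUEUE_TRANSFER_BIT, .queueCount = 16 },
        [1] = { .queueFlags = VK_QUEUE_TRANSFER_BIT, .queueCount = 2 },
    };
    int qf_gfx = find_qf(qfs, 2, VK_QUEUE_GRAPHICS_BIT); // == 0 (only match)
    int qf_tf  = find_qf(qfs, 2, VK_QUEUE_TRANSFER_BIT); // == 1: 0x4 < 0x7,
                                                         // so QF 1 is "more
                                                         // specialized"

Ties at the same specialization level are then broken in favor of the family with the larger queueCount.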
@@ -395,45 +450,34 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
                    (unsigned)qfs[i].queueFlags, (int)qfs[i].queueCount);
     }
 
-    // For most of our rendering operations, we want to use one "primary" pool,
-    // so just pick the queue family with the most features.
-    int idx = -1;
-    for (int i = 0; i < qfnum; i++) {
-        if (!(qfs[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
-            continue;
-
-        // QF supports more features
-        if (idx < 0 || qfs[i].queueFlags > qfs[idx].queueFlags)
-            idx = i;
-
-        // QF supports more queues (at the same specialization level)
-        if (qfs[i].queueFlags == qfs[idx].queueFlags &&
-            qfs[i].queueCount > qfs[idx].queueCount)
-        {
-            idx = i;
-        }
-    }
+    int idx_gfx  = find_qf(qfs, qfnum, VK_QUEUE_GRAPHICS_BIT),
+        idx_comp = find_qf(qfs, qfnum, VK_QUEUE_COMPUTE_BIT),
+        idx_tf   = find_qf(qfs, qfnum, VK_QUEUE_TRANSFER_BIT);
 
     // Vulkan requires at least one GRAPHICS queue, so if this fails something
     // is horribly wrong.
-    assert(idx >= 0);
+    assert(idx_gfx >= 0);
+    MP_VERBOSE(vk, "Using graphics queue (QF %d)\n", idx_gfx);
 
     // Ensure we can actually present to the surface using this queue
     VkBool32 sup;
-    VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx, vk->surf, &sup));
+    VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx_gfx, vk->surf, &sup));
     if (!sup) {
         MP_ERR(vk, "Queue family does not support surface presentation!\n");
         goto error;
     }
 
+    if (idx_tf >= 0 && idx_tf != idx_gfx)
+        MP_VERBOSE(vk, "Using async transfer (QF %d)\n", idx_tf);
+    if (idx_comp >= 0 && idx_comp != idx_gfx)
+        MP_VERBOSE(vk, "Using async compute (QF %d)\n", idx_comp);
+
     // Now that we know which QFs we want, we can create the logical device
-    float *priorities = talloc_zero_array(tmp, float, opts.queue_count);
-    VkDeviceQueueCreateInfo qinfo = {
-        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-        .queueFamilyIndex = idx,
-        .queueCount = MPMIN(qfs[idx].queueCount, opts.queue_count),
-        .pQueuePriorities = priorities,
-    };
+    VkDeviceQueueCreateInfo *qinfos = NULL;
+    int num_qinfos = 0;
+    add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_gfx, opts.queue_count);
+    add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, opts.queue_count);
+    add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, opts.queue_count);
 
     const char **exts = NULL;
     int num_exts = 0;
@@ -443,8 +487,8 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
 
     VkDeviceCreateInfo dinfo = {
         .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
-        .queueCreateInfoCount = 1,
-        .pQueueCreateInfos = &qinfo,
+        .pQueueCreateInfos = qinfos,
+        .queueCreateInfoCount = num_qinfos,
         .ppEnabledExtensionNames = exts,
         .enabledExtensionCount = num_exts,
     };
@@ -455,12 +499,20 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
 
     VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev));
 
-    vk_malloc_init(vk);
+    // Create the command pools and memory allocator
+    for (int i = 0; i < num_qinfos; i++) {
+        int qf = qinfos[i].queueFamilyIndex;
+        struct vk_cmdpool *pool = vk_cmdpool_create(vk, qinfos[i], qfs[qf]);
+        if (!pool)
+            goto error;
+        MP_TARRAY_APPEND(NULL, vk->pools, vk->num_pools, pool);
+    }
 
-    // Create the command pool(s)
-    vk->pool = vk_cmdpool_create(vk, qinfo, qfs[idx]);
-    if (!vk->pool)
-        goto error;
+    vk->pool_graphics = vk->pools[idx_gfx];
+    vk->pool_compute  = idx_comp >= 0 ? vk->pools[idx_comp] : NULL;
+    vk->pool_transfer = idx_tf   >= 0 ? vk->pools[idx_tf]   : NULL;
+
+    vk_malloc_init(vk);
 
     talloc_free(tmp);
     return true;
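Note that add_qinfo() deduplicates by queue family, so the three lookups can collapse to a single pool. An illustrative case: on a device where graphics, compute and transfer all resolve to QF 0, only one VkDeviceQueueCreateInfo (and hence one command pool) is created, and the three pool_* pointers alias it.

    // Illustrative: idx_gfx == idx_comp == idx_tf == 0
    add_qinfo(tmp, &qinfos, &num_qinfos, qfs, 0, opts.queue_count); // appended
    add_qinfo(tmp, &qinfos, &num_qinfos, qfs, 0, opts.queue_count); // deduped
    add_qinfo(tmp, &qinfos, &num_qinfos, qfs, 0, opts.queue_count); // deduped
    // num_qinfos == 1, so pool_graphics == pool_compute == pool_transfer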
@@ -563,10 +615,8 @@ static void vk_cmdpool_destroy(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
     if (!pool)
         return;
 
-    for (int i = 0; i < pool->num_cmds_available; i++)
-        vk_cmd_destroy(vk, pool->cmds_available[i]);
-    for (int i = 0; i < pool->num_cmds_pending; i++)
-        vk_cmd_destroy(vk, pool->cmds_pending[i]);
+    for (int i = 0; i < pool->num_cmds; i++)
+        vk_cmd_destroy(vk, pool->cmds[i]);
 
     vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR);
     talloc_free(pool);
@@ -603,26 +653,67 @@ error:
     return NULL;
 }
 
-void mpvk_pool_wait_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
-                         uint64_t timeout)
+void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout)
 {
-    if (!pool)
-        return;
-
-    while (pool->num_cmds_pending > 0) {
-        struct vk_cmd *cmd = pool->cmds_pending[0];
+    while (vk->num_cmds_pending > 0) {
+        struct vk_cmd *cmd = vk->cmds_pending[0];
+        struct vk_cmdpool *pool = cmd->pool;
         VkResult res = vk_cmd_poll(vk, cmd, timeout);
         if (res == VK_TIMEOUT)
             break;
         vk_cmd_reset(vk, cmd);
-        MP_TARRAY_REMOVE_AT(pool->cmds_pending, pool->num_cmds_pending, 0);
-        MP_TARRAY_APPEND(pool, pool->cmds_available, pool->num_cmds_available, cmd);
+        MP_TARRAY_REMOVE_AT(vk->cmds_pending, vk->num_cmds_pending, 0);
+        MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
     }
 }
 
-void mpvk_dev_wait_cmds(struct mpvk_ctx *vk, uint64_t timeout)
+bool mpvk_flush_commands(struct mpvk_ctx *vk)
 {
-    mpvk_pool_wait_cmds(vk, vk->pool, timeout);
+    bool ret = true;
+
+    for (int i = 0; i < vk->num_cmds_queued; i++) {
+        struct vk_cmd *cmd = vk->cmds_queued[i];
+        struct vk_cmdpool *pool = cmd->pool;
+
+        VkSubmitInfo sinfo = {
+            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+            .commandBufferCount = 1,
+            .pCommandBuffers = &cmd->buf,
+            .waitSemaphoreCount = cmd->num_deps,
+            .pWaitSemaphores = cmd->deps,
+            .pWaitDstStageMask = cmd->depstages,
+            .signalSemaphoreCount = cmd->num_sigs,
+            .pSignalSemaphores = cmd->sigs,
+        };
+
+        VK(vkQueueSubmit(cmd->queue, 1, &sinfo, cmd->fence));
+        MP_TARRAY_APPEND(NULL, vk->cmds_pending, vk->num_cmds_pending, cmd);
+
+        if (mp_msg_test(vk->log, MSGL_TRACE)) {
+            MP_TRACE(vk, "Submitted command on queue %p (QF %d):\n",
+                     (void *)cmd->queue, pool->qf);
+            for (int n = 0; n < cmd->num_deps; n++)
+                MP_TRACE(vk, "    waits on semaphore %p\n", (void *)cmd->deps[n]);
+            for (int n = 0; n < cmd->num_sigs; n++)
+                MP_TRACE(vk, "    signals semaphore %p\n", (void *)cmd->sigs[n]);
+        }
+        continue;
+
+error:
+        vk_cmd_reset(vk, cmd);
+        MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
+        ret = false;
+    }
+
+    vk->num_cmds_queued = 0;
+
+    // Rotate the queues to ensure good parallelism across frames
+    for (int i = 0; i < vk->num_pools; i++) {
+        struct vk_cmdpool *pool = vk->pools[i];
+        pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
+    }
+
+    return ret;
 }
 
 void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg)
@@ -639,10 +730,10 @@ struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
 {
     // garbage collect the cmdpool first, to increase the chances of getting
     // an already-available command buffer
-    mpvk_pool_wait_cmds(vk, pool, 0);
+    mpvk_poll_commands(vk, 0);
 
     struct vk_cmd *cmd = NULL;
-    if (MP_TARRAY_POP(pool->cmds_available, pool->num_cmds_available, &cmd))
+    if (MP_TARRAY_POP(pool->cmds, pool->num_cmds, &cmd))
         goto done;
 
     // No free command buffers => allocate another one
@@ -675,58 +766,13 @@ void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd)
 
     VK(vkEndCommandBuffer(cmd->buf));
 
     VK(vkResetFences(vk->dev, 1, &cmd->fence));
-    MP_TARRAY_APPEND(pool, pool->cmds_queued, pool->num_cmds_queued, cmd);
+    MP_TARRAY_APPEND(NULL, vk->cmds_queued, vk->num_cmds_queued, cmd);
     vk->last_cmd = cmd;
     return;
 
 error:
     vk_cmd_reset(vk, cmd);
-    MP_TARRAY_APPEND(pool, pool->cmds_available, pool->num_cmds_available, cmd);
-}
-
-bool vk_flush_commands(struct mpvk_ctx *vk)
-{
-    bool ret = true;
-
-    struct vk_cmdpool *pool = vk->pool;
-    for (int i = 0; i < pool->num_cmds_queued; i++) {
-        struct vk_cmd *cmd = pool->cmds_queued[i];
-
-        VkSubmitInfo sinfo = {
-            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-            .commandBufferCount = 1,
-            .pCommandBuffers = &cmd->buf,
-            .waitSemaphoreCount = cmd->num_deps,
-            .pWaitSemaphores = cmd->deps,
-            .pWaitDstStageMask = cmd->depstages,
-            .signalSemaphoreCount = cmd->num_sigs,
-            .pSignalSemaphores = cmd->sigs,
-        };
-
-        VK(vkQueueSubmit(cmd->queue, 1, &sinfo, cmd->fence));
-        MP_TARRAY_APPEND(pool, pool->cmds_pending, pool->num_cmds_pending, cmd);
-
-        if (mp_msg_test(vk->log, MSGL_TRACE)) {
-            MP_TRACE(vk, "Submitted command on queue %p (QF %d):\n",
-                     (void *)cmd->queue, pool->qf);
-            for (int n = 0; n < cmd->num_deps; n++)
-                MP_TRACE(vk, "    waits on semaphore %p\n", (void *)cmd->deps[n]);
-            for (int n = 0; n < cmd->num_sigs; n++)
-                MP_TRACE(vk, "    signals semaphore %p\n", (void *)cmd->sigs[n]);
-        }
-        continue;
-
-error:
-        vk_cmd_reset(vk, cmd);
-        MP_TARRAY_APPEND(pool, pool->cmds_available, pool->num_cmds_available, cmd);
-        ret = false;
-    }
-
-    pool->num_cmds_queued = 0;
-
-    // Rotate the queues to ensure good parallelism across frames
-    pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
-    return ret;
+    MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
 }
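Two properties of the relocated submission logic are worth spelling out. First, because mpvk_poll_commands() always drains vk->cmds_pending from index 0 and stops at the first unfinished command, completion callbacks fire in submission order across all queue families, which is the FIFO guarantee the new common.h comment promises. Second, the queue rotation now happens per pool:

    // After all queued commands are submitted, every pool rotates its queue
    // index, so work recorded for the next frame lands on a different
    // hardware queue within the same family:
    pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;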
@@ -762,10 +808,16 @@ struct vk_signal *vk_cmd_signal(struct mpvk_ctx *vk, struct vk_cmd *cmd,
     VK(vkCreateEvent(vk->dev, &einfo, MPVK_ALLOCATOR, &sig->event));
 
 done:
-    // Signal both the semaphore and the event. (We will only end up using one)
+    // Signal both the semaphore and the event if possible. (We will only
+    // end up using one or the other)
     vk_cmd_sig(cmd, sig->semaphore);
-    vkCmdSetEvent(cmd->buf, sig->event, stage);
-    sig->event_source = cmd->queue;
+
+    VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
+    if (cmd->pool->props.queueFlags & req) {
+        vkCmdSetEvent(cmd->buf, sig->event, stage);
+        sig->event_source = cmd->queue;
+    }
+
     return sig;
 
 error:
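For context on the new guard: per the Vulkan spec, vkCmdSetEvent may only be recorded on queues with graphics or compute capability, so on a transfer-only queue the event half of the signal must be skipped. The added branch, annotated:

    // Only graphics/compute queues may record vkCmdSetEvent; on a pure
    // transfer queue, sig->event_source stays NULL and later waiters fall
    // back to the semaphore path instead of the cheaper VkEvent path.
    VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
    if (cmd->pool->props.queueFlags & req) {
        vkCmdSetEvent(cmd->buf, sig->event, stage);
        sig->event_source = cmd->queue;
    }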
@@ -787,14 +839,14 @@ static bool unsignal_cmd(struct vk_cmd *cmd, VkSemaphore sem)
 
 // Attempts to remove a queued signal operation. Returns true if successful,
 // i.e. the signal could be removed before it ever got fired.
-static bool unsignal(struct vk_cmd *cmd, VkSemaphore sem)
+static bool unsignal(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore sem)
 {
     if (unsignal_cmd(cmd, sem))
         return true;
 
     // Attempt to remove it from any queued commands
-    for (int i = 0; i < cmd->pool->num_cmds_queued; i++) {
-        if (unsignal_cmd(cmd->pool->cmds_queued[i], sem))
+    for (int i = 0; i < vk->num_cmds_queued; i++) {
+        if (unsignal_cmd(vk->cmds_queued[i], sem))
             return true;
     }
 
@@ -806,7 +858,9 @@ static void release_signal(struct mpvk_ctx *vk, struct vk_signal *sig)
     // The semaphore never needs to be recreated, because it's either
     // unsignaled while still queued, or unsignaled as a result of a device
     // wait. But the event *may* need to be reset, so just always reset it.
-    vkResetEvent(vk->dev, sig->event);
+    if (sig->event_source)
+        vkResetEvent(vk->dev, sig->event);
+    sig->event_source = NULL;
     MP_TARRAY_APPEND(NULL, vk->signals, vk->num_signals, sig);
 }
 
@@ -819,7 +873,7 @@ void vk_cmd_wait(struct mpvk_ctx *vk, struct vk_cmd *cmd,
         return;
 
     if (out_event && sig->event && sig->event_source == cmd->queue &&
-        unsignal(cmd, sig->semaphore))
+        unsignal(vk, cmd, sig->semaphore))
     {
         // If we can remove the semaphore signal operation from the history and
         // pretend it never happened, then we get to use the VkEvent. This also
diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h
index 538897afae..de3a757be3 100644
--- a/video/out/vulkan/utils.h
+++ b/video/out/vulkan/utils.h
@@ -66,9 +66,13 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts);
 // UINT64_MAX effectively means waiting until the pool/device is idle. The
 // timeout may also be passed as 0, in which case this function will not block,
 // but only poll for completed commands.
-void mpvk_pool_wait_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
-                         uint64_t timeout);
-void mpvk_dev_wait_cmds(struct mpvk_ctx *vk, uint64_t timeout);
+void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout);
+
+// Flush all currently queued commands. Call this once per frame, after
+// submitting all of the command buffers for that frame. Calling this more
+// often than that is possible but bad for performance.
+// Returns whether successful. Failed commands will be implicitly dropped.
+bool mpvk_flush_commands(struct mpvk_ctx *vk);
 
 // Since lots of vulkan operations need to be done lazily once the affected
 // resources are no longer in use, provide an abstraction for tracking these.
@@ -158,13 +162,10 @@ struct vk_cmdpool {
     VkQueue *queues;
     int num_queues;
     int idx_queues;
-    // Command buffers associated with this queue
-    struct vk_cmd **cmds_available; // available for re-recording
-    struct vk_cmd **cmds_queued;    // recorded but not yet submitted
-    struct vk_cmd **cmds_pending;   // submitted but not completed
-    int num_cmds_available;
-    int num_cmds_queued;
-    int num_cmds_pending;
+    // Command buffers associated with this queue. These are available for
+    // re-recording
+    struct vk_cmd **cmds;
+    int num_cmds;
 };
 
 // Fetch a command buffer from a command pool and begin recording to it.
@@ -175,12 +176,6 @@ struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
 // takes over ownership of *cmd, i.e. the caller should not touch it again.
 void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd);
 
-// Flush all currently queued commands. Call this once per frame, after
-// submitting all of the command buffers for that frame. Calling this more
-// often than that is possible but bad for performance.
-// Returns whether successful. Failed commands will be implicitly dropped.
-bool vk_flush_commands(struct mpvk_ctx *vk);
-
 // Predefined structs for a simple non-layered, non-mipped image
 extern const VkImageSubresourceRange vk_range;
 extern const VkImageSubresourceLayers vk_layers;
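Finally, a sketch of the lazy-destruction pattern that the reworked MAKE_LAZY_DESTRUCTOR macro in ra_vk.c expands to. The names vk_foo_destroy and struct foo are hypothetical stand-ins for any destructor/type pair; the point is that destruction is deferred via the command-callback machinery rather than performed immediately:

    // Generates vk_foo_destroy_lazy(struct ra *ra, struct foo *arg): if a
    // command buffer is currently being recorded, the destructor runs when
    // that command completes; otherwise it is deferred with vk_dev_callback()
    // until all pending device work has finished.
    MAKE_LAZY_DESTRUCTOR(vk_foo_destroy, struct foo);

    // Usage: safe to call even while `foo` is still referenced by in-flight
    // GPU work, since the actual vk_foo_destroy() call is postponed.
    vk_foo_destroy_lazy(ra, foo);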