-rw-r--r--  DOCS/man/options.rst       |   3
-rw-r--r--  video/out/vulkan/common.h  |  19
-rw-r--r--  video/out/vulkan/context.c |  11
-rw-r--r--  video/out/vulkan/malloc.c  |  12
-rw-r--r--  video/out/vulkan/ra_vk.c   | 107
-rw-r--r--  video/out/vulkan/utils.c   | 268
-rw-r--r--  video/out/vulkan/utils.h   |  27
7 files changed, 283 insertions(+), 164 deletions(-)
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index a92a8df962..f50f67a203 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -4295,7 +4295,8 @@ The following video options are currently all specific to ``--vo=gpu`` and
Controls the number of VkQueues used for rendering (limited by how many
your device supports). In theory, using more queues could enable some
parallelism between frames (when using a ``--swapchain-depth`` higher than
- 1). (Default: 1)
+ 1), but it can also slow things down on hardware where there's no true
+ parallelism between queues. (Default: 1)
``--d3d11-warp=<yes|no|auto>``
Use WARP (Windows Advanced Rasterization Platform) with the D3D11 GPU
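Returning to the ``--vulkan-queue-count`` documentation above: a quick way to experiment with the setting (illustrative invocation; ``video.mkv`` is a placeholder, and as the new text notes, more queues may help or hurt depending on the hardware)::

    mpv --vo=gpu --gpu-api=vulkan --vulkan-queue-count=2 video.mkv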
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
index de49c6f1af..b849b6dc0b 100644
--- a/video/out/vulkan/common.h
+++ b/video/out/vulkan/common.h
@@ -48,10 +48,23 @@ struct mpvk_ctx {
VkSurfaceKHR surf;
VkSurfaceFormatKHR surf_format; // picked at surface initialization time
- struct vk_malloc *alloc; // memory allocator for this device
- struct vk_cmdpool *pool; // primary command pool for this device
- struct vk_cmd *last_cmd; // most recently submitted (pending) command
+ struct vk_malloc *alloc; // memory allocator for this device
struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler
+ struct vk_cmdpool **pools; // command pools (one per queue family)
+ int num_pools;
+ struct vk_cmd *last_cmd; // most recently submitted command
+
+ // Queued/pending commands. These are shared for the entire mpvk_ctx to
+ // ensure submission and callbacks are FIFO
+ struct vk_cmd **cmds_queued; // recorded but not yet submitted
+ struct vk_cmd **cmds_pending; // submitted but not completed
+ int num_cmds_queued;
+ int num_cmds_pending;
+
+ // Pointers into *pools
+ struct vk_cmdpool *pool_graphics; // required
+ struct vk_cmdpool *pool_compute; // optional
+ struct vk_cmdpool *pool_transfer; // optional
// Common pool of signals, to avoid having to re-create these objects often
struct vk_signal **signals;
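The new layout above moves command-buffer bookkeeping out of the per-family pools: a pool only keeps buffers available for re-recording, while recorded (cmds_queued) and submitted (cmds_pending) commands live on the shared mpvk_ctx, so submission order and completion callbacks stay FIFO across all pools. A minimal standalone sketch of that invariant, with plain integers standing in for mpv's vk_cmd objects (all names here are illustrative, not mpv API):

    #include <stdio.h>
    #include <string.h>

    #define MAX 8

    // Illustrative stand-ins for vk->cmds_queued / vk->cmds_pending: commands
    // move from queued to pending in strict submission order, so completion
    // callbacks also fire FIFO, regardless of which pool a command came from.
    static int queued[MAX], num_queued;
    static int pending[MAX], num_pending;

    static void queue_cmd(int id) { queued[num_queued++] = id; }

    static void flush_cmds(void) // analogous to mpvk_flush_commands
    {
        for (int i = 0; i < num_queued; i++)
            pending[num_pending++] = queued[i];
        num_queued = 0;
    }

    static void poll_cmds(void) // analogous to mpvk_poll_commands
    {
        while (num_pending > 0) {
            printf("completed cmd %d\n", pending[0]); // callbacks fire here
            memmove(pending, pending + 1, --num_pending * sizeof(int));
        }
    }

    int main(void)
    {
        queue_cmd(1); queue_cmd(2); flush_cmds();
        queue_cmd(3); flush_cmds();
        poll_cmds(); // prints 1, 2, 3 in submission order
        return 0;
    }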
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
index ddc80be042..4f96440652 100644
--- a/video/out/vulkan/context.c
+++ b/video/out/vulkan/context.c
@@ -245,7 +245,8 @@ void ra_vk_ctx_uninit(struct ra_ctx *ctx)
struct priv *p = ctx->swapchain->priv;
struct mpvk_ctx *vk = p->vk;
- mpvk_dev_wait_cmds(vk, UINT64_MAX);
+ mpvk_flush_commands(vk);
+ mpvk_poll_commands(vk, UINT64_MAX);
for (int i = 0; i < p->num_images; i++)
ra_tex_free(ctx->ra, &p->images[i]);
@@ -355,7 +356,7 @@ bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
// more than one swapchain already active, so we need to flush any pending
// asynchronous swapchain release operations that may be ongoing.
while (p->old_swapchain)
- mpvk_dev_wait_cmds(vk, 100000); // 100μs
+ mpvk_poll_commands(vk, 100000); // 100μs
VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
sinfo.imageExtent = (VkExtent2D){ w, h };
@@ -501,14 +502,14 @@ static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL);
vk_cmd_queue(vk, cmd);
- if (!vk_flush_commands(vk))
+ if (!mpvk_flush_commands(vk))
goto error;
// Older nvidia drivers can spontaneously combust when submitting to the
// same queue as we're rendering from, in a multi-queue scenario. Safest
// option is to flush the commands first and then submit to the next queue.
// We can drop this hack in the future, I suppose.
- struct vk_cmdpool *pool = vk->pool;
+ struct vk_cmdpool *pool = vk->pool_graphics;
VkQueue queue = pool->queues[pool->idx_queues];
VkPresentInfoKHR pinfo = {
@@ -533,7 +534,7 @@ static void swap_buffers(struct ra_swapchain *sw)
struct priv *p = sw->priv;
while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
- mpvk_dev_wait_cmds(p->vk, 100000); // 100μs
+ mpvk_poll_commands(p->vk, 100000); // 100μs
}
static const struct ra_swapchain_fns vulkan_swapchain = {
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
index f6cb1143bb..a9aced33d8 100644
--- a/video/out/vulkan/malloc.c
+++ b/video/out/vulkan/malloc.c
@@ -133,11 +133,23 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
uint32_t typeBits = heap->typeBits ? heap->typeBits : UINT32_MAX;
if (heap->usage) {
+ // FIXME: Since we can't keep track of queue family ownership properly,
+ // and we don't know in advance what types of queue families this buffer
+ // will belong to, we're forced to share all of our buffers between all
+ // command pools.
+ uint32_t qfs[3] = {0};
+ for (int i = 0; i < vk->num_pools; i++)
+ qfs[i] = vk->pools[i]->qf;
+
VkBufferCreateInfo binfo = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = slab->size,
.usage = heap->usage,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
+ : VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = vk->num_pools,
+ .pQueueFamilyIndices = qfs,
};
VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer));
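The FIXME above is the crux of the sharing change: buffer slabs are created with CONCURRENT sharing whenever more than one command pool exists, because queue family ownership transfers aren't tracked. A standalone illustration of just the create-info logic (hypothetical family indices; this fills the struct without touching a real device):

    #include <stdio.h>
    #include <vulkan/vulkan.h>

    int main(void)
    {
        // Hypothetical queue family indices, mirroring vk->pools[i]->qf
        uint32_t qfs[3] = {0, 1, 2};
        uint32_t num_pools = 3;

        VkBufferCreateInfo binfo = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            .size  = 1 << 20,
            .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
            // More than one pool forces CONCURRENT sharing, since we can't
            // know in advance which queue families will touch the buffer
            .sharingMode = num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
                                         : VK_SHARING_MODE_EXCLUSIVE,
            .queueFamilyIndexCount = num_pools,
            .pQueueFamilyIndices = qfs,
        };

        printf("sharingMode = %s\n",
               binfo.sharingMode == VK_SHARING_MODE_CONCURRENT
                   ? "CONCURRENT" : "EXCLUSIVE");
        return 0;
    }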
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
index 42e6c2f64c..905fc89596 100644
--- a/video/out/vulkan/ra_vk.c
+++ b/video/out/vulkan/ra_vk.c
@@ -6,6 +6,12 @@
static struct ra_fns ra_fns_vk;
+enum queue_type {
+ GRAPHICS,
+ COMPUTE,
+ TRANSFER,
+};
+
// For ra.priv
struct ra_vk {
struct mpvk_ctx *vk;
@@ -22,18 +28,6 @@ struct mpvk_ctx *ra_vk_get(struct ra *ra)
return p->vk;
}
-// Returns a command buffer, or NULL on error
-static struct vk_cmd *vk_require_cmd(struct ra *ra)
-{
- struct ra_vk *p = ra->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- if (!p->cmd)
- p->cmd = vk_cmd_begin(vk, vk->pool);
-
- return p->cmd;
-}
-
static void vk_submit(struct ra *ra)
{
struct ra_vk *p = ra->priv;
@@ -45,22 +39,46 @@ static void vk_submit(struct ra *ra)
}
}
-// The callback's *priv will always be set to `ra`
-static void vk_callback(struct ra *ra, vk_cb callback, void *arg)
+// Returns a command buffer, or NULL on error
+static struct vk_cmd *vk_require_cmd(struct ra *ra, enum queue_type type)
{
struct ra_vk *p = ra->priv;
struct mpvk_ctx *vk = ra_vk_get(ra);
- if (p->cmd) {
- vk_cmd_callback(p->cmd, callback, ra, arg);
- } else {
- vk_dev_callback(vk, callback, ra, arg);
+ struct vk_cmdpool *pool;
+ switch (type) {
+ case GRAPHICS: pool = vk->pool_graphics; break;
+ case COMPUTE: pool = vk->pool_compute; break;
+
+ // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec)
+ case TRANSFER:
+ pool = vk->pool_transfer;
+ if (!pool)
+ pool = vk->pool_compute;
+ if (!pool)
+ pool = vk->pool_graphics;
+ break;
+ default: abort();
}
+
+ assert(pool);
+ if (p->cmd && p->cmd->pool == pool)
+ return p->cmd;
+
+ vk_submit(ra);
+ p->cmd = vk_cmd_begin(vk, pool);
+ return p->cmd;
}
#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \
static void fun##_lazy(struct ra *ra, argtype *arg) { \
- vk_callback(ra, (vk_cb) fun, arg); \
+ struct ra_vk *p = ra->priv; \
+ struct mpvk_ctx *vk = ra_vk_get(ra); \
+ if (p->cmd) { \
+ vk_cmd_callback(p->cmd, (vk_cb) fun, ra, arg); \
+ } else { \
+ vk_dev_callback(vk, (vk_cb) fun, ra, arg); \
+ } \
}
static void vk_destroy_ra(struct ra *ra)
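One behavioral subtlety in the new vk_require_cmd: requesting TRANSFER falls back through COMPUTE to GRAPHICS, which is safe because the Vulkan spec guarantees GRAPHICS and COMPUTE queues implicitly support transfer operations. A minimal sketch of just that selection chain, with stub structs standing in for mpv's vk_cmdpool (illustrative names only):

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    enum queue_type { GRAPHICS, COMPUTE, TRANSFER };

    struct pool { const char *name; }; // stub for struct vk_cmdpool

    static struct pool *pick_pool(enum queue_type type, struct pool *gfx,
                                  struct pool *comp, struct pool *tf)
    {
        struct pool *pool = NULL;
        switch (type) {
        case GRAPHICS: pool = gfx; break;
        case COMPUTE:  pool = comp; break;
        case TRANSFER: // GRAPHICS/COMPUTE imply TRANSFER capability
            pool = tf ? tf : comp ? comp : gfx;
            break;
        }
        assert(pool); // GRAPHICS always exists; COMPUTE only used if present
        return pool;
    }

    int main(void)
    {
        struct pool gfx = {"graphics"}, comp = {"compute"};
        // No dedicated transfer family: TRANSFER work lands on compute
        printf("%s\n", pick_pool(TRANSFER, &gfx, &comp, NULL)->name);
        return 0;
    }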
@@ -69,8 +87,8 @@ static void vk_destroy_ra(struct ra *ra)
struct mpvk_ctx *vk = ra_vk_get(ra);
vk_submit(ra);
- vk_flush_commands(vk);
- mpvk_dev_wait_cmds(vk, UINT64_MAX);
+ mpvk_flush_commands(vk);
+ mpvk_poll_commands(vk, UINT64_MAX);
ra_tex_free(ra, &p->clear_tex);
talloc_free(ra);
@@ -190,7 +208,7 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
ra->max_pushc_size = vk->limits.maxPushConstantsSize;
- if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
+ if (vk->pool_compute)
ra->caps |= RA_CAP_COMPUTE;
if (!vk_setup_formats(ra))
@@ -280,6 +298,7 @@ static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
// For ra_tex.priv
struct ra_tex_vk {
bool external_img;
+ enum queue_type upload_queue;
VkImageType type;
VkImage img;
struct vk_memslice mem;
@@ -480,6 +499,7 @@ static struct ra_tex *vk_tex_create(struct ra *ra,
tex->params.initial_data = NULL;
struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+ tex_vk->upload_queue = GRAPHICS;
const struct vk_format *fmt = params->format->priv;
switch (params->dimensions) {
@@ -501,6 +521,10 @@ static struct ra_tex *vk_tex_create(struct ra *ra,
if (params->host_mutable || params->blit_dst || params->initial_data)
usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+ // Always use the transfer pool if available, for efficiency
+ if (params->host_mutable && vk->pool_transfer)
+ tex_vk->upload_queue = TRANSFER;
+
// Double-check image usage support and fail immediately if invalid
VkImageFormatProperties iprop;
VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
@@ -528,6 +552,14 @@ static struct ra_tex *vk_tex_create(struct ra *ra,
return NULL;
}
+ // FIXME: Since we can't keep track of queue family ownership properly,
+ // and we don't know in advance what types of queue families this image
+ // will belong to, we're forced to share all of our images between all
+ // command pools.
+ uint32_t qfs[3] = {0};
+ for (int i = 0; i < vk->num_pools; i++)
+ qfs[i] = vk->pools[i]->qf;
+
VkImageCreateInfo iinfo = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = tex_vk->type,
@@ -539,9 +571,10 @@ static struct ra_tex *vk_tex_create(struct ra *ra,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = usage,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = 1,
- .pQueueFamilyIndices = &vk->pool->qf,
+ .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
+ : VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = vk->num_pools,
+ .pQueueFamilyIndices = qfs,
};
VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
@@ -632,6 +665,7 @@ struct ra_buf_vk {
struct vk_bufslice slice;
int refcount; // 1 = object allocated but not in use, > 1 = in use
bool needsflush;
+ enum queue_type update_queue;
// "current" metadata, can change during course of execution
VkPipelineStageFlags current_stage;
VkAccessFlags current_access;
@@ -700,7 +734,7 @@ static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
memcpy((void *)addr, data, size);
buf_vk->needsflush = true;
} else {
- struct vk_cmd *cmd = vk_require_cmd(ra);
+ struct vk_cmd *cmd = vk_require_cmd(ra, buf_vk->update_queue);
if (!cmd) {
MP_ERR(ra, "Failed updating buffer!\n");
return;
@@ -736,6 +770,9 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
case RA_BUF_TYPE_TEX_UPLOAD:
bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ // Use TRANSFER-style updates for large enough buffers for efficiency
+ if (params->size > 1024*1024) // 1 MB
+ buf_vk->update_queue = TRANSFER;
break;
case RA_BUF_TYPE_UNIFORM:
bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
@@ -746,6 +783,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra,
bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
+ buf_vk->update_queue = COMPUTE;
break;
case RA_BUF_TYPE_VERTEX:
bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
@@ -832,7 +870,7 @@ static bool vk_tex_upload(struct ra *ra,
uint64_t size = region.bufferRowLength * region.bufferImageHeight *
region.imageExtent.depth;
- struct vk_cmd *cmd = vk_require_cmd(ra);
+ struct vk_cmd *cmd = vk_require_cmd(ra, tex_vk->upload_queue);
if (!cmd)
goto error;
@@ -1478,7 +1516,12 @@ static void vk_renderpass_run(struct ra *ra,
struct ra_renderpass *pass = params->pass;
struct ra_renderpass_vk *pass_vk = pass->priv;
- struct vk_cmd *cmd = vk_require_cmd(ra);
+ static const enum queue_type types[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = GRAPHICS,
+ [RA_RENDERPASS_TYPE_COMPUTE] = COMPUTE,
+ };
+
+ struct vk_cmd *cmd = vk_require_cmd(ra, types[pass->params.type]);
if (!cmd)
goto error;
@@ -1603,7 +1646,7 @@ static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
struct ra_tex_vk *src_vk = src->priv;
struct ra_tex_vk *dst_vk = dst->priv;
- struct vk_cmd *cmd = vk_require_cmd(ra);
+ struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
if (!cmd)
return;
@@ -1643,7 +1686,7 @@ static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4],
struct ra_tex_vk *tex_vk = tex->priv;
assert(tex->params.blit_dst);
- struct vk_cmd *cmd = vk_require_cmd(ra);
+ struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
if (!cmd)
return;
@@ -1726,7 +1769,7 @@ error:
static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index,
VkPipelineStageFlags stage)
{
- struct vk_cmd *cmd = vk_require_cmd(ra);
+ struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
if (!cmd)
return;
@@ -1795,7 +1838,7 @@ static struct ra_fns ra_fns_vk = {
struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex)
{
struct ra_vk *p = ra->priv;
- struct vk_cmd *cmd = vk_require_cmd(ra);
+ struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS);
if (!cmd)
return NULL;
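Two heuristics in this file pick the upload path: host-mutable textures switch to the dedicated transfer queue when one exists, and tex-upload staging buffers use TRANSFER-style updates above 1 MB (storage buffers always update on COMPUTE). A standalone mirror of those two decisions (stub arguments replace the real params/mpvk_ctx):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    enum queue_type { GRAPHICS, COMPUTE, TRANSFER };

    // Mirrors tex_vk->upload_queue selection in vk_tex_create
    static enum queue_type tex_upload_queue(bool host_mutable, bool have_tf)
    {
        return (host_mutable && have_tf) ? TRANSFER : GRAPHICS;
    }

    // Mirrors buf_vk->update_queue for RA_BUF_TYPE_TEX_UPLOAD buffers
    static enum queue_type buf_update_queue(size_t size)
    {
        return size > 1024 * 1024 ? TRANSFER : GRAPHICS; // 1 MB threshold
    }

    int main(void)
    {
        printf("%d %d\n", tex_upload_queue(true, true), // TRANSFER
                          buf_update_queue(4096));      // GRAPHICS
        return 0;
    }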
diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c
index 9d9d8d9820..cb73e7d8ac 100644
--- a/video/out/vulkan/utils.c
+++ b/video/out/vulkan/utils.c
@@ -139,7 +139,15 @@ void mpvk_uninit(struct mpvk_ctx *vk)
return;
if (vk->dev) {
- vk_cmdpool_destroy(vk, vk->pool);
+ mpvk_flush_commands(vk);
+ mpvk_poll_commands(vk, UINT64_MAX);
+ assert(vk->num_cmds_queued == 0);
+ assert(vk->num_cmds_pending == 0);
+ talloc_free(vk->cmds_queued);
+ talloc_free(vk->cmds_pending);
+ for (int i = 0; i < vk->num_pools; i++)
+ vk_cmdpool_destroy(vk, vk->pools[i]);
+ talloc_free(vk->pools);
for (int i = 0; i < vk->num_signals; i++)
vk_signal_destroy(vk, &vk->signals[i]);
talloc_free(vk->signals);
@@ -377,6 +385,53 @@ error:
return false;
}
+// Find the most specialized queue family supporting a combination of flags.
+// In cases where there are multiple queue families at the same specialization
+// level, this finds the one with the most queues. Returns -1 if no suitable
+// queue family was found.
+static int find_qf(VkQueueFamilyProperties *qfs, int qfnum, VkQueueFlags flags)
+{
+ int idx = -1;
+ for (int i = 0; i < qfnum; i++) {
+ if (!(qfs[i].queueFlags & flags))
+ continue;
+
+ // QF is more specialized
+ if (idx < 0 || qfs[i].queueFlags < qfs[idx].queueFlags)
+ idx = i;
+
+ // QF has more queues (at the same specialization level)
+ if (qfs[i].queueFlags == qfs[idx].queueFlags &&
+ qfs[i].queueCount > qfs[idx].queueCount)
+ idx = i;
+ }
+
+ return idx;
+}
+
+static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos,
+ int *num_qinfos, VkQueueFamilyProperties *qfs, int idx,
+ int qcount)
+{
+ if (idx < 0)
+ return;
+
+ // Check to see if we've already added this queue family
+ for (int i = 0; i < *num_qinfos; i++) {
+ if ((*qinfos)[i].queueFamilyIndex == idx)
+ return;
+ }
+
+ float *priorities = talloc_zero_array(tactx, float, qcount);
+ VkDeviceQueueCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueFamilyIndex = idx,
+ .queueCount = MPMIN(qcount, qfs[idx].queueCount),
+ .pQueuePriorities = priorities,
+ };
+
+ MP_TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo);
+}
+
bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
{
assert(vk->physd);
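To see what the new find_qf above selects in practice, consider a hypothetical device exposing one do-everything family and one transfer-only family, a common desktop layout. The function treats a numerically smaller queueFlags value as "more specialized", which in practice favors families with fewer capability bits, so the dedicated family wins for TRANSFER. A standalone check (local copy of the logic; the family properties are made up):

    #include <stdio.h>
    #include <vulkan/vulkan.h>

    // Local copy of find_qf's selection logic, for demonstration
    static int find_qf(VkQueueFamilyProperties *qfs, int qfnum,
                       VkQueueFlags flags)
    {
        int idx = -1;
        for (int i = 0; i < qfnum; i++) {
            if (!(qfs[i].queueFlags & flags))
                continue;
            if (idx < 0 || qfs[i].queueFlags < qfs[idx].queueFlags)
                idx = i; // numerically smaller flags => more specialized
            if (qfs[i].queueFlags == qfs[idx].queueFlags &&
                qfs[i].queueCount > qfs[idx].queueCount)
                idx = i; // tie-break on queue count
        }
        return idx;
    }

    int main(void)
    {
        // Hypothetical families: one general-purpose, one transfer-only
        VkQueueFamilyProperties qfs[2] = {
            { .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT |
                            VK_QUEUE_TRANSFER_BIT, .queueCount = 16 },
            { .queueFlags = VK_QUEUE_TRANSFER_BIT, .queueCount = 2 },
        };

        printf("graphics -> QF %d\n", find_qf(qfs, 2, VK_QUEUE_GRAPHICS_BIT));
        printf("transfer -> QF %d\n", find_qf(qfs, 2, VK_QUEUE_TRANSFER_BIT));
        return 0; // prints QF 0 for graphics, QF 1 for transfer
    }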
@@ -395,45 +450,34 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
(unsigned)qfs[i].queueFlags, (int)qfs[i].queueCount);
}
- // For most of our rendering operations, we want to use one "primary" pool,
- // so just pick the queue family with the most features.
- int idx = -1;
- for (int i = 0; i < qfnum; i++) {
- if (!(qfs[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
- continue;
-
- // QF supports more features
- if (idx < 0 || qfs[i].queueFlags > qfs[idx].queueFlags)
- idx = i;
-
- // QF supports more queues (at the same specialization level)
- if (qfs[i].queueFlags == qfs[idx].queueFlags &&
- qfs[i].queueCount > qfs[idx].queueCount)
- {
- idx = i;
- }
- }
+ int idx_gfx = find_qf(qfs, qfnum, VK_QUEUE_GRAPHICS_BIT),
+ idx_comp = find_qf(qfs, qfnum, VK_QUEUE_COMPUTE_BIT),
+ idx_tf = find_qf(qfs, qfnum, VK_QUEUE_TRANSFER_BIT);
// Vulkan requires at least one GRAPHICS queue, so if this fails something
// is horribly wrong.
- assert(idx >= 0);
+ assert(idx_gfx >= 0);
+ MP_VERBOSE(vk, "Using graphics queue (QF %d)\n", idx_gfx);
// Ensure we can actually present to the surface using this queue
VkBool32 sup;
- VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx, vk->surf, &sup));
+ VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx_gfx, vk->surf, &sup));
if (!sup) {
MP_ERR(vk, "Queue family does not support surface presentation!\n");
goto error;
}
+ if (idx_tf >= 0 && idx_tf != idx_gfx)
+ MP_VERBOSE(vk, "Using async transfer (QF %d)\n", idx_tf);
+ if (idx_comp >= 0 && idx_comp != idx_gfx)
+ MP_VERBOSE(vk, "Using async compute (QF %d)\n", idx_comp);
+
// Now that we know which QFs we want, we can create the logical device
- float *priorities = talloc_zero_array(tmp, float, opts.queue_count);
- VkDeviceQueueCreateInfo qinfo = {
- .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
- .queueFamilyIndex = idx,
- .queueCount = MPMIN(qfs[idx].queueCount, opts.queue_count),
- .pQueuePriorities = priorities,
- };
+ VkDeviceQueueCreateInfo *qinfos = NULL;
+ int num_qinfos = 0;
+ add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_gfx, opts.queue_count);
+ add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, opts.queue_count);
+ add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, opts.queue_count);
const char **exts = NULL;
int num_exts = 0;
@@ -443,8 +487,8 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
VkDeviceCreateInfo dinfo = {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
- .queueCreateInfoCount = 1,
- .pQueueCreateInfos = &qinfo,
+ .pQueueCreateInfos = qinfos,
+ .queueCreateInfoCount = num_qinfos,
.ppEnabledExtensionNames = exts,
.enabledExtensionCount = num_exts,
};
@@ -455,12 +499,20 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts)
VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev));
- vk_malloc_init(vk);
+ // Create the command pools and memory allocator
+ for (int i = 0; i < num_qinfos; i++) {
+ int qf = qinfos[i].queueFamilyIndex;
+ struct vk_cmdpool *pool = vk_cmdpool_create(vk, qinfos[i], qfs[qf]);
+ if (!pool)
+ goto error;
+ MP_TARRAY_APPEND(NULL, vk->pools, vk->num_pools, pool);
+ }
- // Create the command pool(s)
- vk->pool = vk_cmdpool_create(vk, qinfo, qfs[idx]);
- if (!vk->pool)
- goto error;
+ // Map the pool_* pointers by QF index; vk->pools is ordered by qinfos,
+ // so it cannot be indexed by the QF indices directly
+ for (int i = 0; i < vk->num_pools; i++) {
+ struct vk_cmdpool *pool = vk->pools[i];
+ vk->pool_graphics = pool->qf == idx_gfx ? pool : vk->pool_graphics;
+ vk->pool_compute = pool->qf == idx_comp ? pool : vk->pool_compute;
+ vk->pool_transfer = pool->qf == idx_tf ? pool : vk->pool_transfer;
+ }
+
+ vk_malloc_init(vk);
talloc_free(tmp);
return true;
@@ -563,10 +615,8 @@ static void vk_cmdpool_destroy(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
if (!pool)
return;
- for (int i = 0; i < pool->num_cmds_available; i++)
- vk_cmd_destroy(vk, pool->cmds_available[i]);
- for (int i = 0; i < pool->num_cmds_pending; i++)
- vk_cmd_destroy(vk, pool->cmds_pending[i]);
+ for (int i = 0; i < pool->num_cmds; i++)
+ vk_cmd_destroy(vk, pool->cmds[i]);
vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR);
talloc_free(pool);
@@ -603,26 +653,67 @@ error:
return NULL;
}
-void mpvk_pool_wait_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
- uint64_t timeout)
+void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout)
{
- if (!pool)
- return;
-
- while (pool->num_cmds_pending > 0) {
- struct vk_cmd *cmd = pool->cmds_pending[0];
+ while (vk->num_cmds_pending > 0) {
+ struct vk_cmd *cmd = vk->cmds_pending[0];
+ struct vk_cmdpool *pool = cmd->pool;
VkResult res = vk_cmd_poll(vk, cmd, timeout);
if (res == VK_TIMEOUT)
break;
vk_cmd_reset(vk, cmd);
- MP_TARRAY_REMOVE_AT(pool->cmds_pending, pool->num_cmds_pending, 0);
- MP_TARRAY_APPEND(pool, pool->cmds_available, pool->num_cmds_available, cmd);
+ MP_TARRAY_REMOVE_AT(vk->cmds_pending, vk->num_cmds_pending, 0);
+ MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
}
}
-void mpvk_dev_wait_cmds(struct mpvk_ctx *vk, uint64_t timeout)
+bool mpvk_flush_commands(struct mpvk_ctx *vk)
{
- mpvk_pool_wait_cmds(vk, vk->pool, timeout);
+ bool ret = true;
+
+ for (int i = 0; i < vk->num_cmds_queued; i++) {
+ struct vk_cmd *cmd = vk->cmds_queued[i];
+ struct vk_cmdpool *pool = cmd->pool;
+
+ VkSubmitInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &cmd->buf,
+ .waitSemaphoreCount = cmd->num_deps,
+ .pWaitSemaphores = cmd->deps,
+ .pWaitDstStageMask = cmd->depstages,
+ .signalSemaphoreCount = cmd->num_sigs,
+ .pSignalSemaphores = cmd->sigs,
+ };
+
+ VK(vkQueueSubmit(cmd->queue, 1, &sinfo, cmd->fence));
+ MP_TARRAY_APPEND(NULL, vk->cmds_pending, vk->num_cmds_pending, cmd);
+
+ if (mp_msg_test(vk->log, MSGL_TRACE)) {
+ MP_TRACE(vk, "Submitted command on queue %p (QF %d):\n",
+ (void *)cmd->queue, pool->qf);
+ for (int n = 0; n < cmd->num_deps; n++)
+ MP_TRACE(vk, " waits on semaphore %p\n", (void *)cmd->deps[n]);
+ for (int n = 0; n < cmd->num_sigs; n++)
+ MP_TRACE(vk, " signals semaphore %p\n", (void *)cmd->sigs[n]);
+ }
+ continue;
+
+error:
+ vk_cmd_reset(vk, cmd);
+ MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
+ ret = false;
+ }
+
+ vk->num_cmds_queued = 0;
+
+ // Rotate the queues to ensure good parallelism across frames
+ for (int i = 0; i < vk->num_pools; i++) {
+ struct vk_cmdpool *pool = vk->pools[i];
+ pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
+ }
+
+ return ret;
}
void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg)
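The rotation at the end of mpvk_flush_commands advances each pool's idx_queues once per flush, i.e. once per frame, which is how ``--vulkan-queue-count`` greater than 1 spreads consecutive frames across queues. A trivial standalone demonstration of the round-robin (the queue count here is made up):

    #include <stdio.h>

    int main(void)
    {
        int num_queues = 3, idx_queues = 0; // e.g. --vulkan-queue-count=3

        for (int frame = 0; frame < 5; frame++) {
            printf("frame %d submits to queue %d\n", frame, idx_queues);
            // Same rotation mpvk_flush_commands performs after each flush
            idx_queues = (idx_queues + 1) % num_queues;
        }
        return 0;
    }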
@@ -639,10 +730,10 @@ struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
{
// garbage collect the cmdpool first, to increase the chances of getting
// an already-available command buffer
- mpvk_pool_wait_cmds(vk, pool, 0);
+ mpvk_poll_commands(vk, 0);
struct vk_cmd *cmd = NULL;
- if (MP_TARRAY_POP(pool->cmds_available, pool->num_cmds_available, &cmd))
+ if (MP_TARRAY_POP(pool->cmds, pool->num_cmds, &cmd))
goto done;
// No free command buffers => allocate another one
@@ -675,58 +766,13 @@ void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd)
VK(vkEndCommandBuffer(cmd->buf));
VK(vkResetFences(vk->dev, 1, &cmd->fence));
- MP_TARRAY_APPEND(pool, pool->cmds_queued, pool->num_cmds_queued, cmd);
+ MP_TARRAY_APPEND(NULL, vk->cmds_queued, vk->num_cmds_queued, cmd);
vk->last_cmd = cmd;
return;
error:
vk_cmd_reset(vk, cmd);
- MP_TARRAY_APPEND(pool, pool->cmds_available, pool->num_cmds_available, cmd);
-}
-
-bool vk_flush_commands(struct mpvk_ctx *vk)
-{
- bool ret = true;
-
- struct vk_cmdpool *pool = vk->pool;
- for (int i = 0; i < pool->num_cmds_queued; i++) {
- struct vk_cmd *cmd = pool->cmds_queued[i];
-
- VkSubmitInfo sinfo = {
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .commandBufferCount = 1,
- .pCommandBuffers = &cmd->buf,
- .waitSemaphoreCount = cmd->num_deps,
- .pWaitSemaphores = cmd->deps,
- .pWaitDstStageMask = cmd->depstages,
- .signalSemaphoreCount = cmd->num_sigs,
- .pSignalSemaphores = cmd->sigs,
- };
-
- VK(vkQueueSubmit(cmd->queue, 1, &sinfo, cmd->fence));
- MP_TARRAY_APPEND(pool, pool->cmds_pending, pool->num_cmds_pending, cmd);
-
- if (mp_msg_test(vk->log, MSGL_TRACE)) {
- MP_TRACE(vk, "Submitted command on queue %p (QF %d):\n",
- (void *)cmd->queue, pool->qf);
- for (int n = 0; n < cmd->num_deps; n++)
- MP_TRACE(vk, " waits on semaphore %p\n", (void *)cmd->deps[n]);
- for (int n = 0; n < cmd->num_sigs; n++)
- MP_TRACE(vk, " signals semaphore %p\n", (void *)cmd->sigs[n]);
- }
- continue;
-
-error:
- vk_cmd_reset(vk, cmd);
- MP_TARRAY_APPEND(pool, pool->cmds_available, pool->num_cmds_available, cmd);
- ret = false;
- }
-
- pool->num_cmds_queued = 0;
-
- // Rotate the queues to ensure good parallelism across frames
- pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues;
- return ret;
+ MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd);
}
void vk_signal_destroy(struct mpvk_ctx *vk, struct vk_signal **sig)
@@ -762,10 +808,16 @@ struct vk_signal *vk_cmd_signal(struct mpvk_ctx *vk, struct vk_cmd *cmd,
VK(vkCreateEvent(vk->dev, &einfo, MPVK_ALLOCATOR, &sig->event));
done:
- // Signal both the semaphore and the event. (We will only end up using one)
+ // Signal both the semaphore and the event if possible. (We will only
+ // end up using one or the other)
vk_cmd_sig(cmd, sig->semaphore);
- vkCmdSetEvent(cmd->buf, sig->event, stage);
- sig->event_source = cmd->queue;
+
+ VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
+ if (cmd->pool->props.queueFlags & req) {
+ vkCmdSetEvent(cmd->buf, sig->event, stage);
+ sig->event_source = cmd->queue;
+ }
+
return sig;
error:
@@ -787,14 +839,14 @@ static bool unsignal_cmd(struct vk_cmd *cmd, VkSemaphore sem)
// Attempts to remove a queued signal operation. Returns true if successful,
// i.e. the signal could be removed before it ever got fired.
-static bool unsignal(struct vk_cmd *cmd, VkSemaphore sem)
+static bool unsignal(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore sem)
{
if (unsignal_cmd(cmd, sem))
return true;
// Attempt to remove it from any queued commands
- for (int i = 0; i < cmd->pool->num_cmds_queued; i++) {
- if (unsignal_cmd(cmd->pool->cmds_queued[i], sem))
+ for (int i = 0; i < vk->num_cmds_queued; i++) {
+ if (unsignal_cmd(vk->cmds_queued[i], sem))
return true;
}
@@ -806,7 +858,9 @@ static void release_signal(struct mpvk_ctx *vk, struct vk_signal *sig)
// The semaphore never needs to be recreated, because it's either
// unsignaled while still queued, or unsignaled as a result of a device
// wait. But the event *may* need to be reset, so just always reset it.
- vkResetEvent(vk->dev, sig->event);
+ if (sig->event_source)
+ vkResetEvent(vk->dev, sig->event);
+ sig->event_source = NULL;
MP_TARRAY_APPEND(NULL, vk->signals, vk->num_signals, sig);
}
@@ -819,7 +873,7 @@ void vk_cmd_wait(struct mpvk_ctx *vk, struct vk_cmd *cmd,
return;
if (out_event && sig->event && sig->event_source == cmd->queue &&
- unsignal(cmd, sig->semaphore))
+ unsignal(vk, cmd, sig->semaphore))
{
// If we can remove the semaphore signal operation from the history and
// pretend it never happened, then we get to use the VkEvent. This also
diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h
index 538897afae..de3a757be3 100644
--- a/video/out/vulkan/utils.h
+++ b/video/out/vulkan/utils.h
@@ -66,9 +66,13 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts);
// UINT64_MAX effectively means waiting until the pool/device is idle. The
// timeout may also be passed as 0, in which case this function will not block,
// but only poll for completed commands.
-void mpvk_pool_wait_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
- uint64_t timeout);
-void mpvk_dev_wait_cmds(struct mpvk_ctx *vk, uint64_t timeout);
+void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout);
+
+// Flush all currently queued commands. Call this once per frame, after
+// submitting all of the command buffers for that frame. Calling this more
+// often than that is possible but bad for performance.
+// Returns whether successful. Failed commands will be implicitly dropped.
+bool mpvk_flush_commands(struct mpvk_ctx *vk);
// Since lots of vulkan operations need to be done lazily once the affected
// resources are no longer in use, provide an abstraction for tracking these.
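Taken together, the declarations above suggest the following per-frame driving pattern. This is a sketch against mpv's internal headers rather than a standalone program, with error handling elided:

    #include "video/out/vulkan/utils.h"

    // Hypothetical per-frame flow using the new API (sketch only)
    static void render_frame(struct mpvk_ctx *vk)
    {
        struct vk_cmd *cmd = vk_cmd_begin(vk, vk->pool_graphics);
        if (!cmd)
            return;

        // ... record rendering commands into cmd->buf ...

        vk_cmd_queue(vk, cmd);     // cmd ownership passes back; don't touch it
        mpvk_flush_commands(vk);   // submit everything queued this frame
        mpvk_poll_commands(vk, 0); // non-blocking: recycle finished buffers
    }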
@@ -158,13 +162,10 @@ struct vk_cmdpool {
VkQueue *queues;
int num_queues;
int idx_queues;
- // Command buffers associated with this queue
- struct vk_cmd **cmds_available; // available for re-recording
- struct vk_cmd **cmds_queued; // recorded but not yet submitted
- struct vk_cmd **cmds_pending; // submitted but not completed
- int num_cmds_available;
- int num_cmds_queued;
- int num_cmds_pending;
+ // Command buffers associated with this queue. These are available for
+ // re-recording
+ struct vk_cmd **cmds;
+ int num_cmds;
};
// Fetch a command buffer from a command pool and begin recording to it.
@@ -175,12 +176,6 @@ struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
// takes over ownership of *cmd, i.e. the caller should not touch it again.
void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd);
-// Flush all currently queued commands. Call this once per frame, after
-// submitting all of the command buffers for that frame. Calling this more
-// often than that is possible but bad for performance.
-// Returns whether successful. Failed commands will be implicitly dropped.
-bool vk_flush_commands(struct mpvk_ctx *vk);
-
// Predefined structs for a simple non-layered, non-mipped image
extern const VkImageSubresourceRange vk_range;
extern const VkImageSubresourceLayers vk_layers;