From bded247fb53558dd5cba26560d1f24e9234ae24e Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.xyz>
Date: Sun, 24 Sep 2017 15:05:24 +0200
Subject: vo_gpu: vulkan: support split command pools

Instead of using a single primary queue, we generate multiple
vk_cmdpools and pick the right one dynamically based on the intent.
This has a number of immediate benefits:

1. We can use async texture uploads
2. We can use the DMA engine for buffer updates
3. We can benefit from async compute on AMD GPUs

Unfortunately, the major downside is that due to the lack of queue family
ownership tracking, we need to use CONCURRENT sharing for all resources
(buffers *and* images!). In theory, we could try figuring out a way to
get rid of the concurrent sharing for buffers (which is only needed for
compute shader UBOs), but even so, the concurrent sharing mode doesn't
really seem to have a significant impact over here (nvidia). It's
possible that other platforms may disagree.

Our deadlock-avoidance strategy is stupidly simple: Just flush the
command every time we need to switch queues, and make sure all
submission and callbacks happen in FIFO order. This required lifting the
cmds_pending and cmds_queued out from vk_cmdpool to mpvk_ctx, and some
functions died/got moved as a result, but that's a relatively minor
change.

On my hardware this is a fairly significant performance boost, mainly
due to async transfers. (Nvidia doesn't expose separate compute queues
anyway.) On AMD, this should be a performance boost as well due to async
compute.
---
 video/out/vulkan/common.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'video/out/vulkan/common.h')

diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
index de49c6f1af..b849b6dc0b 100644
--- a/video/out/vulkan/common.h
+++ b/video/out/vulkan/common.h
@@ -48,10 +48,23 @@ struct mpvk_ctx {
     VkSurfaceKHR surf;
     VkSurfaceFormatKHR surf_format; // picked at surface initialization time
 
-    struct vk_malloc *alloc; // memory allocator for this device
-    struct vk_cmdpool *pool; // primary command pool for this device
-    struct vk_cmd *last_cmd; // most recently submitted (pending) command
+    struct vk_malloc *alloc;      // memory allocator for this device
     struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler
+    struct vk_cmdpool **pools;    // command pools (one per queue family)
+    int num_pools;
+    struct vk_cmd *last_cmd;      // most recently submitted command
+
+    // Queued/pending commands. These are shared for the entire mpvk_ctx to
+    // ensure submission and callbacks are FIFO
+    struct vk_cmd **cmds_queued;  // recorded but not yet submitted
+    struct vk_cmd **cmds_pending; // submitted but not completed
+    int num_cmds_queued;
+    int num_cmds_pending;
+
+    // Pointers into *pools
+    struct vk_cmdpool *pool_graphics; // required
+    struct vk_cmdpool *pool_compute;  // optional
+    struct vk_cmdpool *pool_transfer; // optional
 
     // Common pool of signals, to avoid having to re-create these objects often
     struct vk_signal **signals;
-- 
cgit v1.2.3