author     Niklas Haas <git@haasn.xyz>  2016-09-14 20:54:18 +0200
committer  Niklas Haas <git@haasn.xyz>  2017-09-26 17:25:35 +0200
commit     91f23c7067af248846420854a0dc78c26ea6e300 (patch)
tree       9c17062eafc323eb07399505b7a81d4a4ce31aa0 /video/out/vulkan
parent     c82022f34932e22546976ecb8b1e956cf5f12101 (diff)
download   mpv-91f23c7067af248846420854a0dc78c26ea6e300.tar.bz2
           mpv-91f23c7067af248846420854a0dc78c26ea6e300.tar.xz
vo_gpu: vulkan: initial implementation
This time based on ra/vo_gpu. 2017 is the year of the vulkan desktop!

Current problems / limitations / improvement opportunities:

1. The swapchain/flipping code violates the vulkan spec, by assuming that
   the presentation queue will be bounded (in cases where rendering is
   significantly faster than vsync). But apparently, there's simply no
   better way to do this right now, to the point where even the stupid
   cube.c examples from LunarG etc. do it wrong.
   (cf. https://github.com/KhronosGroup/Vulkan-Docs/issues/370)
   (See the sketch after this message for the throttling this implies.)

2. The memory allocator could be improved. (This is a universal constant)

3. Could explore using push descriptors instead of descriptor sets,
   especially since we expect to switch descriptors semi-often for some
   passes (like interpolation). Probably won't make a difference, but the
   synchronization overhead might be a factor. Who knows.

4. Parallelism across frames / async transfer is not well-defined; we
   either need to use a better semaphore / command buffer strategy or a
   resource pooling layer to safely handle cross-frame parallelism. (That
   said, I gave resource pooling a try and was not happy with the result
   at all - so I'm still exploring the semaphore strategy)

5. We aggressively use pipeline barriers where events would offer a much
   more fine-grained synchronization mechanism. As a result of this, we
   might be suffering from GPU bubbles due to too-short dependencies on
   objects. (That said, I'm also exploring the use of semaphores as an
   ordering tactic, which would allow cross-frame time slicing in theory)

Some minor changes to the vo_gpu and infrastructure, but nothing
consequential.

NOTE: For safety, all use of asynchronous commands / multiple command
pools is currently disabled completely. There are some left-over relics
of this in the code (e.g. the distinction between dev_poll and pool_poll),
but that is kept in place mostly because this will be re-extended in the
future (vulkan rev 2).

The queue count is also currently capped to 1, because the lack of
cross-frame semaphores means we need the implicit synchronization from
the same-queue semantics to guarantee a correct result.
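The throttling that item 1 implies amounts to bounding the number of frames in flight by polling for command completion before starting another frame. A minimal sketch of the idea, mirroring swap_buffers() in context.c below (struct priv, frames_in_flight, swapchain_depth and mpvk_dev_poll_cmds() are names from this patch; the wrapper function itself is illustrative only):

    // Sketch: block until the number of submitted-but-unfinished frames
    // drops below the configured swapchain depth, by polling pending
    // command buffers in small time slices.
    static void throttle_to_swapchain_depth(struct priv *p,
                                            struct ra_swapchain *sw)
    {
        while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
            mpvk_dev_poll_cmds(p->vk, 100000); // 100µs per poll
    }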
Diffstat (limited to 'video/out/vulkan')
-rw-r--r--  video/out/vulkan/common.h          51
-rw-r--r--  video/out/vulkan/context.c        501
-rw-r--r--  video/out/vulkan/context.h         10
-rw-r--r--  video/out/vulkan/context_xlib.c   116
-rw-r--r--  video/out/vulkan/formats.c         55
-rw-r--r--  video/out/vulkan/formats.h         16
-rw-r--r--  video/out/vulkan/malloc.c         424
-rw-r--r--  video/out/vulkan/malloc.h          35
-rw-r--r--  video/out/vulkan/ra_vk.c         1590
-rw-r--r--  video/out/vulkan/ra_vk.h           31
-rw-r--r--  video/out/vulkan/utils.c          726
-rw-r--r--  video/out/vulkan/utils.h          153
12 files changed, 3708 insertions, 0 deletions
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
new file mode 100644
index 0000000000..4c0e783f0e
--- /dev/null
+++ b/video/out/vulkan/common.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "config.h"
+
+#include "common/common.h"
+#include "common/msg.h"
+
+// We need to define all platforms we want to support. Since we have
+// our own mechanism for checking this, we re-define the right symbols
+#if HAVE_X11
+#define VK_USE_PLATFORM_XLIB_KHR
+#endif
+
+#include <vulkan/vulkan.h>
+
+// Vulkan allows the optional use of a custom allocator. We don't need one but
+// mark this parameter with a better name in case we ever decide to change this
+// in the future. (And to make the code more readable)
+#define MPVK_ALLOCATOR NULL
+
+// A lot of things depend on streaming resources across frames. Depending on
+// how many frames we render ahead of time, we need to pick enough to avoid
+// any conflicts, so make all of these tunable relative to this constant in
+// order to centralize them.
+#define MPVK_MAX_STREAMING_DEPTH 8
+
+// Shared struct used to hold vulkan context information
+struct mpvk_ctx {
+ struct mp_log *log;
+ VkInstance inst;
+ VkPhysicalDevice physd;
+ VkDebugReportCallbackEXT dbg;
+ VkDevice dev;
+
+ // Surface, must be initialized after the context itself
+ VkSurfaceKHR surf;
+ VkSurfaceFormatKHR surf_format; // picked at surface initialization time
+
+ struct vk_malloc *alloc; // memory allocator for this device
+ struct vk_cmdpool *pool; // primary command pool for this device
+ struct vk_cmd *last_cmd; // most recently submitted command
+
+ // Cached capabilities
+ VkPhysicalDeviceLimits limits;
+};
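As a short illustration of how the two defines above are meant to be used: MPVK_ALLOCATOR is passed wherever Vulkan expects a VkAllocationCallbacks pointer, and MPVK_MAX_STREAMING_DEPTH bounds per-frame resource pools. The following hypothetical helper (not part of this patch) creates one semaphore per streaming slot:

    #include <vulkan/vulkan.h>
    #include "video/out/vulkan/common.h"

    // Hypothetical helper: create one semaphore per streaming slot, so a
    // resource ring indexed by (frame % MPVK_MAX_STREAMING_DEPTH) never
    // reuses a slot that could still be in flight.
    static VkResult create_streaming_semaphores(struct mpvk_ctx *vk,
                                                VkSemaphore sems[MPVK_MAX_STREAMING_DEPTH])
    {
        static const VkSemaphoreCreateInfo seminfo = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        };

        for (int i = 0; i < MPVK_MAX_STREAMING_DEPTH; i++) {
            VkResult res = vkCreateSemaphore(vk->dev, &seminfo,
                                             MPVK_ALLOCATOR, &sems[i]);
            if (res != VK_SUCCESS)
                return res;
        }
        return VK_SUCCESS;
    }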
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
new file mode 100644
index 0000000000..bd456d214c
--- /dev/null
+++ b/video/out/vulkan/context.c
@@ -0,0 +1,501 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "options/m_config.h"
+#include "context.h"
+#include "ra_vk.h"
+#include "utils.h"
+
+enum {
+ SWAP_AUTO = 0,
+ SWAP_FIFO,
+ SWAP_FIFO_RELAXED,
+ SWAP_MAILBOX,
+ SWAP_IMMEDIATE,
+ SWAP_COUNT,
+};
+
+struct vulkan_opts {
+ struct mpvk_device_opts dev_opts; // logical device options
+ char *device; // force a specific GPU
+ int swap_mode;
+};
+
+static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
+ struct bstr name, struct bstr param)
+{
+ int ret = M_OPT_INVALID;
+ VkResult res;
+
+ // Create a dummy instance to validate/list the devices
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ };
+
+ VkInstance inst;
+ VkPhysicalDevice *devices = NULL;
+ uint32_t num = 0;
+
+ res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ res = vkEnumeratePhysicalDevices(inst, &num, NULL);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ devices = talloc_array(NULL, VkPhysicalDevice, num);
+ res = vkEnumeratePhysicalDevices(inst, &num, devices);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ bool help = bstr_equals0(param, "help");
+ if (help) {
+ mp_info(log, "Available vulkan devices:\n");
+ ret = M_OPT_EXIT;
+ }
+
+ for (int i = 0; i < num; i++) {
+ VkPhysicalDeviceProperties prop;
+ vkGetPhysicalDeviceProperties(devices[i], &prop);
+
+ if (help) {
+ mp_info(log, " '%s' (GPU %d, ID %x:%x)\n", prop.deviceName, i,
+ (unsigned)prop.vendorID, (unsigned)prop.deviceID);
+ } else if (bstr_equals0(param, prop.deviceName)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ if (!help && ret == M_OPT_INVALID)
+ mp_err(log, "No device with name '%.*s'!\n", BSTR_P(param));
+
+error:
+ talloc_free(devices);
+ return ret;
+}
+
+#define OPT_BASE_STRUCT struct vulkan_opts
+const struct m_sub_options vulkan_conf = {
+ .opts = (const struct m_option[]) {
+ OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev),
+ OPT_CHOICE("vulkan-swap-mode", swap_mode, 0,
+ ({"auto", SWAP_AUTO},
+ {"fifo", SWAP_FIFO},
+ {"fifo-relaxed", SWAP_FIFO_RELAXED},
+ {"mailbox", SWAP_MAILBOX},
+ {"immediate", SWAP_IMMEDIATE})),
+ OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1,
+ MPVK_MAX_QUEUES, OPTDEF_INT(1)),
+ {0}
+ },
+ .size = sizeof(struct vulkan_opts)
+};
+
+struct priv {
+ struct mpvk_ctx *vk;
+ struct vulkan_opts *opts;
+ // Swapchain metadata:
+ int w, h; // current size
+ VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
+ VkSwapchainKHR swapchain;
+ VkSwapchainKHR old_swapchain;
+ int frames_in_flight;
+ // state of the images:
+ struct ra_tex **images; // ra_tex wrappers for the vkimages
+ int num_images; // size of images
+ VkSemaphore *acquired; // pool of semaphores used to synchronize images
+ int num_acquired; // size of this pool
+ int idx_acquired; // index of next free semaphore within this pool
+ int last_imgidx; // the image index last acquired (for submit)
+};
+
+static bool update_swapchain_info(struct priv *p,
+ VkSwapchainCreateInfoKHR *info)
+{
+ struct mpvk_ctx *vk = p->vk;
+
+ // Query the supported capabilities and update this struct as needed
+ VkSurfaceCapabilitiesKHR caps;
+ VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps));
+
+ // Sorted by preference
+ static const VkCompositeAlphaFlagBitsKHR alphaModes[] = {
+ VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
+ VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+ };
+
+ for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) {
+ if (caps.supportedCompositeAlpha & alphaModes[i]) {
+ info->compositeAlpha = alphaModes[i];
+ break;
+ }
+ }
+
+ if (!info->compositeAlpha) {
+ MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n",
+ caps.supportedCompositeAlpha);
+ goto error;
+ }
+
+ static const VkSurfaceTransformFlagBitsKHR rotModes[] = {
+ VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+ VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
+ };
+
+ for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) {
+ if (caps.supportedTransforms & rotModes[i]) {
+ info->preTransform = rotModes[i];
+ break;
+ }
+ }
+
+ if (!info->preTransform) {
+ MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n",
+ caps.supportedTransforms);
+ goto error;
+ }
+
+ // Image count as required
+ MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n",
+ (int)info->minImageCount, (int)caps.minImageCount,
+ (int)caps.maxImageCount);
+
+ info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount);
+ if (caps.maxImageCount)
+ info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount);
+
+ // Check the extent against the allowed parameters
+ if (caps.currentExtent.width != info->imageExtent.width &&
+ caps.currentExtent.width != 0xFFFFFFFF)
+ {
+ MP_WARN(vk, "Requested width %d does not match current width %d\n",
+ (int)info->imageExtent.width, (int)caps.currentExtent.width);
+ info->imageExtent.width = caps.currentExtent.width;
+ }
+
+ if (caps.currentExtent.height != info->imageExtent.height &&
+ caps.currentExtent.height != 0xFFFFFFFF)
+ {
+ MP_WARN(vk, "Requested height %d does not match current height %d\n",
+ (int)info->imageExtent.height, (int)caps.currentExtent.height);
+ info->imageExtent.height = caps.currentExtent.height;
+ }
+
+ if (caps.minImageExtent.width > info->imageExtent.width ||
+ caps.minImageExtent.height > info->imageExtent.height)
+ {
+ MP_ERR(vk, "Requested size %dx%d smaller than device minimum %d%d\n",
+ (int)info->imageExtent.width, (int)info->imageExtent.height,
+ (int)caps.minImageExtent.width, (int)caps.minImageExtent.height);
+ goto error;
+ }
+
+ if (caps.maxImageExtent.width < info->imageExtent.width ||
+ caps.maxImageExtent.height < info->imageExtent.height)
+ {
+ MP_ERR(vk, "Requested size %dx%d larger than device maximum %d%d\n",
+ (int)info->imageExtent.width, (int)info->imageExtent.height,
+ (int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height);
+ goto error;
+ }
+
+ // We just request whatever usage we can, and let the ra_vk decide what
+ // ra_tex_params that translates to. This makes the images as flexible
+ // as possible.
+ info->imageUsage = caps.supportedUsageFlags;
+ return true;
+
+error:
+ return false;
+}
+
+void ra_vk_ctx_uninit(struct ra_ctx *ctx)
+{
+ if (ctx->ra) {
+ struct priv *p = ctx->swapchain->priv;
+ struct mpvk_ctx *vk = p->vk;
+
+ mpvk_pool_wait_idle(vk, vk->pool);
+
+ for (int i = 0; i < p->num_images; i++)
+ ra_tex_free(ctx->ra, &p->images[i]);
+ for (int i = 0; i < p->num_acquired; i++)
+ vkDestroySemaphore(vk->dev, p->acquired[i], MPVK_ALLOCATOR);
+
+ vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+
+ talloc_free(p->images);
+ talloc_free(p->acquired);
+ ctx->ra->fns->destroy(ctx->ra);
+ ctx->ra = NULL;
+ }
+
+ talloc_free(ctx->swapchain);
+ ctx->swapchain = NULL;
+}
+
+static const struct ra_swapchain_fns vulkan_swapchain;
+
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+ VkPresentModeKHR preferred_mode)
+{
+ struct ra_swapchain *sw = ctx->swapchain = talloc_zero(NULL, struct ra_swapchain);
+ sw->ctx = ctx;
+ sw->fns = &vulkan_swapchain;
+
+ struct priv *p = sw->priv = talloc_zero(sw, struct priv);
+ p->vk = vk;
+ p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf);
+
+ if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw))
+ goto error;
+ if (!mpvk_pick_surface_format(vk))
+ goto error;
+ if (!mpvk_device_init(vk, p->opts->dev_opts))
+ goto error;
+
+ ctx->ra = ra_create_vk(vk, ctx->log);
+ if (!ctx->ra)
+ goto error;
+
+ static const VkPresentModeKHR present_modes[SWAP_COUNT] = {
+ [SWAP_FIFO] = VK_PRESENT_MODE_FIFO_KHR,
+ [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
+ [SWAP_MAILBOX] = VK_PRESENT_MODE_MAILBOX_KHR,
+ [SWAP_IMMEDIATE] = VK_PRESENT_MODE_IMMEDIATE_KHR,
+ };
+
+ p->protoInfo = (VkSwapchainCreateInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+ .surface = vk->surf,
+ .imageFormat = vk->surf_format.format,
+ .imageColorSpace = vk->surf_format.colorSpace,
+ .imageArrayLayers = 1, // non-stereoscopic
+ .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB
+ .presentMode = p->opts->swap_mode ? present_modes[p->opts->swap_mode]
+ : preferred_mode,
+ .clipped = true,
+ };
+
+ // Make sure the swapchain present mode is supported
+ int num_modes;
+ VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
+ &num_modes, NULL));
+ VkPresentModeKHR *modes = talloc_array(NULL, VkPresentModeKHR, num_modes);
+ VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
+ &num_modes, modes));
+ bool supported = false;
+ for (int i = 0; i < num_modes; i++)
+ supported |= (modes[i] == p->protoInfo.presentMode);
+ talloc_free(modes);
+
+ if (!supported) {
+ MP_ERR(ctx, "Requested swap mode unsupported by this device!\n");
+ goto error;
+ }
+
+ return true;
+
+error:
+ ra_vk_ctx_uninit(ctx);
+ return false;
+}
+
+static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p)
+{
+ assert(p->old_swapchain);
+ vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR);
+ p->old_swapchain = NULL;
+}
+
+bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
+{
+ struct priv *p = sw->priv;
+ if (w == p->w && h == p->h)
+ return true;
+
+ struct ra *ra = sw->ctx->ra;
+ struct mpvk_ctx *vk = p->vk;
+ VkImage *vkimages = NULL;
+
+ // It's invalid to trigger another swapchain recreation while there's
+ // more than one swapchain already active, so we need to flush any pending
+ // asynchronous swapchain release operations that may be ongoing.
+ while (p->old_swapchain)
+ mpvk_dev_poll_cmds(vk, 100000); // 100μs
+
+ VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
+ sinfo.imageExtent = (VkExtent2D){ w, h };
+ sinfo.oldSwapchain = p->swapchain;
+
+ if (!update_swapchain_info(p, &sinfo))
+ goto error;
+
+ VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain));
+ p->w = w;
+ p->h = h;
+
+ // Freeing the old swapchain while it's still in use is an error, so do
+ // it asynchronously once the device is idle.
+ if (sinfo.oldSwapchain) {
+ p->old_swapchain = sinfo.oldSwapchain;
+ vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p);
+ }
+
+ // Get the new swapchain images
+ int num;
+ VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL));
+ vkimages = talloc_array(NULL, VkImage, num);
+ VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages));
+
+ // If needed, allocate some more semaphores
+ while (num > p->num_acquired) {
+ VkSemaphore sem;
+ static const VkSemaphoreCreateInfo seminfo = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+ VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem));
+ MP_TARRAY_APPEND(NULL, p->acquired, p->num_acquired, sem);
+ }
+
+ // Recreate the ra_tex wrappers
+ for (int i = 0; i < p->num_images; i++)
+ ra_tex_free(ra, &p->images[i]);
+
+ p->num_images = num;
+ MP_TARRAY_GROW(NULL, p->images, p->num_images);
+ for (int i = 0; i < num; i++) {
+ p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo);
+ if (!p->images[i])
+ goto error;
+ }
+
+ talloc_free(vkimages);
+ return true;
+
+error:
+ talloc_free(vkimages);
+ vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+ p->swapchain = NULL;
+ return false;
+}
+
+static int color_depth(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+ int bits = 0;
+
+ if (!p->num_images)
+ return bits;
+
+ // The channel with the most bits is probably the most authoritative about
+ // the actual color information (consider e.g. a2bgr10). Slight downside
+ // in that it results in rounding r/b for e.g. rgb565, but we don't pick
+ // surfaces with fewer than 8 bits anyway.
+ const struct ra_format *fmt = p->images[0]->params.format;
+ for (int i = 0; i < fmt->num_components; i++) {
+ int depth = fmt->component_depth[i];
+ bits = MPMAX(bits, depth ? depth : fmt->component_size[i]);
+ }
+
+ return bits;
+}
+
+static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+ struct priv *p = sw->priv;
+ struct mpvk_ctx *vk = p->vk;
+ if (!p->swapchain)
+ goto error;
+
+ uint32_t imgidx = 0;
+ MP_TRACE(vk, "vkAcquireNextImageKHR\n");
+ VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX,
+ p->acquired[p->idx_acquired], NULL,
+ &imgidx);
+ if (res == VK_ERROR_OUT_OF_DATE_KHR)
+ goto error; // just return in this case
+ VK_ASSERT(res, "Failed acquiring swapchain image");
+
+ p->last_imgidx = imgidx;
+ *out_fbo = (struct ra_fbo) {
+ .tex = p->images[imgidx],
+ .flip = false,
+ };
+ return true;
+
+error:
+ return false;
+}
+
+static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
+{
+ struct priv *p = sw->priv;
+ struct ra *ra = sw->ctx->ra;
+ struct mpvk_ctx *vk = p->vk;
+ if (!p->swapchain)
+ goto error;
+
+ VkSemaphore acquired = p->acquired[p->idx_acquired++];
+ p->idx_acquired %= p->num_acquired;
+
+ VkSemaphore done;
+ if (!ra_vk_submit(ra, p->images[p->last_imgidx], acquired, &done,
+ &p->frames_in_flight))
+ goto error;
+
+ // For some reason, nvidia absolutely shits itself when presenting from a
+ // full queue - so advance all of the cmdpool indices first and then do the
+ // present on an "empty" queue
+ vk_cmd_cycle_queues(vk);
+ struct vk_cmdpool *pool = vk->pool;
+ VkQueue queue = pool->queues[pool->qindex];
+
+ VkPresentInfoKHR pinfo = {
+ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &done,
+ .swapchainCount = 1,
+ .pSwapchains = &p->swapchain,
+ .pImageIndices = &p->last_imgidx,
+ };
+
+ VK(vkQueuePresentKHR(queue, &pinfo));
+ return true;
+
+error:
+ return false;
+}
+
+static void swap_buffers(struct ra_swapchain *sw)
+{
+ struct priv *p = sw->priv;
+
+ while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth)
+ mpvk_dev_poll_cmds(p->vk, 100000); // 100μs
+}
+
+static const struct ra_swapchain_fns vulkan_swapchain = {
+ // .screenshot is not currently supported
+ .color_depth = color_depth,
+ .start_frame = start_frame,
+ .submit_frame = submit_frame,
+ .swap_buffers = swap_buffers,
+};
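A worked example of the color_depth() heuristic above: for an a2bgr10 surface the component depths are {2, 10, 10, 10}, and taking the per-component maximum reports 10 bits rather than the 2 bits of the alpha channel or the 32-bit texel size. A standalone sketch (not part of the patch):

    // Standalone illustration of the color_depth() heuristic for a2bgr10.
    static int color_depth_a2bgr10(void)
    {
        static const int depths[4] = {2, 10, 10, 10}; // a, b, g, r bits
        int bits = 0;
        for (int i = 0; i < 4; i++)
            bits = depths[i] > bits ? depths[i] : bits;
        return bits; // == 10
    }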
diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h
new file mode 100644
index 0000000000..3f630bc10e
--- /dev/null
+++ b/video/out/vulkan/context.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "video/out/gpu/context.h"
+#include "common.h"
+
+// Helpers for ra_ctx based on ra_vk. These initialize ctx->ra and ctx->swapchain.
+void ra_vk_ctx_uninit(struct ra_ctx *ctx);
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+ VkPresentModeKHR preferred_mode);
+bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h);
diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c
new file mode 100644
index 0000000000..2611fbb706
--- /dev/null
+++ b/video/out/vulkan/context_xlib.c
@@ -0,0 +1,116 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/gpu/context.h"
+#include "video/out/x11_common.h"
+
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+struct priv {
+ struct mpvk_ctx vk;
+};
+
+static void xlib_uninit(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv;
+
+ ra_vk_ctx_uninit(ctx);
+ mpvk_uninit(&p->vk);
+ vo_x11_uninit(ctx->vo);
+}
+
+static bool xlib_init(struct ra_ctx *ctx)
+{
+ struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+ struct mpvk_ctx *vk = &p->vk;
+ int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+
+ if (!vo_x11_init(ctx->vo))
+ goto error;
+
+ if (!vo_x11_create_vo_window(ctx->vo, NULL, "mpvk"))
+ goto error;
+
+ if (!mpvk_instance_init(vk, ctx->log, ctx->opts.debug))
+ goto error;
+
+ VkXlibSurfaceCreateInfoKHR xinfo = {
+ .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
+ .dpy = ctx->vo->x11->display,
+ .window = ctx->vo->x11->window,
+ };
+
+ VkResult res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR,
+ &vk->surf);
+ if (res != VK_SUCCESS) {
+ MP_MSG(ctx, msgl, "Failed creating Xlib surface: %s\n", vk_err(res));
+ goto error;
+ }
+
+ if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR))
+ goto error;
+
+ return true;
+
+error:
+ xlib_uninit(ctx);
+ return false;
+}
+
+static bool resize(struct ra_ctx *ctx)
+{
+ return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight);
+}
+
+static bool xlib_reconfig(struct ra_ctx *ctx)
+{
+ vo_x11_config_vo_window(ctx->vo);
+ return resize(ctx);
+}
+
+static int xlib_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+ int ret = vo_x11_control(ctx->vo, events, request, arg);
+ if (*events & VO_EVENT_RESIZE) {
+ if (!resize(ctx))
+ return VO_ERROR;
+ }
+ return ret;
+}
+
+static void xlib_wakeup(struct ra_ctx *ctx)
+{
+ vo_x11_wakeup(ctx->vo);
+}
+
+static void xlib_wait_events(struct ra_ctx *ctx, int64_t until_time_us)
+{
+ vo_x11_wait_events(ctx->vo, until_time_us);
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_xlib = {
+ .type = "vulkan",
+ .name = "x11",
+ .reconfig = xlib_reconfig,
+ .control = xlib_control,
+ .wakeup = xlib_wakeup,
+ .wait_events = xlib_wait_events,
+ .init = xlib_init,
+ .uninit = xlib_uninit,
+};
diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c
new file mode 100644
index 0000000000..b44bead99c
--- /dev/null
+++ b/video/out/vulkan/formats.c
@@ -0,0 +1,55 @@
+#include "formats.h"
+
+const struct vk_format vk_formats[] = {
+ // Regular, byte-aligned integer formats
+ {"r8", VK_FORMAT_R8_UNORM, 1, 1, {8 }, RA_CTYPE_UNORM },
+ {"rg8", VK_FORMAT_R8G8_UNORM, 2, 2, {8, 8 }, RA_CTYPE_UNORM },
+ {"rgb8", VK_FORMAT_R8G8B8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM },
+ {"rgba8", VK_FORMAT_R8G8B8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM },
+ {"r16", VK_FORMAT_R16_UNORM, 1, 2, {16 }, RA_CTYPE_UNORM },
+ {"rg16", VK_FORMAT_R16G16_UNORM, 2, 4, {16, 16 }, RA_CTYPE_UNORM },
+ {"rgb16", VK_FORMAT_R16G16B16_UNORM, 3, 6, {16, 16, 16 }, RA_CTYPE_UNORM },
+ {"rgba16", VK_FORMAT_R16G16B16A16_UNORM, 4, 8, {16, 16, 16, 16}, RA_CTYPE_UNORM },
+
+ // Special, integer-only formats
+ {"r32ui", VK_FORMAT_R32_UINT, 1, 4, {32 }, RA_CTYPE_UINT },
+ {"rg32ui", VK_FORMAT_R32G32_UINT, 2, 8, {32, 32 }, RA_CTYPE_UINT },
+ {"rgb32ui", VK_FORMAT_R32G32B32_UINT, 3, 12, {32, 32, 32 }, RA_CTYPE_UINT },
+ {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_UINT },
+ {"r64ui", VK_FORMAT_R64_UINT, 1, 8, {64 }, RA_CTYPE_UINT },
+ {"rg64ui", VK_FORMAT_R64G64_UINT, 2, 16, {64, 64 }, RA_CTYPE_UINT },
+ {"rgb64ui", VK_FORMAT_R64G64B64_UINT, 3, 24, {64, 64, 64 }, RA_CTYPE_UINT },
+ {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_UINT },
+
+ // Packed integer formats
+ {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM },
+ {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM },
+ {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM },
+ {"rgb565a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM },
+
+ // Float formats (native formats, hf = half float, df = double float)
+ {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT },
+ {"rg16hf", VK_FORMAT_R16G16_SFLOAT, 2, 4, {16, 16 }, RA_CTYPE_FLOAT },
+ {"rgb16hf", VK_FORMAT_R16G16B16_SFLOAT, 3, 6, {16, 16, 16 }, RA_CTYPE_FLOAT },
+ {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT, 4, 8, {16, 16, 16, 16}, RA_CTYPE_FLOAT },
+ {"r32f", VK_FORMAT_R32_SFLOAT, 1, 4, {32 }, RA_CTYPE_FLOAT },
+ {"rg32f", VK_FORMAT_R32G32_SFLOAT, 2, 8, {32, 32 }, RA_CTYPE_FLOAT },
+ {"rgb32f", VK_FORMAT_R32G32B32_SFLOAT, 3, 12, {32, 32, 32 }, RA_CTYPE_FLOAT },
+ {"rgba32f", VK_FORMAT_R32G32B32A32_SFLOAT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_FLOAT },
+ {"r64df", VK_FORMAT_R64_SFLOAT, 1, 8, {64 }, RA_CTYPE_FLOAT },
+ {"rg64df", VK_FORMAT_R64G64_SFLOAT, 2, 16, {64, 64 }, RA_CTYPE_FLOAT },
+ {"rgb64df", VK_FORMAT_R64G64B64_SFLOAT, 3, 24, {64, 64, 64 }, RA_CTYPE_FLOAT },
+ {"rgba64df", VK_FORMAT_R64G64B64A64_SFLOAT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_FLOAT },
+
+ // "Swapped" component order images
+ {"bgr8", VK_FORMAT_B8G8R8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {"bgra8", VK_FORMAT_B8G8R8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {"bgra4", VK_FORMAT_B4G4R4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM, true },
+ {"bgr565", VK_FORMAT_B5G6R5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM, true },
+ {"bgr565a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true },
+ {"a1rgb5", VK_FORMAT_A1R5G5B5_UNORM_PACK16, 4, 2, {1, 5, 5, 5 }, RA_CTYPE_UNORM, true },
+ {"a2rgb10", VK_FORMAT_A2R10G10B10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
+ {"a2bgr10", VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
+ {"abgr8", VK_FORMAT_A8B8G8R8_UNORM_PACK32, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {0}
+};
diff --git a/video/out/vulkan/formats.h b/video/out/vulkan/formats.h
new file mode 100644
index 0000000000..22782a6958
--- /dev/null
+++ b/video/out/vulkan/formats.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+#include "common.h"
+
+struct vk_format {
+ const char *name;
+ VkFormat iformat; // vulkan format enum
+ int components; // how many components are there
+ int bytes; // how many bytes is a texel
+ int bits[4]; // how many bits per component
+ enum ra_ctype ctype; // format representation type
+ bool fucked_order; // used for formats which are not simply rgba
+};
+
+extern const struct vk_format vk_formats[];
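Because vk_formats is terminated by a zeroed entry (the {0} sentinel in formats.c), a by-name lookup is a simple linear scan over the table. A minimal sketch; the helper below is hypothetical and not part of this patch:

    #include <string.h>
    #include "video/out/vulkan/formats.h"

    // Hypothetical helper: find a vk_format entry by its short name,
    // e.g. "rgba8" or "rgb16hf". Returns NULL for unknown names.
    static const struct vk_format *vk_find_format_by_name(const char *name)
    {
        for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) {
            if (strcmp(fmt->name, name) == 0)
                return fmt;
        }
        return NULL;
    }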
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
new file mode 100644
index 0000000000..31fcd36ddb
--- /dev/null
+++ b/video/out/vulkan/malloc.c
@@ -0,0 +1,424 @@
+#include "malloc.h"
+#include "utils.h"
+#include "osdep/timer.h"
+
+// Controls the multiplication factor for new slab allocations. The new slab
+// will always be allocated such that the size of the slab is this factor times
+// the previous slab. Higher values make it grow faster.
+#define MPVK_HEAP_SLAB_GROWTH_RATE 4
+
+// Controls the minimum slab size, to reduce the frequency at which very small
+// slabs would need to get allocated when allocating the first few buffers.
+// (Default: 1 MB)
+#define MPVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20)
+
+// Controls the maximum slab size, to reduce the effect of unbounded slab
+// growth exhausting memory. If the application needs a single allocation
+// that's bigger than this value, it will be allocated directly from the
+// device. (Default: 512 MB)
+#define MPVK_HEAP_MAXIMUM_SLAB_SIZE (1 << 29)
+
+// Controls the minimum free region size, to reduce thrashing the free space
+// map with lots of small buffers during uninit. (Default: 1 KB)
+#define MPVK_HEAP_MINIMUM_REGION_SIZE (1 << 10)
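// Illustration only, not part of the patch: with the defaults above, slab
// sizes for a heap grow geometrically between the configured bounds,
// i.e. 1 MB -> 4 MB -> 16 MB -> 64 MB -> 256 MB -> 512 MB (clamped), and
// requests larger than the maximum bypass the heap to get a dedicated slab.
// One way this sizing policy could be written (hypothetical helper):
static size_t next_slab_size(size_t prev_size)
{
    size_t next = prev_size ? prev_size * MPVK_HEAP_SLAB_GROWTH_RATE
                            : MPVK_HEAP_MINIMUM_SLAB_SIZE;
    if (next > MPVK_HEAP_MAXIMUM_SLAB_SIZE)
        next = MPVK_HEAP_MAXIMUM_SLAB_SIZE;
    return next;
}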
+
+// Represents a region of available memory
+struct vk_region {
+ size_t start; // first offset in region
+ size_t end; // first offset *not* in region
+};
+
+static inline size_t region_len(struct vk_region r)
+{
+ return r.end - r.start;
+}
+
+// A single slab represents a contiguous region of allocated memory. Actual
+// allocations are served as slices of this. Slabs are organized into linked
+// lists, which represent individual heaps.
+struct vk_slab {
+ VkDeviceMemory mem; // underlying device allocation
+ size_t size; // total size of `slab`
+ size_t used; // number of bytes actually in use (for GC accounting)
+ bool dedicated; // slab is allocated specifically for one object
+ // free space map: a sorted list of memory regions that are available
+ struct vk_region *regions;
+ int num_regions;
+ // optional, depends on the memory type:
+ VkBuffer buffer; // buffer spanning the entire slab
+ void *data; // mapped memory corresponding to `mem`
+};
+
+// Represents a single memory heap. We keep track of a vk_heap for each
+// combination of buffer type and memory selection parameters. This shouldn't
+// actually be that many in practice, because some combinations simply never
+// occur, and others will generally be the same for the same objects.
+struct vk_heap {
+ VkBufferUsageFlagBits usage; // the buffer usage ty