12 files changed, 3708 insertions, 0 deletions
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
new file mode 100644
index 0000000000..4c0e783f0e
--- /dev/null
+++ b/video/out/vulkan/common.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "config.h"
+
+#include "common/common.h"
+#include "common/msg.h"
+
+// We need to define all platforms we want to support. Since we have
+// our own mechanism for checking this, we re-define the right symbols
+#if HAVE_X11
+#define VK_USE_PLATFORM_XLIB_KHR
+#endif
+
+#include <vulkan/vulkan.h>
+
+// Vulkan allows the optional use of a custom allocator. We don't need one but
+// mark this parameter with a better name in case we ever decide to change this
+// in the future. (And to make the code more readable)
+#define MPVK_ALLOCATOR NULL
+
+// A lot of things depend on streaming resources across frames. Depending on
+// how many frames we render ahead of time, we need to pick enough to avoid
+// any conflicts, so make all of these tunable relative to this constant in
+// order to centralize them.
+#define MPVK_MAX_STREAMING_DEPTH 8
+
+// Shared struct used to hold vulkan context information
+struct mpvk_ctx {
+    struct mp_log *log;
+    VkInstance inst;              // vulkan instance
+    VkPhysicalDevice physd;       // physical device in use
+    VkDebugReportCallbackEXT dbg; // debug report callback handle
+    VkDevice dev;                 // logical device
+
+    // Surface, must be initialized after the context itself
+    VkSurfaceKHR surf;
+    VkSurfaceFormatKHR surf_format; // picked at surface initialization time
+
+    struct vk_malloc *alloc; // memory allocator for this device
+    struct vk_cmdpool *pool; // primary command pool for this device
+    struct vk_cmd *last_cmd; // most recently submitted command
+
+    // Cached capabilities
+    VkPhysicalDeviceLimits limits;
+};
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
new file mode 100644
index 0000000000..bd456d214c
--- /dev/null
+++ b/video/out/vulkan/context.c
@@ -0,0 +1,501 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "options/m_config.h"
+#include "context.h"
+#include "ra_vk.h"
+#include "utils.h"
+
+// Values of the `vulkan-swap-mode` option. SWAP_AUTO (0) selects the
+// backend's preferred present mode; the other values are translated to a
+// VkPresentModeKHR by ra_vk_ctx_init. SWAP_COUNT is the number of values and
+// sizes that translation table.
+enum {
+    SWAP_AUTO = 0,
+    SWAP_FIFO,
+    SWAP_FIFO_RELAXED,
+    SWAP_MAILBOX,
+    SWAP_IMMEDIATE,
+    SWAP_COUNT,
+};
+
+// User-configurable options of the vulkan backend (filled via vulkan_conf)
+struct vulkan_opts {
+    struct mpvk_device_opts dev_opts; // logical device options
+    char *device; // force a specific GPU
+    int swap_mode; // one of the SWAP_* values
+};
+
+// Option validator for `vulkan-device`. Creates a temporary vulkan instance,
+// enumerates the physical devices and either lists them (if the parameter is
+// "help") or checks that one of them carries the requested name.
+//
+// Returns M_OPT_EXIT after printing the device list, 0 if a device with the
+// given name exists, and M_OPT_INVALID if no such device was found or the
+// devices could not be enumerated.
+static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
+                           struct bstr name, struct bstr param)
+{
+    int ret = M_OPT_INVALID;
+    VkResult res;
+
+    // Create a dummy instance to validate/list the devices
+    VkInstanceCreateInfo info = {
+        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+    };
+
+    VkInstance inst;
+    VkPhysicalDevice *devices = NULL;
+    uint32_t num = 0;
+
+    // Nothing has been acquired yet, so a failure here needs no cleanup
+    res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst);
+    if (res != VK_SUCCESS)
+        return ret;
+
+    res = vkEnumeratePhysicalDevices(inst, &num, NULL);
+    if (res != VK_SUCCESS)
+        goto done;
+
+    // VK_INCOMPLETE only means that more devices showed up between the two
+    // calls; the `num` entries that were written are still valid.
+    devices = talloc_array(NULL, VkPhysicalDevice, num);
+    res = vkEnumeratePhysicalDevices(inst, &num, devices);
+    if (res != VK_SUCCESS && res != VK_INCOMPLETE)
+        goto done;
+
+    bool help = bstr_equals0(param, "help");
+    if (help) {
+        mp_info(log, "Available vulkan devices:\n");
+        ret = M_OPT_EXIT;
+    }
+
+    for (uint32_t i = 0; i < num; i++) {
+        VkPhysicalDeviceProperties prop;
+        vkGetPhysicalDeviceProperties(devices[i], &prop);
+
+        if (help) {
+            mp_info(log, "  '%s' (GPU %d, ID %x:%x)\n", prop.deviceName, (int)i,
+                    (unsigned)prop.vendorID, (unsigned)prop.deviceID);
+        } else if (bstr_equals0(param, prop.deviceName)) {
+            // Found it: skip the "no device" error below
+            ret = 0;
+            goto done;
+        }
+    }
+
+    if (!help)
+        mp_err(log, "No device with name '%.*s'!\n", BSTR_P(param));
+
+done:
+    talloc_free(devices);
+    vkDestroyInstance(inst, MPVK_ALLOCATOR);
+    return ret;
+}
+
+#define OPT_BASE_STRUCT struct vulkan_opts
+// Option table of the vulkan backend. The parsed values are stored in a
+// struct vulkan_opts; `vulkan-device` is checked by vk_validate_dev.
+const struct m_sub_options vulkan_conf = {
+    .opts = (const struct m_option[]) {
+        OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev),
+        OPT_CHOICE("vulkan-swap-mode", swap_mode, 0,
+                   ({"auto",        SWAP_AUTO},
+                   {"fifo",         SWAP_FIFO},
+                   {"fifo-relaxed", SWAP_FIFO_RELAXED},
+                   {"mailbox",      SWAP_MAILBOX},
+                   {"immediate",    SWAP_IMMEDIATE})),
+        OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1,
+                     MPVK_MAX_QUEUES, OPTDEF_INT(1)),
+        {0}
+    },
+    .size = sizeof(struct vulkan_opts)
+};
+
+// Private state of the vulkan swapchain (stored in ra_swapchain.priv)
+struct priv {
+    struct mpvk_ctx *vk;
+    struct vulkan_opts *opts;
+    // Swapchain metadata:
+    int w, h;                 // current size
+    VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
+    VkSwapchainKHR swapchain;
+    VkSwapchainKHR old_swapchain; // replaced swapchain awaiting destruction
+    int frames_in_flight;
+    // state of the images:
+    struct ra_tex **images;   // ra_tex wrappers for the vkimages
+    int num_images;           // size of images
+    VkSemaphore *acquired;    // pool of semaphores used to synchronize images
+    int num_acquired;         // size of this pool
+    int idx_acquired;         // index of next free semaphore within this pool
+    int last_imgidx;          // the image index last acquired (for submit)
+};
+
+// Fills in the surface-dependent fields of a swapchain creation info
+// (compositeAlpha, preTransform, minImageCount, imageExtent, imageUsage)
+// based on the capabilities currently reported for the surface.
+//
+// `info` must already contain the requested minImageCount and imageExtent;
+// both are adjusted to what the surface allows. compositeAlpha and
+// preTransform are expected to be zero on entry, because a value that is
+// still zero after the selection loops is treated as "nothing supported".
+//
+// Returns false (after logging an error) if no usable configuration exists.
+static bool update_swapchain_info(struct priv *p,
+                                  VkSwapchainCreateInfoKHR *info)
+{
+    struct mpvk_ctx *vk = p->vk;
+
+    // Query the supported capabilities and update this struct as needed
+    VkSurfaceCapabilitiesKHR caps;
+    VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps));
+
+    // Sorted by preference
+    static const VkCompositeAlphaFlagBitsKHR alphaModes[] = {
+        VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
+        VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+    };
+
+    for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) {
+        if (caps.supportedCompositeAlpha & alphaModes[i]) {
+            info->compositeAlpha = alphaModes[i];
+            break;
+        }
+    }
+
+    if (!info->compositeAlpha) {
+        MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n",
+               caps.supportedCompositeAlpha);
+        goto error;
+    }
+
+    // Sorted by preference
+    static const VkSurfaceTransformFlagBitsKHR rotModes[] = {
+        VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+        VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
+    };
+
+    for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) {
+        if (caps.supportedTransforms & rotModes[i]) {
+            info->preTransform = rotModes[i];
+            break;
+        }
+    }
+
+    if (!info->preTransform) {
+        MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n",
+               caps.supportedTransforms);
+        goto error;
+    }
+
+    // Image count as required
+    MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n",
+               (int)info->minImageCount, (int)caps.minImageCount,
+               (int)caps.maxImageCount);
+
+    // Clamp to the supported range; a maxImageCount of 0 means "no limit"
+    info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount);
+    if (caps.maxImageCount)
+        info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount);
+
+    // Check the extent against the allowed parameters. A current extent of
+    // 0xFFFFFFFF is skipped here, i.e. the requested value is kept.
+    if (caps.currentExtent.width != info->imageExtent.width &&
+        caps.currentExtent.width != 0xFFFFFFFF)
+    {
+        MP_WARN(vk, "Requested width %d does not match current width %d\n",
+                (int)info->imageExtent.width, (int)caps.currentExtent.width);
+        info->imageExtent.width = caps.currentExtent.width;
+    }
+
+    if (caps.currentExtent.height != info->imageExtent.height &&
+        caps.currentExtent.height != 0xFFFFFFFF)
+    {
+        MP_WARN(vk, "Requested height %d does not match current height %d\n",
+                (int)info->imageExtent.height, (int)caps.currentExtent.height);
+        info->imageExtent.height = caps.currentExtent.height;
+    }
+
+    if (caps.minImageExtent.width  > info->imageExtent.width ||
+        caps.minImageExtent.height > info->imageExtent.height)
+    {
+        MP_ERR(vk, "Requested size %dx%d smaller than device minimum %dx%d\n",
+               (int)info->imageExtent.width, (int)info->imageExtent.height,
+               (int)caps.minImageExtent.width, (int)caps.minImageExtent.height);
+        goto error;
+    }
+
+    if (caps.maxImageExtent.width  < info->imageExtent.width ||
+        caps.maxImageExtent.height < info->imageExtent.height)
+    {
+        MP_ERR(vk, "Requested size %dx%d larger than device maximum %dx%d\n",
+               (int)info->imageExtent.width, (int)info->imageExtent.height,
+               (int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height);
+        goto error;
+    }
+
+    // We just request whatever usage we can, and let the ra_vk decide what
+    // ra_tex_params that translates to. This makes the images as flexible
+    // as possible.
+    info->imageUsage = caps.supportedUsageFlags;
+    return true;
+
+error:
+    return false;
+}
+
+// Tears down everything set up by ra_vk_ctx_init/ra_vk_ctx_resize: the image
+// wrappers, the semaphore pool, the swapchain, the ra and finally the
+// ra_swapchain itself. Resets ctx->ra and ctx->swapchain to NULL, so calling
+// it again afterwards only performs no-op frees.
+void ra_vk_ctx_uninit(struct ra_ctx *ctx)
+{
+    // Device-side objects only exist if the ra was successfully created
+    if (ctx->ra) {
+        struct priv *p = ctx->swapchain->priv;
+        struct mpvk_ctx *vk = p->vk;
+
+        // Wait for the command pool to go idle before destroying anything
+        mpvk_pool_wait_idle(vk, vk->pool);
+
+        for (int i = 0; i < p->num_images; i++)
+            ra_tex_free(ctx->ra, &p->images[i]);
+        for (int i = 0; i < p->num_acquired; i++)
+            vkDestroySemaphore(vk->dev, p->acquired[i], MPVK_ALLOCATOR);
+
+        vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+
+        talloc_free(p->images);
+        talloc_free(p->acquired);
+        ctx->ra->fns->destroy(ctx->ra);
+        ctx->ra = NULL;
+    }
+
+    // `priv` is a talloc child of the swapchain and is freed along with it
+    talloc_free(ctx->swapchain);
+    ctx->swapchain = NULL;
+}
+
+static const struct ra_swapchain_fns vulkan_swapchain;
+
+// Initializes ctx->swapchain and ctx->ra on top of an mpvk_ctx whose instance
+// and surface have already been created: picks the physical device and
+// surface format, creates the logical device and the ra, and prepares the
+// swapchain prototype info used by ra_vk_ctx_resize.
+//
+// `preferred_mode` is the present mode used when `vulkan-swap-mode` is left
+// at "auto". Fails if the resulting present mode is not supported by the
+// surface.
+//
+// Returns false on failure, in which case ra_vk_ctx_uninit has already been
+// called on `ctx`.
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+                    VkPresentModeKHR preferred_mode)
+{
+    // Declared before the first `goto error` so the cleanup path can always
+    // free it safely.
+    VkPresentModeKHR *modes = NULL;
+    uint32_t num_modes = 0;
+
+    struct ra_swapchain *sw = ctx->swapchain = talloc_zero(NULL, struct ra_swapchain);
+    sw->ctx = ctx;
+    sw->fns = &vulkan_swapchain;
+
+    struct priv *p = sw->priv = talloc_zero(sw, struct priv);
+    p->vk = vk;
+    p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf);
+
+    if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw))
+        goto error;
+    if (!mpvk_pick_surface_format(vk))
+        goto error;
+    if (!mpvk_device_init(vk, p->opts->dev_opts))
+        goto error;
+
+    ctx->ra = ra_create_vk(vk, ctx->log);
+    if (!ctx->ra)
+        goto error;
+
+    // Maps the SWAP_* option values to vulkan present modes. SWAP_AUTO has
+    // no entry; it selects `preferred_mode` below.
+    static const VkPresentModeKHR present_modes[SWAP_COUNT] = {
+        [SWAP_FIFO]         = VK_PRESENT_MODE_FIFO_KHR,
+        [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
+        [SWAP_MAILBOX]      = VK_PRESENT_MODE_MAILBOX_KHR,
+        [SWAP_IMMEDIATE]    = VK_PRESENT_MODE_IMMEDIATE_KHR,
+    };
+
+    p->protoInfo = (VkSwapchainCreateInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+        .surface = vk->surf,
+        .imageFormat = vk->surf_format.format,
+        .imageColorSpace = vk->surf_format.colorSpace,
+        .imageArrayLayers = 1, // non-stereoscopic
+        .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB
+        .presentMode = p->opts->swap_mode ? present_modes[p->opts->swap_mode]
+                                          : preferred_mode,
+        .clipped = true,
+    };
+
+    // Make sure the swapchain present mode is supported
+    VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
+                                                 &num_modes, NULL));
+    modes = talloc_array(NULL, VkPresentModeKHR, num_modes);
+    VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf,
+                                                 &num_modes, modes));
+    bool supported = false;
+    for (uint32_t i = 0; i < num_modes; i++)
+        supported |= (modes[i] == p->protoInfo.presentMode);
+    talloc_free(modes);
+    modes = NULL; // avoid a double free if we still end up in `error`
+
+    if (!supported) {
+        MP_ERR(ctx, "Requested swap mode unsupported by this device!\n");
+        goto error;
+    }
+
+    return true;
+
+error:
+    talloc_free(modes);
+    ra_vk_ctx_uninit(ctx);
+    return false;
+}
+
+// Deferred destructor for the swapchain that was replaced by the most recent
+// ra_vk_ctx_resize. Registered through vk_dev_callback; clearing
+// p->old_swapchain is what lets a pending resize proceed.
+static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p)
+{
+    assert(p->old_swapchain);
+    vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR);
+    // VkSwapchainKHR is a non-dispatchable handle, which is not necessarily
+    // a pointer type, so use VK_NULL_HANDLE rather than NULL.
+    p->old_swapchain = VK_NULL_HANDLE;
+}
+
+// (Re)creates the swapchain for a surface size of `w` x `h`, along with the
+// ra_tex wrappers for its images, and grows the semaphore pool so that it
+// holds at least one semaphore per swapchain image. Does nothing if the size
+// is unchanged.
+//
+// Returns false on failure. In that case the swapchain is destroyed and the
+// cached size is reset, so that a later call with the same size tries again
+// instead of being treated as a no-op.
+bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h)
+{
+    struct priv *p = sw->priv;
+    if (w == p->w && h == p->h)
+        return true;
+
+    struct ra *ra = sw->ctx->ra;
+    struct mpvk_ctx *vk = p->vk;
+    VkImage *vkimages = NULL;
+    uint32_t num = 0;
+
+    // It's invalid to trigger another swapchain recreation while there's
+    // more than one swapchain already active, so we need to flush any pending
+    // asynchronous swapchain release operations that may be ongoing.
+    while (p->old_swapchain)
+        mpvk_dev_poll_cmds(vk, 100000); // 100μs
+
+    VkSwapchainCreateInfoKHR sinfo = p->protoInfo;
+    sinfo.imageExtent  = (VkExtent2D){ w, h };
+    sinfo.oldSwapchain = p->swapchain;
+
+    if (!update_swapchain_info(p, &sinfo))
+        goto error;
+
+    VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain));
+    p->w = w;
+    p->h = h;
+
+    // Freeing the old swapchain while it's still in use is an error, so do
+    // it asynchronously once the device is idle.
+    if (sinfo.oldSwapchain) {
+        p->old_swapchain = sinfo.oldSwapchain;
+        vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p);
+    }
+
+    // Get the new swapchain images
+    VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL));
+    vkimages = talloc_array(NULL, VkImage, num);
+    VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages));
+
+    // If needed, allocate some more semaphores
+    while (num > (uint32_t)p->num_acquired) {
+        VkSemaphore sem;
+        static const VkSemaphoreCreateInfo seminfo = {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        };
+        VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem));
+        MP_TARRAY_APPEND(NULL, p->acquired, p->num_acquired, sem);
+    }
+
+    // Recreate the ra_tex wrappers
+    for (int i = 0; i < p->num_images; i++)
+        ra_tex_free(ra, &p->images[i]);
+
+    p->num_images = (int)num;
+    MP_TARRAY_GROW(NULL, p->images, p->num_images);
+    // Clear every slot first: if wrapping fails part-way, the remaining
+    // entries must not be left uninitialized, since ra_vk_ctx_uninit walks
+    // all num_images of them.
+    for (int i = 0; i < p->num_images; i++)
+        p->images[i] = NULL;
+    for (int i = 0; i < p->num_images; i++) {
+        p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo);
+        if (!p->images[i])
+            goto error;
+    }
+
+    talloc_free(vkimages);
+    return true;
+
+error:
+    talloc_free(vkimages);
+    vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR);
+    p->swapchain = VK_NULL_HANDLE;
+    // Forget the cached size so the next resize request is not skipped
+    p->w = p->h = 0;
+    return false;
+}
+
+// Reports the bit depth of the swapchain images, or 0 while no images exist.
+static int color_depth(struct ra_swapchain *sw)
+{
+    struct priv *p = sw->priv;
+    if (p->num_images == 0)
+        return 0;
+
+    // Use the widest component as the answer: it is probably the most
+    // representative of the real color precision (think a2bgr10). For
+    // something like rgb565 this rounds r/b up, but surfaces below 8 bits
+    // are never picked anyway.
+    const struct ra_format *fmt = p->images[0]->params.format;
+    int widest = 0;
+    for (int c = 0; c < fmt->num_components; c++) {
+        // Fall back to the storage size when no explicit depth is given
+        int cur = fmt->component_depth[c];
+        if (!cur)
+            cur = fmt->component_size[c];
+        if (cur > widest)
+            widest = cur;
+    }
+
+    return widest;
+}
+
+// Acquires the next swapchain image and exposes it through `out_fbo`. The
+// acquisition signals the semaphore at p->idx_acquired, which submit_frame
+// later consumes. The acquired image index is remembered in p->last_imgidx.
+//
+// Returns false if there is no swapchain, if it is out of date, or if the
+// acquisition failed.
+static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+    struct priv *p = sw->priv;
+    struct mpvk_ctx *vk = p->vk;
+    if (!p->swapchain)
+        goto error;
+
+    uint32_t imgidx = 0;
+    MP_TRACE(vk, "vkAcquireNextImageKHR\n");
+    // No fence is used; VkFence is a non-dispatchable handle, so the "none"
+    // value is VK_NULL_HANDLE rather than NULL.
+    VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX,
+                                         p->acquired[p->idx_acquired],
+                                         VK_NULL_HANDLE, &imgidx);
+    if (res == VK_ERROR_OUT_OF_DATE_KHR)
+        goto error; // just return in this case
+    VK_ASSERT(res, "Failed acquiring swapchain image");
+
+    p->last_imgidx = imgidx;
+    *out_fbo = (struct ra_fbo) {
+        .tex = p->images[imgidx],
+        .flip = false,
+    };
+    return true;
+
+error:
+    return false;
+}
+
+// Submits the rendering for the image acquired by start_frame and queues it
+// for presentation. Consumes the current "acquired" semaphore and advances
+// the semaphore pool index.
+//
+// Returns false if there is no swapchain, or if submission or presentation
+// failed.
+static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
+{
+    struct priv *p = sw->priv;
+    struct ra *ra = sw->ctx->ra;
+    struct mpvk_ctx *vk = p->vk;
+    if (!p->swapchain)
+        goto error;
+
+    VkSemaphore acquired = p->acquired[p->idx_acquired++];
+    p->idx_acquired %= p->num_acquired;
+
+    VkSemaphore done;
+    if (!ra_vk_submit(ra, p->images[p->last_imgidx], acquired, &done,
+                      &p->frames_in_flight))
+        goto error;
+
+    // For some reason, nvidia absolutely shits itself when presenting from a
+    // full queue - so advance all of the cmdpool indices first and then do the
+    // present on an "empty" queue
+    vk_cmd_cycle_queues(vk);
+    struct vk_cmdpool *pool = vk->pool;
+    VkQueue queue = pool->queues[pool->qindex];
+
+    // pImageIndices must point to uint32_t values, but last_imgidx is stored
+    // as an int, so go through a correctly typed local.
+    uint32_t imgidx = p->last_imgidx;
+
+    VkPresentInfoKHR pinfo = {
+        .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+        .waitSemaphoreCount = 1,
+        .pWaitSemaphores = &done,
+        .swapchainCount = 1,
+        .pSwapchains = &p->swapchain,
+        .pImageIndices = &imgidx,
+    };
+
+    VK(vkQueuePresentKHR(queue, &pinfo));
+    return true;
+
+error:
+    return false;
+}
+
+// Blocks, polling the device for finished commands, until fewer than
+// `swapchain_depth` frames are in flight.
+static void swap_buffers(struct ra_swapchain *sw)
+{
+    struct priv *p = sw->priv;
+
+    for (;;) {
+        if (p->frames_in_flight < sw->ctx->opts.swapchain_depth)
+            break;
+        mpvk_dev_poll_cmds(p->vk, 100000); // 100μs
+    }
+}
+
+// Function table installed as ra_swapchain.fns by ra_vk_ctx_init
+static const struct ra_swapchain_fns vulkan_swapchain = {
+    // .screenshot is not currently supported
+    .color_depth   = color_depth,
+    .start_frame   = start_frame,
+    .submit_frame  = submit_frame,
+    .swap_buffers  = swap_buffers,
+};
diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h
new file mode 100644
index 0000000000..3f630bc10e
--- /dev/null
+++ b/video/out/vulkan/context.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "video/out/gpu/context.h"
+#include "common.h"
+
+// Helpers for ra_ctx based on ra_vk. These initialize ctx->ra and
+// ctx->swapchain.
+//
+// ra_vk_ctx_init expects `vk` to already have its instance and surface set
+// up; `preferred_mode` is the present mode used unless the user overrides it.
+// ra_vk_ctx_resize (re)creates the swapchain for the given size.
+void ra_vk_ctx_uninit(struct ra_ctx *ctx);
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+                    VkPresentModeKHR preferred_mode);
+bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h);
diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c
new file mode 100644
index 0000000000..2611fbb706
--- /dev/null
+++ b/video/out/vulkan/context_xlib.c
@@ -0,0 +1,116 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/gpu/context.h"
+#include "video/out/x11_common.h"
+
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+// Private data of the xlib vulkan context; embeds the shared vulkan context
+struct priv {
+    struct mpvk_ctx vk;
+};
+
+// Tears the context down in reverse order of initialization: swapchain/ra
+// first, then the vulkan context, then the X11 state. Also used as the
+// cleanup path of a partially completed xlib_init.
+static void xlib_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+
+    ra_vk_ctx_uninit(ctx);
+    mpvk_uninit(&p->vk);
+    vo_x11_uninit(ctx->vo);
+}
+
+// Sets up X11 and the window, creates the vulkan instance and an Xlib surface
+// for that window, then initializes the generic vulkan ra_ctx state with FIFO
+// as the preferred present mode. On any failure everything is torn down via
+// xlib_uninit and false is returned.
+static bool xlib_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    struct mpvk_ctx *vk = &p->vk;
+    // While probing, surface creation failures are only logged verbosely
+    int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+
+    if (!vo_x11_init(ctx->vo))
+        goto error;
+
+    if (!vo_x11_create_vo_window(ctx->vo, NULL, "mpvk"))
+        goto error;
+
+    if (!mpvk_instance_init(vk, ctx->log, ctx->opts.debug))
+        goto error;
+
+    VkXlibSurfaceCreateInfoKHR xinfo = {
+         .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
+         .dpy = ctx->vo->x11->display,
+         .window = ctx->vo->x11->window,
+    };
+
+    VkResult res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR,
+                                          &vk->surf);
+    if (res != VK_SUCCESS) {
+        MP_MSG(ctx, msgl, "Failed creating Xlib surface: %s\n", vk_err(res));
+        goto error;
+    }
+
+    if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR))
+        goto error;
+
+    return true;
+
+error:
+    xlib_uninit(ctx);
+    return false;
+}
+
+// Resizes the swapchain to the VO's current display size
+static bool resize(struct ra_ctx *ctx)
+{
+    const int w = ctx->vo->dwidth;
+    const int h = ctx->vo->dheight;
+    return ra_vk_ctx_resize(ctx->swapchain, w, h);
+}
+
+// Applies the VO window configuration, then resizes the swapchain to match
+static bool xlib_reconfig(struct ra_ctx *ctx)
+{
+    vo_x11_config_vo_window(ctx->vo);
+    return resize(ctx);
+}
+
+// Forwards the request to the X11 layer and, if that reported a resize
+// event, resizes the swapchain as well. Returns VO_ERROR if the resize
+// fails, otherwise whatever the X11 layer returned.
+static int xlib_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    int x11_ret = vo_x11_control(ctx->vo, events, request, arg);
+    bool resized = *events & VO_EVENT_RESIZE;
+    if (resized && !resize(ctx))
+        return VO_ERROR;
+    return x11_ret;
+}
+
+// Thin wrapper forwarding the wakeup request to the X11 layer
+static void xlib_wakeup(struct ra_ctx *ctx)
+{
+    vo_x11_wakeup(ctx->vo);
+}
+
+// Thin wrapper forwarding the event wait (with its deadline) to the X11 layer
+static void xlib_wait_events(struct ra_ctx *ctx, int64_t until_time_us)
+{
+    vo_x11_wait_events(ctx->vo, until_time_us);
+}
+
+// Context backend entry: vulkan rendering on an X11 (Xlib) window
+const struct ra_ctx_fns ra_ctx_vulkan_xlib = {
+    .type           = "vulkan",
+    .name           = "x11",
+    .reconfig       = xlib_reconfig,
+    .control        = xlib_control,
+    .wakeup         = xlib_wakeup,
+    .wait_events    = xlib_wait_events,
+    .init           = xlib_init,
+    .uninit         = xlib_uninit,
+};
diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c
new file mode 100644
index 0000000000..b44bead99c
--- /dev/null
+++ b/video/out/vulkan/formats.c
@@ -0,0 +1,55 @@
+#include "formats.h"
+
+// Table of known vulkan formats, terminated by an all-zero entry. Columns
+// follow struct vk_format: name, vulkan format, number of components, bytes
+// per texel, bits per component, component type and (optionally) whether the
+// component order differs from plain rgba.
+// NOTE(review): "rgb565a1" and "bgr565a1" name formats whose components are
+// 5/5/5/1 bits, not 5/6/5/1 - the names look misleading; confirm before
+// relying on them.
+const struct vk_format vk_formats[] = {
+    // Regular, byte-aligned integer formats
+    {"r8",       VK_FORMAT_R8_UNORM,                  1,  1,   {8             }, RA_CTYPE_UNORM },
+    {"rg8",      VK_FORMAT_R8G8_UNORM,                2,  2,   {8,  8         }, RA_CTYPE_UNORM },
+    {"rgb8",     VK_FORMAT_R8G8B8_UNORM,              3,  3,   {8,  8,  8     }, RA_CTYPE_UNORM },
+    {"rgba8",    VK_FORMAT_R8G8B8A8_UNORM,            4,  4,   {8,  8,  8,  8 }, RA_CTYPE_UNORM },
+    {"r16",      VK_FORMAT_R16_UNORM,                 1,  2,   {16            }, RA_CTYPE_UNORM },
+    {"rg16",     VK_FORMAT_R16G16_UNORM,              2,  4,   {16, 16        }, RA_CTYPE_UNORM },
+    {"rgb16",    VK_FORMAT_R16G16B16_UNORM,           3,  6,   {16, 16, 16    }, RA_CTYPE_UNORM },
+    {"rgba16",   VK_FORMAT_R16G16B16A16_UNORM,        4,  8,   {16, 16, 16, 16}, RA_CTYPE_UNORM },
+
+    // Special, integer-only formats
+    {"r32ui",    VK_FORMAT_R32_UINT,                  1,  4,   {32            }, RA_CTYPE_UINT },
+    {"rg32ui",   VK_FORMAT_R32G32_UINT,               2,  8,   {32, 32        }, RA_CTYPE_UINT },
+    {"rgb32ui",  VK_FORMAT_R32G32B32_UINT,            3,  12,  {32, 32, 32    }, RA_CTYPE_UINT },
+    {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT,         4,  16,  {32, 32, 32, 32}, RA_CTYPE_UINT },
+    {"r64ui",    VK_FORMAT_R64_UINT,                  1,  8,   {64            }, RA_CTYPE_UINT },
+    {"rg64ui",   VK_FORMAT_R64G64_UINT,               2,  16,  {64, 64        }, RA_CTYPE_UINT },
+    {"rgb64ui",  VK_FORMAT_R64G64B64_UINT,            3,  24,  {64, 64, 64    }, RA_CTYPE_UINT },
+    {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT,         4,  32,  {64, 64, 64, 64}, RA_CTYPE_UINT },
+
+    // Packed integer formats
+    {"rg4",      VK_FORMAT_R4G4_UNORM_PACK8,          2,  1,   {4,  4         }, RA_CTYPE_UNORM },
+    {"rgba4",    VK_FORMAT_R4G4B4A4_UNORM_PACK16,     4,  2,   {4,  4,  4,  4 }, RA_CTYPE_UNORM },
+    {"rgb565",   VK_FORMAT_R5G6B5_UNORM_PACK16,       3,  2,   {5,  6,  5     }, RA_CTYPE_UNORM },
+    {"rgb565a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16,     4,  2,   {5,  5,  5,  1 }, RA_CTYPE_UNORM },
+
+    // Float formats (native formats, hf = half float, df = double float)
+    {"r16hf",    VK_FORMAT_R16_SFLOAT,                1,  2,   {16            }, RA_CTYPE_FLOAT },
+    {"rg16hf",   VK_FORMAT_R16G16_SFLOAT,             2,  4,   {16, 16        }, RA_CTYPE_FLOAT },
+    {"rgb16hf",  VK_FORMAT_R16G16B16_SFLOAT,          3,  6,   {16, 16, 16    }, RA_CTYPE_FLOAT },
+    {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT,       4,  8,   {16, 16, 16, 16}, RA_CTYPE_FLOAT },
+    {"r32f",     VK_FORMAT_R32_SFLOAT,                1,  4,   {32            }, RA_CTYPE_FLOAT },
+    {"rg32f",    VK_FORMAT_R32G32_SFLOAT,             2,  8,   {32, 32        }, RA_CTYPE_FLOAT },
+    {"rgb32f",   VK_FORMAT_R32G32B32_SFLOAT,          3, 12,   {32, 32, 32    }, RA_CTYPE_FLOAT },
+    {"rgba32f",  VK_FORMAT_R32G32B32A32_SFLOAT,       4, 16,   {32, 32, 32, 32}, RA_CTYPE_FLOAT },
+    {"r64df",    VK_FORMAT_R64_SFLOAT,                1,  8,   {64            }, RA_CTYPE_FLOAT },
+    {"rg64df",   VK_FORMAT_R64G64_SFLOAT,             2, 16,   {64, 64        }, RA_CTYPE_FLOAT },
+    {"rgb64df",  VK_FORMAT_R64G64B64_SFLOAT,          3, 24,   {64, 64, 64    }, RA_CTYPE_FLOAT },
+    {"rgba64df", VK_FORMAT_R64G64B64A64_SFLOAT,       4, 32,   {64, 64, 64, 64}, RA_CTYPE_FLOAT },
+
+    // "Swapped" component order images
+    {"bgr8",     VK_FORMAT_B8G8R8_UNORM,              3,  3,   {8,  8,  8     }, RA_CTYPE_UNORM, true },
+    {"bgra8",    VK_FORMAT_B8G8R8A8_UNORM,            4,  4,   {8,  8,  8,  8 }, RA_CTYPE_UNORM, true },
+    {"bgra4",    VK_FORMAT_B4G4R4A4_UNORM_PACK16,     4,  2,   {4,  4,  4,  4 }, RA_CTYPE_UNORM, true },
+    {"bgr565",   VK_FORMAT_B5G6R5_UNORM_PACK16,       3,  2,   {5,  6,  5     }, RA_CTYPE_UNORM, true },
+    {"bgr565a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16,     4,  2,   {5,  5,  5,  1 }, RA_CTYPE_UNORM, true },
+    {"a1rgb5",   VK_FORMAT_A1R5G5B5_UNORM_PACK16,     4,  2,   {1,  5,  5,  5 }, RA_CTYPE_UNORM, true },
+    {"a2rgb10",  VK_FORMAT_A2R10G10B10_UNORM_PACK32,  4,  4,   {2,  10, 10, 10}, RA_CTYPE_UNORM, true },
+    {"a2bgr10",  VK_FORMAT_A2B10G10R10_UNORM_PACK32,  4,  4,   {2,  10, 10, 10}, RA_CTYPE_UNORM, true },
+    {"abgr8",    VK_FORMAT_A8B8G8R8_UNORM_PACK32,     4,  4,   {8,  8,  8,  8 }, RA_CTYPE_UNORM, true },
+    {0}
+};
diff --git a/video/out/vulkan/formats.h b/video/out/vulkan/formats.h
new file mode 100644
index 0000000000..22782a6958
--- /dev/null
+++ b/video/out/vulkan/formats.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+#include "common.h"
+
+// Describes one vulkan texture format known to the renderer
+struct vk_format {
+    const char *name;    // short format name
+    VkFormat iformat;    // vulkan format enum
+    int components;      // how many components are there
+    int bytes;           // how many bytes is a texel
+    int bits[4];         // how many bits per component
+    enum ra_ctype ctype; // format representation type
+    bool fucked_order;   // used for formats which are not simply rgba
+};
+
+// List of all known formats; the last entry is all-zero (name == NULL)
+extern const struct vk_format vk_formats[];
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
new file mode 100644
index 0000000000..31fcd36ddb
--- /dev/null
+++ b/video/out/vulkan/malloc.c
@@ -0,0 +1,424 @@
+#include "malloc.h"
+#include "utils.h"
+#include "osdep/timer.h"
+
+// Controls the multiplication factor for new slab allocations. The new slab
+// will always be allocated such that the size of the slab is this factor times
+// the previous slab. Higher values make it grow faster.
+#define MPVK_HEAP_SLAB_GROWTH_RATE 4
+
+// Controls the minimum slab size, to reduce the frequency at which very small
+// slabs would need to get allocated when allocating the first few buffers.
+// (Default: 1 MB)
+#define MPVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20)
+
+// Controls the maximum slab size, to reduce the effect of unbounded slab
+// growth exhausting memory. If the application needs a single allocation
+// that's bigger than this value, it will be allocated directly from the
+// device. (Default: 512 MB)
+#define MPVK_HEAP_MAXIMUM_SLAB_SIZE (1 << 29)
+
+// Controls the minimum free region size, to reduce thrashing the free space
+// map with lots of small buffers during uninit. (Default: 1 KB)
+#define MPVK_HEAP_MINIMUM_REGION_SIZE (1 << 10)
+
+// Represents a region of available memory, as the half-open offset range
+// [start, end)
+struct vk_region {
+    size_t start; // first offset in region
+    size_t end;   // first offset *not* in region
+};
+
+// Size of a region in bytes (`end` is exclusive)
+static inline size_t region_len(struct vk_region r)
+{
+    const size_t len = r.end - r.start;
+    return len;
+}
+
+// A single slab represents a contiguous region of allocated memory. Actual
+// allocations are served as slices of this. Slabs are grouped into heaps
+// (see struct vk_heap), each of which keeps an array of its slabs.
+struct vk_slab {
+    VkDeviceMemory mem;   // underlying device allocation
+    size_t size;          // total size of `slab`
+    size_t used;          // number of bytes actually in use (for GC accounting)
+    bool dedicated;       // slab is allocated specifically for one object
+    // free space map: a sorted list of memory regions that are available
+    struct vk_region *regions;
+    int num_regions;
+    // optional, depends on the memory type:
+    VkBuffer buffer;      // buffer spanning the entire slab
+    void *data;           // mapped memory corresponding to `mem`
+};
+
+// Represents a single memory heap. We keep track of a vk_heap for each
+// combination of buffer type and memory selection parameters. This shouldn't
+// actually be that many in practice, because some combinations simply never
+// occur, and others will generally be the same for the same objects.
+struct vk_heap {
+    VkBufferUsageFlagBits usage;    // the buffer usage type (or 0)
+    VkMemoryPropertyFlagBits flags; // the memory type flags (or 0)
+    uint32_t typeBits;              // the memory type index requirements (or 0)
+    struct vk_slab **slabs;         // array of slabs sorted by size
+    int num_slabs;                  // number of entries in `slabs`
+};
+
+// The overall state of the allocator, which keeps track of a vk_heap for each
+// memory type.
+struct vk_malloc {
+    VkPhysicalDeviceMemoryProperties props; // memory types searched by find_best_memtype
+    struct vk_heap *heaps;
+    int num_heaps;                          // number of entries in `heaps`
+};
+
+// Releases a slab: destroys its buffer, frees the device memory and then the
+// slab struct itself, logging how long the vulkan calls took. The slab must
+// no longer have any bytes in use. Passing NULL is a no-op.
+static void slab_free(struct mpvk_ctx *vk, struct vk_slab *slab)
+{
+    if (slab) {
+        assert(slab->used == 0);
+
+        int64_t t_begin = mp_time_us();
+        vkDestroyBuffer(vk->dev, slab->buffer, MPVK_ALLOCATOR);
+        // freeing the memory also implicitly unmaps it if needed
+        vkFreeMemory(vk->dev, slab->mem, MPVK_ALLOCATOR);
+        int64_t t_end = mp_time_us();
+
+        long long elapsed = (long long)(t_end - t_begin);
+        MP_VERBOSE(vk, "Freeing slab of size %zu took %lld μs.\n",
+                   slab->size, elapsed);
+
+        talloc_free(slab);
+    }
+}
+
+// Finds the first memory type that has all of the property `flags` set and,
+// if `typeBits` is non-zero, whose index is allowed by that bitmask.
+//
+// On success, stores the memory type in *out_type and its index in
+// *out_index and returns true. Otherwise logs an error and returns false,
+// leaving the outputs untouched.
+static bool find_best_memtype(struct mpvk_ctx *vk, uint32_t typeBits,
+                              VkMemoryPropertyFlagBits flags,
+                              VkMemoryType *out_type, int *out_index)
+{
+    struct vk_malloc *ma = vk->alloc;
+
+    // The vulkan spec requires memory types to be sorted in the "optimal"
+    // order, so the first matching type we find will be the best/fastest one.
+    for (uint32_t i = 0; i < ma->props.memoryTypeCount; i++) {
+        // The memory type flags must include our properties
+        if ((ma->props.memoryTypes[i].propertyFlags & flags) != flags)
+            continue;
+        // The memory type must be supported by the requirements (bitfield).
+        // Shift an unsigned value: there can be up to 32 memory types, and
+        // `1 << 31` on a signed int is undefined behavior.
+        if (typeBits && !(typeBits & ((uint32_t)1 << i)))
+            continue;
+        *out_type = ma->props.memoryTypes[i];
+        *out_index = (int)i;
+        return true;
+    }
+
+    MP_ERR(vk, "Found no memory type matching property flags 0x%x and type "
+               "bits 0x%x!\n", (unsigned)flags, (unsigned)typeBits);
+    return false;
+}
+
+static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap,
+                                  size_t size)
+{
+    struct vk_slab *slab = talloc_ptrtype(NULL, slab);
+    *slab = (struct vk_slab) {
+        .size = size,
+    };
+