summary | refs | log | tree | commit | diff | stats
path: root/video/out/vulkan/ra_vk.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/vulkan/ra_vk.c')
-rw-r--r-- video/out/vulkan/ra_vk.c 1590
1 files changed, 1590 insertions, 0 deletions
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
new file mode 100644
index 0000000000..ce0cbc66e9
--- /dev/null
+++ b/video/out/vulkan/ra_vk.c
@@ -0,0 +1,1590 @@
+#include "ra_vk.h"
+#include "malloc.h"
+#include "video/out/opengl/utils.h"
+
+// Forward declaration of this backend's ra_fns table. ra_vk_get() compares
+// ra->fns against its address to recognize a vulkan-backed ra.
+static struct ra_fns ra_fns_vk;
+
+// For ra.priv: state owned by a vulkan-backed ra (allocated in ra_create_vk)
+struct ra_vk {
+ struct mpvk_ctx *vk; // vulkan context passed to ra_create_vk
+ struct ra_tex *clear_tex; // stupid hack for clear(): 1x1 texture used as a blit source
+ struct vk_cmd *cmd; // currently recording cmd, NULL when none has been started
+};
+
+// Returns the vulkan context behind `ra`, or NULL when `ra` does not use
+// this backend's function table.
+struct mpvk_ctx *ra_vk_get(struct ra *ra)
+{
+    if (ra->fns == &ra_fns_vk) {
+        struct ra_vk *priv = ra->priv;
+        return priv->vk;
+    }
+
+    return NULL;
+}
+
+// Hands out the command buffer that is currently being recorded, beginning a
+// new one on the context's pool if none exists yet. Returns NULL on error.
+static struct vk_cmd *vk_require_cmd(struct ra *ra)
+{
+    struct ra_vk *priv = ra->priv;
+
+    if (priv->cmd)
+        return priv->cmd;
+
+    struct mpvk_ctx *ctx = ra_vk_get(ra);
+    priv->cmd = vk_cmd_begin(ctx, ctx->pool);
+    return priv->cmd;
+}
+
+// Submits the command buffer being recorded, if there is one. `done` is
+// forwarded to vk_cmd_submit. Returns false only when that submission fails;
+// having nothing to submit counts as success.
+//
+// Note: This technically follows the flush() API, but we don't need
+// to expose that (and in fact, it's a bad idea) since we control flushing
+// behavior with ra_vk_present_frame already.
+static bool vk_flush(struct ra *ra, VkSemaphore *done)
+{
+    struct ra_vk *priv = ra->priv;
+    struct mpvk_ctx *ctx = ra_vk_get(ra);
+
+    if (!priv->cmd)
+        return true;
+
+    if (!vk_cmd_submit(ctx, priv->cmd, done))
+        return false;
+
+    // The command buffer has been handed off; the next user starts a new one
+    priv->cmd = NULL;
+    return true;
+}
+
+// Registers `callback(ra, arg)`: on the command buffer being recorded when
+// there is one, otherwise on the device itself.
+// The callback's *priv will always be set to `ra`
+static void vk_callback(struct ra *ra, vk_cb callback, void *arg)
+{
+    struct ra_vk *priv = ra->priv;
+    struct mpvk_ctx *ctx = ra_vk_get(ra);
+
+    if (!priv->cmd) {
+        vk_dev_callback(ctx, callback, ra, arg);
+        return;
+    }
+
+    vk_cmd_callback(priv->cmd, callback, ra, arg);
+}
+
+// Defines `fun##_lazy(ra, arg)`: instead of calling `fun` right away, it
+// registers `fun` (cast to vk_cb) with `arg` through vk_callback().
+#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \
+ static void fun##_lazy(struct ra *ra, argtype *arg) { \
+ vk_callback(ra, (vk_cb) fun, arg); \
+ }
+
+// Tears down a ra created by ra_create_vk: submits any pending commands,
+// waits for the device to go idle, then frees the clear() texture and the
+// ra itself. Also used as ra_create_vk's error path.
+static void vk_destroy_ra(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ // Flush and idle first, before any resource is released
+ vk_flush(ra, NULL);
+ mpvk_dev_wait_idle(vk);
+ ra_tex_free(ra, &p->clear_tex);
+
+ talloc_free(ra);
+}
+
+// Populates ra->formats with every vk_formats entry the physical device can
+// sample from with optimal tiling, and derives format-related capabilities:
+// RA_CAP_TEX_1D, RA_CAP_TEX_3D, RA_CAP_BLIT and ra->max_texture_wh.
+// Returns true on success. The `error` path (return false) has no explicit
+// goto in this function; presumably VK_ASSERT jumps there on failure.
+static bool vk_setup_formats(struct ra *ra)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ // vk_formats is terminated by an entry with a NULL name
+ for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) {
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop);
+
+ // As a bare minimum, we need to sample from an allocated image
+ VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
+ if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
+ continue;
+
+ VkFormatFeatureFlags linear_bits, render_bits;
+ linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+
+ // The format is owned by `ra`; priv points back at the static table entry
+ struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+ *fmt = (struct ra_format) {
+ .name = vk_fmt->name,
+ .priv = (void *)vk_fmt,
+ .ctype = vk_fmt->ctype,
+ .ordered = !vk_fmt->fucked_order,
+ .num_components = vk_fmt->components,
+ .pixel_size = vk_fmt->bytes,
+ .linear_filter = !!(flags & linear_bits),
+ .renderable = !!(flags & render_bits), // true if either render bit is set
+ };
+
+ for (int i = 0; i < 4; i++)
+ fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+ }
+
+ // Populate some other capabilities related to formats while we're at it
+ VkImageType imgType[3] = {
+ VK_IMAGE_TYPE_1D,
+ VK_IMAGE_TYPE_2D,
+ VK_IMAGE_TYPE_3D
+ };
+
+ // R8_UNORM is supported on literally every single vulkan implementation
+ const VkFormat testfmt = VK_FORMAT_R8_UNORM;
+
+ // Probe sampled-image support of the test format for each dimensionality
+ for (int d = 0; d < 3; d++) {
+ VkImageFormatProperties iprop;
+ VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
+ testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL,
+ VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop);
+
+ switch (imgType[d]) {
+ case VK_IMAGE_TYPE_1D:
+ if (res == VK_SUCCESS)
+ ra->caps |= RA_CAP_TEX_1D;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ // 2D formats must be supported by RA, so ensure this is the case
+ VK_ASSERT(res, "Querying 2D format limits");
+ ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ if (res == VK_SUCCESS)
+ ra->caps |= RA_CAP_TEX_3D;
+ break;
+ }
+ }
+
+ // RA_CAP_BLIT implies both blitting between images as well as blitting
+ // directly to the swapchain image, so check for all three operations
+ bool blittable = true;
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop);
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
+ blittable = false;
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ blittable = false;
+
+ // Third operation: blitting into the surface (swapchain) format
+ vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop);
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ blittable = false;
+
+ if (blittable)
+ ra->caps |= RA_CAP_BLIT;
+
+ return true;
+
+error:
+ return false;
+}
+
+// (Repeated forward declaration of the backend's function table.)
+static struct ra_fns ra_fns_vk;
+
+// Creates a ra backed by the vulkan context `vk`, logging to `log`.
+// `vk->dev` and `vk->alloc` must already be set up. Fills in the GLSL
+// version, capabilities and formats, probes shader storage buffer support,
+// and allocates the 1x1 texture used by clear().
+// Returns the new ra, or NULL on failure (everything allocated so far is
+// released through vk_destroy_ra).
+struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
+{
+    assert(vk->dev);
+    assert(vk->alloc);
+
+    struct ra *ra = talloc_zero(NULL, struct ra);
+    ra->log = log;
+    ra->fns = &ra_fns_vk;
+
+    struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk);
+    p->vk = vk;
+
+    // There's no way to query the supported GLSL version from VK_NV_glsl_shader
+    // (thanks nvidia), so just pick the GL version that modern nvidia devices
+    // support..
+    ra->glsl_version = 450;
+    ra->glsl_vulkan = true;
+    ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
+    ra->caps = RA_CAP_NESTED_ARRAY;
+
+    if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
+        ra->caps |= RA_CAP_COMPUTE;
+
+    if (!vk_setup_formats(ra))
+        goto error;
+
+    // UBO support is required
+    ra->caps |= RA_CAP_BUF_RO;
+
+    // Try creating a shader storage buffer
+    struct ra_buf_params ssbo_params = {
+        .type = RA_BUF_TYPE_SHADER_STORAGE,
+        .size = 16,
+    };
+
+    struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params);
+    if (ssbo) {
+        ra->caps |= RA_CAP_BUF_RW;
+        ra_buf_free(ra, &ssbo);
+    }
+
+    // The format list depends on what the device supports, so the lookup for
+    // the clear() texture format must be checked: texture creation
+    // dereferences params->format unconditionally.
+    const struct ra_format *clear_fmt = ra_find_float16_format(ra, 4);
+    if (!clear_fmt) {
+        MP_ERR(ra, "No 4-component float16 format available for clear()!\n");
+        goto error;
+    }
+
+    // To support clear() by region, we need to allocate a dummy 1x1 image that
+    // will be used as the source of blit operations
+    struct ra_tex_params clear_params = {
+        .dimensions = 1, // no point in using a 2D image if height = 1
+        .w = 1,
+        .h = 1,
+        .d = 1,
+        .format = clear_fmt,
+        .blit_src = 1,
+        .host_mutable = 1,
+    };
+
+    p->clear_tex = ra_tex_create(ra, &clear_params);
+    if (!p->clear_tex) {
+        MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n");
+        goto error;
+    }
+
+    return ra;
+
+error:
+    vk_destroy_ra(ra);
+    return NULL;
+}
+
+// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain
+// compatible: one color attachment in `fmt`'s vulkan format, one graphics
+// subpass, attachment kept in COLOR_ATTACHMENT_OPTIMAL layout throughout.
+// `load_fbo` selects whether the previous attachment contents are loaded or
+// may be discarded. The result of vkCreateRenderPass is returned as-is.
+static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
+                                      bool load_fbo, VkRenderPass *out)
+{
+    struct vk_format *vk_fmt = fmt->priv;
+    assert(fmt->renderable);
+
+    VkAttachmentLoadOp load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+    if (load_fbo)
+        load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
+
+    VkAttachmentDescription color_att = {
+        .format = vk_fmt->iformat,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .loadOp = load_op,
+        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+        .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+        .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+    };
+
+    VkAttachmentReference color_ref = {
+        .attachment = 0,
+        .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+    };
+
+    VkSubpassDescription subpass = {
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+        .colorAttachmentCount = 1,
+        .pColorAttachments = &color_ref,
+    };
+
+    VkRenderPassCreateInfo pass_info = {
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+        .attachmentCount = 1,
+        .pAttachments = &color_att,
+        .subpassCount = 1,
+        .pSubpasses = &subpass,
+    };
+
+    return vkCreateRenderPass(dev, &pass_info, MPVK_ALLOCATOR, out);
+}
+
+// For ra_tex.priv: vulkan objects and tracked state behind one ra_tex
+struct ra_tex_vk {
+ bool external_img; // img is not owned by us (wrapped swapchain image); never destroyed here
+ VkImageType type;
+ VkImage img;
+ struct vk_memslice mem; // backing memory, only used when !external_img
+ // for sampling
+ VkImageView view;
+ VkSampler sampler;
+ // for rendering
+ VkFramebuffer framebuffer;
+ VkRenderPass dummyPass; // skeleton pass the framebuffer was created against
+ // for uploading
+ struct ra_buf_pool pbo;
+ // "current" metadata, can change during the course of execution
+ // (updated by tex_barrier on every transition)
+ VkImageLayout current_layout;
+ VkPipelineStageFlagBits current_stage;
+ VkAccessFlagBits current_access;
+};
+
+// Small helper to ease image barrier creation: transitions `tex_vk` from its
+// tracked stage/access/layout to the requested ones, recording the barrier
+// into `cmd` only if layout or access actually change. If `discard` is set,
+// the contents of the image will be undefined after the barrier. The tracked
+// state is updated in either case.
+static void tex_barrier(struct vk_cmd *cmd, struct ra_tex_vk *tex_vk,
+                        VkPipelineStageFlagBits newStage,
+                        VkAccessFlagBits newAccess, VkImageLayout newLayout,
+                        bool discard)
+{
+    // Discarding means we don't care about the old contents: transition from
+    // UNDEFINED with no source access to wait on
+    VkImageLayout fromLayout = discard ? VK_IMAGE_LAYOUT_UNDEFINED
+                                       : tex_vk->current_layout;
+    VkAccessFlags fromAccess = discard ? 0 : tex_vk->current_access;
+
+    VkImageMemoryBarrier barrier = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+        .oldLayout = fromLayout,
+        .newLayout = newLayout,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .srcAccessMask = fromAccess,
+        .dstAccessMask = newAccess,
+        .image = tex_vk->img,
+        .subresourceRange = vk_range,
+    };
+
+    bool layout_changes = barrier.oldLayout != barrier.newLayout;
+    bool access_changes = barrier.srcAccessMask != barrier.dstAccessMask;
+
+    if (layout_changes || access_changes) {
+        vkCmdPipelineBarrier(cmd->buf, tex_vk->current_stage, newStage, 0,
+                             0, NULL, 0, NULL, 1, &barrier);
+    }
+
+    tex_vk->current_access = newAccess;
+    tex_vk->current_layout = newLayout;
+    tex_vk->current_stage = newStage;
+}
+
+// Immediately destroys `tex` and every vulkan object hanging off it. Safe to
+// call with NULL. The VkImage and its memory are only released when the image
+// is not external (i.e. not a wrapped swapchain image).
+static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ ra_buf_pool_uninit(ra, &tex_vk->pbo);
+ vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR);
+ vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR);
+ vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR);
+ vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR);
+ if (!tex_vk->external_img) {
+ vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR);
+ vk_free_memslice(vk, tex_vk->mem);
+ }
+
+ talloc_free(tex);
+}
+
+// vk_tex_destroy_lazy(): registers the destruction through vk_callback()
+MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex);
+
+// Initializes non-VkImage values like the image view, samplers, etc.
+// Expects tex->priv to already hold a valid VkImage and tex->params to be
+// filled in. Depending on the params this creates an image view (render_src
+// or render_dst), a sampler (render_src), and a dummy render pass plus
+// framebuffer (render_dst). Returns false if any vulkan call fails (the VK()
+// macro presumably jumps to `error`); objects created up to that point stay
+// in tex_vk for the caller to destroy.
+static bool vk_init_image(struct ra *ra, struct ra_tex *tex)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_tex_params *params = &tex->params;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex_vk->img);
+
+ // Initial tracked state consumed by the first tex_barrier
+ tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ tex_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ tex_vk->current_access = 0;
+
+ if (params->render_src || params->render_dst) {
+ // Lookup table from image type to the matching view type
+ static const VkImageViewType viewType[] = {
+ [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
+ [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
+ [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
+ };
+
+ const struct vk_format *fmt = params->format->priv;
+ VkImageViewCreateInfo vinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = tex_vk->img,
+ .viewType = viewType[tex_vk->type],
+ .format = fmt->iformat,
+ .subresourceRange = vk_range,
+ };
+
+ VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view));
+ }
+
+ if (params->render_src) {
+ // Linear sampling may only be requested for formats that support it
+ assert(params->format->linear_filter || !params->src_linear);
+ VkFilter filter = params->src_linear
+ ? VK_FILTER_LINEAR
+ : VK_FILTER_NEAREST;
+ VkSamplerAddressMode wrap = params->src_repeat
+ ? VK_SAMPLER_ADDRESS_MODE_REPEAT
+ : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+ VkSamplerCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = filter,
+ .minFilter = filter,
+ .addressModeU = wrap,
+ .addressModeV = wrap,
+ .addressModeW = wrap,
+ .maxAnisotropy = 1.0,
+ };
+
+ VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler));
+ }
+
+ if (params->render_dst) {
+ // Framebuffers need to be created against a specific render pass
+ // layout, so we need to temporarily create a skeleton/dummy render
+ // pass for vulkan to figure out the compatibility
+ VK(vk_create_render_pass(vk->dev, params->format, false, &tex_vk->dummyPass));
+
+ VkFramebufferCreateInfo finfo = {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .renderPass = tex_vk->dummyPass,
+ .attachmentCount = 1,
+ .pAttachments = &tex_vk->view,
+ .width = tex->params.w,
+ .height = tex->params.h,
+ .layers = 1,
+ };
+
+ VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR,
+ &tex_vk->framebuffer));
+
+ // NOTE: Normally we would free the dummyPass again here, but a bug
+ // in the nvidia vulkan driver causes a segfault if you do.
+ // (It is destroyed together with the texture in vk_tex_destroy.)
+ }
+
+ return true;
+
+error:
+ return false;
+}
+
+// Creates a texture according to `params`: allocates the VkImage with usage
+// flags derived from the requested capabilities, binds device-local memory,
+// sets up view/sampler/framebuffer via vk_init_image, and uploads
+// params->initial_data if given.
+// Returns the new texture, or NULL if the format/usage/size combination is
+// not supported by the device or any vulkan call fails. Every failure path
+// goes through `error`, so the partially constructed texture is always
+// released (the early rejections used to return directly and leak it).
+static struct ra_tex *vk_tex_create(struct ra *ra,
+                                    const struct ra_tex_params *params)
+{
+    struct mpvk_ctx *vk = ra_vk_get(ra);
+
+    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+    tex->params = *params;
+    tex->params.initial_data = NULL;
+
+    struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+
+    const struct vk_format *fmt = params->format->priv;
+    switch (params->dimensions) {
+    case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
+    case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
+    case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
+    default: abort();
+    }
+
+    VkImageUsageFlags usage = 0;
+    if (params->render_src)
+        usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+    if (params->render_dst)
+        usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+    if (params->storage_dst)
+        usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+    if (params->blit_src)
+        usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+    if (params->host_mutable || params->blit_dst || params->initial_data)
+        usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+    // Double-check image usage support and fail immediately if invalid
+    VkImageFormatProperties iprop;
+    VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
+            fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0,
+            &iprop);
+    if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+        // Unsupported combination: fail quietly, but still free `tex`
+        goto error;
+    } else {
+        VK_ASSERT(res, "Querying image format properties");
+    }
+
+    VkFormatProperties prop;
+    vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
+    VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
+
+    bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT,
+         has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+
+    if (params->w > iprop.maxExtent.width ||
+        params->h > iprop.maxExtent.height ||
+        params->d > iprop.maxExtent.depth ||
+        (params->blit_src && !has_blit_src) ||
+        (params->src_linear && !has_src_linear))
+    {
+        // Exceeds device limits or lacks a required feature; free `tex`
+        goto error;
+    }
+
+    VkImageCreateInfo iinfo = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .imageType = tex_vk->type,
+        .format = fmt->iformat,
+        .extent = (VkExtent3D) { params->w, params->h, params->d },
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .tiling = VK_IMAGE_TILING_OPTIMAL,
+        .usage = usage,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 1,
+        .pQueueFamilyIndices = &vk->pool->qf,
+    };
+
+    VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
+
+    VkMemoryPropertyFlagBits memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+    VkMemoryRequirements reqs;
+    vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs);
+
+    struct vk_memslice *mem = &tex_vk->mem;
+    if (!vk_malloc_generic(vk, reqs, memFlags, mem))
+        goto error;
+
+    VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
+
+    if (!vk_init_image(ra, tex))
+        goto error;
+
+    if (params->initial_data) {
+        // Tightly packed rows; the old contents are irrelevant (invalidate)
+        struct ra_tex_upload_params ul_params = {
+            .tex = tex,
+            .invalidate = true,
+            .src = params->initial_data,
+            .stride = params->w * fmt->bytes,
+        };
+        if (!ra->fns->tex_upload(ra, &ul_params))
+            goto error;
+    }
+
+    return tex;
+
+error:
+    // Destroys whatever was created so far; untouched handles are still zero
+    vk_tex_destroy(ra, tex);
+    return NULL;
+}
+
+// Wraps an existing swapchain image `vkimg` in a ra_tex without taking
+// ownership of the image itself (external_img is set, so destroying the
+// texture leaves the VkImage alone). The texture size and capabilities are
+// taken from the swapchain creation info `info`; the format is the ra_format
+// matching vk->surf_format. Returns NULL if no such format is registered or
+// vk_init_image fails.
+struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
+ VkSwapchainCreateInfoKHR info)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_tex *tex = NULL;
+
+ // Find the ra_format whose vulkan format equals the surface format
+ const struct ra_format *format = NULL;
+ for (int i = 0; i < ra->num_formats; i++) {
+ const struct vk_format *fmt = ra->formats[i]->priv;
+ if (fmt->iformat == vk->surf_format.format) {
+ format = ra->formats[i];
+ break;
+ }
+ }
+
+ if (!format) {
+ MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image "
+ "with surface format 0x%x\n", vk->surf_format.format);
+ goto error;
+ }
+
+ // Capabilities mirror the usage flags the swapchain was created with
+ tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = (struct ra_tex_params) {
+ .format = format,
+ .dimensions = 2,
+ .w = info.imageExtent.width,
+ .h = info.imageExtent.height,
+ .d = 1,
+ .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
+ .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
+ .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT),
+ .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
+ .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT),
+ };
+
+ struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+ tex_vk->type = VK_IMAGE_TYPE_2D;
+ tex_vk->external_img = true;
+ tex_vk->img = vkimg;
+
+ if (!vk_init_image(ra, tex))
+ goto error;
+
+ return tex;
+
+error:
+ // tex may still be NULL here; vk_tex_destroy tolerates that
+ vk_tex_destroy(ra, tex);
+ return NULL;
+}
+
+// For ra_buf.priv: vulkan state behind one ra_buf
+struct ra_buf_vk {
+ struct vk_bufslice slice; // buffer slice returned by vk_malloc_buffer
+ int refcount; // 1 = object allocated but not in use, > 1 = in use
+ bool needsflush; // set after a host-side memcpy; consumed by the next buf_barrier
+ // "current" metadata, can change during course of execution
+ // (updated by buf_barrier)
+ VkPipelineStageFlagBits current_stage;
+ VkAccessFlagBits current_access;
+};
+
+// Drops one reference to `buf` (NULL is ignored). When the last reference is
+// gone, the backing memory slice and the ra_buf itself are freed.
+static void vk_buf_deref(struct ra *ra, struct ra_buf *buf)
+{
+    if (!buf)
+        return;
+
+    struct mpvk_ctx *ctx = ra_vk_get(ra);
+    struct ra_buf_vk *priv = buf->priv;
+
+    priv->refcount -= 1;
+    if (priv->refcount != 0)
+        return; // still referenced elsewhere
+
+    vk_free_memslice(ctx, priv->slice.mem);
+    talloc_free(buf);
+}
+
+// Records a buffer memory barrier for `buf` into `cmd`, moving the given
+// offset/size range from the buffer's tracked stage/access to
+// newStage/newAccess, and updates the tracked state. The barrier is skipped
+// when source and destination access masks are equal. Every call also takes
+// an extra reference on `buf` and registers vk_buf_deref as a callback on
+// `cmd` to drop it again, keeping the buffer alive while `cmd` uses it.
+static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf,
+ VkPipelineStageFlagBits newStage,
+ VkAccessFlagBits newAccess, int offset, size_t size)
+{
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ VkBufferMemoryBarrier buffBarrier = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .srcAccessMask = buf_vk->current_access,
+ .dstAccessMask = newAccess,
+ .buffer = buf_vk->slice.buf,
+ .offset = offset,
+ .size = size,
+ };
+
+ // Pending host writes (explicit memcpy, or any host-mapped buffer) are
+ // treated as the source access, synchronized against the host stage
+ if (buf_vk->needsflush || buf->params.host_mapped) {
+ buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT;
+ buf_vk->needsflush = false;
+ }
+
+ if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) {
+ vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0,
+ 0, NULL, 1, &buffBarrier, 0, NULL);
+ }
+
+ buf_vk->current_stage = newStage;
+ buf_vk->current_access = newAccess;
+ // Hold a reference until `cmd`'s callbacks run
+ buf_vk->refcount++;
+ vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf);
+}
+
+// Destroying a buffer is just dropping a reference to it.
+// The macro below additionally defines vk_buf_destroy_lazy(), which
+// registers the deref through vk_callback().
+#define vk_buf_destroy vk_buf_deref
+MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf);
+
+// Writes `size` bytes from `data` into `buf` at byte `offset`. The buffer
+// must be host_mutable, or still carry initial_data (the call made from
+// vk_buf_create). Failure to obtain a command buffer is logged and the
+// update is dropped; nothing is reported to the caller.
+static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+ const void *data, size_t size)
+{
+ assert(buf->params.host_mutable || buf->params.initial_data);
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ // For host-mapped buffers, we can just directly memcpy the buffer contents.
+ // Otherwise, we can update the buffer from the GPU using a command buffer.
+ if (buf_vk->slice.data) {
+ assert(offset + size <= buf->params.size);
+ uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset;
+ memcpy((void *)addr, data, size);
+ // Remember the host write so the next buf_barrier synchronizes it
+ buf_vk->needsflush = true;
+ } else {
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd) {
+ MP_ERR(ra, "Failed updating buffer!\n");
+ return;
+ }
+
+ buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT, offset, size);
+
+ // NOTE(review): only the offset alignment is asserted here; this path
+ // performs no bounds check against buf->params.size.
+ VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset;
+ assert(bufOffset == MP_ALIGN_UP(bufOffset, 4));
+ vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data);
+ }
+}
+
+// Creates a buffer described by `params`: picks vulkan usage and memory
+// property flags plus the required alignment from the buffer type and the
+// host_mutable/host_mapped/initial_data settings, allocates it through
+// vk_malloc_buffer, and uploads initial_data if present.
+// Returns the new buffer (refcount 1), or NULL if the allocation fails.
+// Aborts on an unknown buffer type.
+static struct ra_buf *vk_buf_create(struct ra *ra,
+ const struct ra_buf_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
+ buf->params = *params;
+
+ struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk);
+ buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ buf_vk->current_access = 0;
+ buf_vk->refcount = 1;
+
+ VkBufferUsageFlagBits bufFlags = 0;
+ VkMemoryPropertyFlagBits memFlags = 0;
+ VkDeviceSize align = 4; // alignment 4 is needed for buf_update
+
+ switch (params->type) {
+ case RA_BUF_TYPE_TEX_UPLOAD:
+ bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ break;
+ case RA_BUF_TYPE_UNIFORM:
+ bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment);
+ break;
+ case RA_BUF_TYPE_SHADER_STORAGE:
+ bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
+ break;
+ case RA_BUF_TYPE_VERTEX:
+ bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ break;
+ default: abort();
+ }
+
+ // Buffers that get written after creation must be valid transfer targets
+ if (params->host_mutable || params->initial_data) {
+ bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment);
+ }
+
+ if (params->host_mapped) {
+ memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ }
+
+ if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
+ &buf_vk->slice))
+ {
+ goto error;
+ }
+
+ // Only expose the mapping to the user if it was explicitly requested
+ if (params->host_mapped)
+ buf->data = buf_vk->slice.data;
+
+ if (params->initial_data)
+ vk_buf_update(ra, buf, 0, params->initial_data, params->size);
+
+ buf->params.initial_data = NULL; // do this after vk_buf_update
+ return buf;
+
+error:
+ // refcount is 1 here, so this frees the buffer immediately
+ vk_buf_destroy(ra, buf);
+ return NULL;
+}
+
+// Reports whether `buf` is currently unused, i.e. its reference count is
+// exactly 1 (allocated, but not referenced by any command buffer).
+static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf)
+{
+    const struct ra_buf_vk *priv = buf->priv;
+    bool idle = priv->refcount == 1;
+    return idle;
+}
+
+// Uploads texture data described by `params`. Without a source buffer the
+// upload is delegated to ra_tex_upload_pbo using the texture's own buffer
+// pool. With a buffer, a buffer-to-image copy is recorded into the current
+// command buffer, preceded by the necessary buffer and image barriers.
+// For 2D textures, params->stride selects the row length and params->rc (if
+// set) restricts the upload to a sub-rectangle.
+// Returns false if the stride is not a multiple of the texel size or no
+// command buffer could be obtained.
+static bool vk_tex_upload(struct ra *ra,
+                          const struct ra_tex_upload_params *params)
+{
+    struct ra_tex *tex = params->tex;
+    struct ra_tex_vk *tex_vk = tex->priv;
+
+    if (!params->buf)
+        return ra_tex_upload_pbo(ra, &tex_vk->pbo, params);
+
+    assert(!params->src);
+    assert(params->buf);
+    struct ra_buf *buf = params->buf;
+    struct ra_buf_vk *buf_vk = buf->priv;
+    int pix_size = tex->params.format->pixel_size;
+
+    VkBufferImageCopy region = {
+        .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset,
+        .bufferRowLength = tex->params.w,
+        .bufferImageHeight = tex->params.h,
+        .imageSubresource = vk_layers,
+        .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d},
+    };
+
+    if (tex->params.dimensions == 2) {
+        region.bufferRowLength = params->stride / pix_size;
+        if (region.bufferRowLength * pix_size != params->stride) {
+            MP_ERR(ra, "Texture upload strides must be a multiple of the texel "
+                   "size!\n");
+            goto error;
+        }
+
+        if (params->rc) {
+            struct mp_rect *rc = params->rc;
+            region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0};
+            region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1};
+        }
+    }
+
+    // Byte size of the buffer range read by the copy. Barrier sizes are in
+    // bytes, so the texel count is scaled by the texel size; the arithmetic is
+    // widened to 64 bits first so large uploads cannot wrap. Only the rows
+    // that are actually copied are counted, so a partial (rc) upload does not
+    // claim more than the source data spans.
+    uint64_t size = (uint64_t)region.bufferRowLength *
+                    region.imageExtent.height *
+                    region.imageExtent.depth *
+                    (uint64_t)pix_size;
+
+    struct vk_cmd *cmd = vk_require_cmd(ra);
+    if (!cmd)
+        goto error;
+
+    // NOTE(review): the offset passed here includes slice.mem.offset, whereas
+    // vk_buf_update passes the buffer-relative offset to buf_barrier. Confirm
+    // which one VkBufferMemoryBarrier.offset is supposed to receive.
+    buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size);
+
+    tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                VK_ACCESS_TRANSFER_WRITE_BIT,
+                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                params->invalidate);
+
+    vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img,
+                           tex_vk->current_layout, 1, &region);
+
+    return true;
+
+error:
+    return false;
+}
+
+// Number of descriptor sets allocated per render pass
+#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH
+
+// For ra_renderpass.priv: vulkan objects behind one render pass
+struct ra_renderpass_vk {
+ // Compiled shaders
+ VkShaderModule vert;
+ VkShaderModule frag;
+ VkShaderModule comp;
+ // Pipeline / render pass
+ VkPipeline pipe;
+ VkPipelineLayout pipeLayout;
+ VkPipelineCache pipeCache;
+ VkRenderPass renderPass;
+ // Descriptor set (bindings)
+ VkDescriptorSetLayout dsLayout;
+ VkDescriptorPool dsPool;
+ VkDescriptorSet dss[MPVK_NUM_DS];
+ int dindex; // presumably the index into dss currently in use - TODO confirm
+ // Vertex buffers (vertices)
+ struct ra_buf_pool vbo;
+
+ // For updating
+ // (arrays with one entry per binding, allocated in vk_renderpass_create)
+ VkWriteDescriptorSet *dswrite;
+ VkDescriptorImageInfo *dsiinfo;
+ VkDescriptorBufferInfo *dsbinfo;
+};
+
+// Immediately destroys `pass` and every vulkan object it owns: vertex buffer
+// pool, pipeline and cache, render pass, layouts, descriptor pool and shader
+// modules. Safe to call with NULL.
+static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+ if (!pass)
+ return;
+
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+
+ ra_buf_pool_uninit(ra, &pass_vk->vbo);
+ vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR);
+ vkDestroyPipelineCache(vk->dev, pass_vk->pipeCache, MPVK_ALLOCATOR);
+ vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR);
+ vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR);
+ vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR);
+ vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->vert, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->frag, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->comp, MPVK_ALLOCATOR);
+
+ talloc_free(pass);
+}
+
+// vk_renderpass_destroy_lazy(): registers the destruction through vk_callback()
+MAKE_LAZY_DESTRUCTOR(vk_renderpass_destroy, struct ra_renderpass);
+
+// Maps the ra variable types that occupy a descriptor binding to the
+// corresponding vulkan descriptor type. Other ra_vartype values have no
+// entry here.
+static const VkDescriptorType dsType[] = {
+ [RA_VARTYPE_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ [RA_VARTYPE_IMG_W] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ [RA_VARTYPE_BUF_RO] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ [RA_VARTYPE_BUF_RW] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+};
+
+// Looks up a vulkan format usable as a vertex attribute for the input `inp`:
+// same component type, same number of components, same total byte size, and
+// vertex buffer support on the physical device. On success the format is
+// stored in *out_fmt and true is returned; otherwise false is returned and
+// *out_fmt is left untouched. Only FLOAT and BYTE_UNORM inputs are handled;
+// any other type aborts.
+static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp,
+                                VkFormat *out_fmt)
+{
+    struct mpvk_ctx *ctx = ra_vk_get(ra);
+
+    enum ra_ctype wanted;
+    if (inp->type == RA_VARTYPE_FLOAT) {
+        wanted = RA_CTYPE_FLOAT;
+    } else if (inp->type == RA_VARTYPE_BYTE_UNORM) {
+        wanted = RA_CTYPE_UNORM;
+    } else {
+        abort();
+    }
+
+    assert(inp->dim_m == 1);
+    for (const struct vk_format *cand = vk_formats; cand->name; cand++) {
+        // Cheap table-only checks first, in the same order as before
+        if (cand->ctype != wanted ||
+            cand->components != inp->dim_v ||
+            cand->bytes != ra_renderpass_input_layout(inp).size)
+        {
+            continue;
+        }
+
+        // Ensure this format is valid for vertex attributes
+        VkFormatProperties props;
+        vkGetPhysicalDeviceFormatProperties(ctx->physd, cand->iformat, &props);
+        if (props.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) {
+            *out_fmt = cand->iformat;
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// Shader stage that accesses a pass's descriptors, indexed by render pass
+// type. The entries are VK_SHADER_STAGE_* values and end up in
+// VkDescriptorSetLayoutBinding.stageFlags, so the table is typed as
+// VkShaderStageFlags (it was declared with the unrelated pipeline stage
+// enum type before).
+static const VkShaderStageFlags stageFlags[] = {
+    [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT,
+    [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
+};
+
+static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
+ const struct ra_renderpass_params *params)
+{
+ struct mpvk_ctx *vk = ra_vk_get(ra);
+
+ struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+ pass->params = *ra_renderpass_params_copy(pass, params);
+ pass->params.cached_program = (bstr){0};
+ struct ra_renderpass_vk *pass_vk = pass->priv =
+ talloc_zero(pass, struct ra_renderpass_vk);
+
+ static int dsCount[RA_VARTYPE_COUNT] = {0};
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ int num_bindings = 0;
+
+ for (int i = 0; i < params->num_inputs; i++) {
+ struct ra_renderpass_input *inp = &params->inputs[i];
+ switch (inp->type) {
+ case RA_VARTYPE_TEX:
+ case RA_VARTYPE_IMG_W:
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW: {
+ VkDescriptorSetLayoutBinding desc = {
+ .binding = inp->binding,
+ .descriptorType = dsType[inp->type],
+ .descriptorCount = 1,
+ .stageFlags = stageFlags[params->type],
+ };
+
+ MP_TARRAY_APPEND(pass, bindings, num_bindings, desc);
+ dsCount[inp->type]++;
+ break;
+ }
+ default: abort();
+ }
+ }
+
+ VkDescriptorPoolSize *dsPoolSizes = NULL;
+ int poolSizeCount = 0;
+ for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) {
+ if (dsCount[t] > 0) {
+ VkDescriptorPoolSize dssize = {
+ .type = dsType[t],
+ .descriptorCount = dsCount[t] * MPVK_NUM_DS,
+ };
+
+ MP_TARRAY_APPEND(pass, dsPoolSizes, poolSizeCount, dssize);
+ }
+ }
+
+ VkDescriptorPoolCreateInfo pinfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .maxSets = MPVK_NUM_DS,
+ .pPoolSizes = dsPoolSizes,
+ .poolSizeCount = poolSizeCount,
+ };
+
+ VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool));
+ talloc_free(dsPoolSizes);
+
+ pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings);
+ pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings);
+ pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_bindings);
+
+ VkDescriptorSetLayoutCreateInfo dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pBindings = bindings,
+ .bindingCount = num_bindings,
+ };
+
+ VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, MPVK_ALLOCATOR,
+ &pass_vk->dsLayout));
+
+ VkDescriptorSetLayout layouts[MPVK_NUM_DS];
+ for (int i = 0; i < MPVK_NUM_DS; i++)
+ layouts[i] = pass_vk->dsLayout;
+
+ VkDescriptorSetAllocateInfo ainfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = pass_vk->dsPool,
+ .descriptorSetCount = MPVK_NUM_DS,
+ .pSetLayouts = layouts,
+ };
+
+ VK(vkAllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss));
+
+ VkPipelineLayoutCreateInfo linfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &pass_vk->dsLayout,
+ };
+
+ VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
+ &pass_vk->pipeLayout));
+
+ VkPipelineCacheCreateInfo pcinfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
+ .pInitialData = params->cached_program.start,
+ .initialDataSize = params->cached_program.len,
+ };
+
+ VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pass_vk->pipeCache));
+
+ VkShaderModuleCreateInfo sinfo = {
+