summaryrefslogtreecommitdiffstats
path: root/video/out/vulkan/ra_vk.c
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2018-11-10 12:53:33 +0100
committerJan Ekström <jeebjp@gmail.com>2019-04-21 23:55:22 +0300
commit7006d6752d7da21870dfdb2b0d7640a3734f748c (patch)
tree035ca58d22de438e834d212e97b73d03a4248d98 /video/out/vulkan/ra_vk.c
parent9f7dcc0726ab635fb34fb7310e54b1aec9467f14 (diff)
downloadmpv-7006d6752d7da21870dfdb2b0d7640a3734f748c.tar.bz2
mpv-7006d6752d7da21870dfdb2b0d7640a3734f748c.tar.xz
vo_gpu: vulkan: use libplacebo instead
This commit rips out the entire mpv vulkan implementation in favor of exposing lightweight wrappers on top of libplacebo instead, which provides much of the same except in a more up-to-date and polished form. This (finally) unifies the code base between mpv and libplacebo, which is something I've been hoping to do for a long time. Note: The ra_pl wrappers are abstract enough from the actual libplacebo device type that we can in theory re-use them for other devices like d3d11 or even opengl in the future, so I moved them to a separate directory for the time being. However, the rest of the code is still vulkan-specific, so I've kept the "vulkan" naming and file paths, rather than introducing a new `--gpu-api` type. (Which would have ended up with significantly more code duplication) Plus, the code and functionality is similar enough that for most users this should just be a straight-up drop-in replacement. Note: This commit excludes some changes; specifically, the updates to context_win and hwdec_cuda are deferred to separate commits for authorship reasons.
Diffstat (limited to 'video/out/vulkan/ra_vk.c')
-rw-r--r--video/out/vulkan/ra_vk.c1982
1 files changed, 0 insertions, 1982 deletions
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
deleted file mode 100644
index 3e4ba28ac4..0000000000
--- a/video/out/vulkan/ra_vk.c
+++ /dev/null
@@ -1,1982 +0,0 @@
-#include "video/out/gpu/utils.h"
-#include "video/out/gpu/spirv.h"
-
-#include "ra_vk.h"
-#include "malloc.h"
-
-#if HAVE_WIN32_DESKTOP
-#include <versionhelpers.h>
-#endif
-
-static struct ra_fns ra_fns_vk;
-
// Logical queue families a command may be submitted to; resolved to an
// actual vk_cmdpool in vk_require_cmd()
enum queue_type {
    GRAPHICS,
    COMPUTE,
    TRANSFER,
};
-
// For ra.priv
struct ra_vk {
    struct mpvk_ctx *vk;      // underlying vulkan context (not owned)
    struct ra_tex *clear_tex; // stupid hack for clear(): 1x1 blit source
    struct vk_cmd *cmd;       // currently recording cmd, or NULL
};
-
-struct mpvk_ctx *ra_vk_get(struct ra *ra)
-{
- if (ra->fns != &ra_fns_vk)
- return NULL;
-
- struct ra_vk *p = ra->priv;
- return p->vk;
-}
-
-static void vk_submit(struct ra *ra)
-{
- struct ra_vk *p = ra->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
- if (p->cmd) {
- vk_cmd_queue(vk, p->cmd);
- p->cmd = NULL;
- }
-}
-
// Returns a command buffer recording on a pool capable of `type`, or NULL
// on error. Reuses the currently recording buffer when it already targets
// the right pool; otherwise the old one is submitted first.
static struct vk_cmd *vk_require_cmd(struct ra *ra, enum queue_type type)
{
    struct ra_vk *p = ra->priv;
    struct mpvk_ctx *vk = ra_vk_get(ra);

    struct vk_cmdpool *pool;
    switch (type) {
    case GRAPHICS: pool = vk->pool_graphics; break;
    case COMPUTE: pool = vk->pool_compute; break;

    // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec),
    // so fall back to those pools when no dedicated transfer pool exists
    case TRANSFER:
        pool = vk->pool_transfer;
        if (!pool)
            pool = vk->pool_compute;
        if (!pool)
            pool = vk->pool_graphics;
        break;
    default: abort();
    }

    assert(pool);
    // Fast path: keep recording into the compatible in-flight buffer
    if (p->cmd && p->cmd->pool == pool)
        return p->cmd;

    // Pool mismatch (or nothing recording): flush and begin a new buffer
    vk_submit(ra);
    p->cmd = vk_cmd_begin(vk, pool);
    return p->cmd;
}
-
// Defines fun##_lazy(), which defers the real destructor `fun` until the
// GPU work that may still reference the object has completed: tied to the
// currently recording command buffer if there is one, otherwise to the
// device-level callback queue.
#define MAKE_LAZY_DESTRUCTOR(fun, argtype)                  \
    static void fun##_lazy(struct ra *ra, argtype *arg) {   \
        struct ra_vk *p = ra->priv;                         \
        struct mpvk_ctx *vk = ra_vk_get(ra);                \
        if (p->cmd) {                                       \
            vk_cmd_callback(p->cmd, (vk_cb) fun, ra, arg);  \
        } else {                                            \
            vk_dev_callback(vk, (vk_cb) fun, ra, arg);      \
        }                                                   \
    }
-
// Tears down the whole ra instance. Blocks until all queued GPU work (and
// the destruction callbacks attached to it) has drained before freeing.
static void vk_destroy_ra(struct ra *ra)
{
    struct ra_vk *p = ra->priv;
    struct mpvk_ctx *vk = ra_vk_get(ra);

    // Flush pending recording, then wait for every command to retire so
    // that lazy destructors have run before we free the talloc tree
    vk_submit(ra);
    mpvk_flush_commands(vk);
    mpvk_poll_commands(vk, UINT64_MAX);
    ra_tex_free(ra, &p->clear_tex);

    talloc_free(ra);
}
-
// Enumerates the device's usable texture formats into ra->formats, and
// probes format-related device capabilities while at it (1D/3D texture
// support, max 2D texture size, blit support). Returns false only on a
// fatal query error for the mandatory 2D case.
static bool vk_setup_formats(struct ra *ra)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);

    for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) {
        VkFormatProperties prop;
        vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop);

        // As a bare minimum, we need to sample from an allocated image
        VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
        if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
            continue;

        VkFormatFeatureFlags linear_bits, render_bits;
        linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
        render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
                      VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;

        struct ra_format *fmt = talloc_zero(ra, struct ra_format);
        *fmt = (struct ra_format) {
            .name = vk_fmt->name,
            .priv = (void *)vk_fmt,
            .ctype = vk_fmt->ctype,
            .ordered = !vk_fmt->fucked_order,
            .num_components = vk_fmt->components,
            .pixel_size = vk_fmt->bytes,
            .linear_filter = !!(flags & linear_bits),
            .renderable = !!(flags & render_bits),
        };

        for (int i = 0; i < 4; i++)
            fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];

        fmt->glsl_format = ra_fmt_glsl_format(fmt);

        MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
    }

    // Populate some other capabilities related to formats while we're at it
    VkImageType imgType[3] = {
        VK_IMAGE_TYPE_1D,
        VK_IMAGE_TYPE_2D,
        VK_IMAGE_TYPE_3D
    };

    // R8_UNORM is supported on literally every single vulkan implementation
    const VkFormat testfmt = VK_FORMAT_R8_UNORM;

    for (int d = 0; d < 3; d++) {
        VkImageFormatProperties iprop;
        VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
                testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL,
                VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop);

        switch (imgType[d]) {
        case VK_IMAGE_TYPE_1D:
            if (res == VK_SUCCESS)
                ra->caps |= RA_CAP_TEX_1D;
            break;
        case VK_IMAGE_TYPE_2D:
            // 2D formats must be supported by RA, so ensure this is the case
            VK_ASSERT(res, "Querying 2D format limits");
            ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height);
            break;
        case VK_IMAGE_TYPE_3D:
            if (res == VK_SUCCESS)
                ra->caps |= RA_CAP_TEX_3D;
            break;
        }
    }

    // RA_CAP_BLIT implies both blitting between images as well as blitting
    // directly to the swapchain image, so check for all three operations
    bool blittable = true;
    VkFormatProperties prop;
    vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop);
    if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
        blittable = false;
    if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
        blittable = false;

    vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop);
    if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
        blittable = false;

    if (blittable)
        ra->caps |= RA_CAP_BLIT;

    return true;

error:
    return false;
}
-
-static struct ra_fns ra_fns_vk;
-
// Creates an ra instance on top of an already-initialized mpvk_ctx (device
// and allocator must exist). Probes device capabilities, enumerates the
// supported formats, and allocates the helper texture used by clear().
// Returns NULL on failure (everything allocated so far is freed).
struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
{
    assert(vk->dev);
    assert(vk->alloc);

    struct ra *ra = talloc_zero(NULL, struct ra);
    ra->log = log;
    ra->fns = &ra_fns_vk;

    struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk);
    p->vk = vk;

    ra->caps |= vk->spirv->ra_caps;
    ra->glsl_version = vk->spirv->glsl_version;
    ra->glsl_vulkan = true;
    ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
    ra->max_pushc_size = vk->limits.maxPushConstantsSize;

    if (vk->pool_compute) {
        ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS;
        // If we have more compute queues than graphics queues, we probably
        // want to be using them. (This seems mostly relevant for AMD)
        if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues)
            ra->caps |= RA_CAP_PARALLEL_COMPUTE;
    }

    if (!vk_setup_formats(ra))
        goto error;

    // UBO support is required
    ra->caps |= RA_CAP_BUF_RO | RA_CAP_FRAGCOORD;

    // textureGather requires the ImageGatherExtended capability
    if (vk->features.shaderImageGatherExtended)
        ra->caps |= RA_CAP_GATHER;

    // Try creating a shader storage buffer to detect SSBO support, since
    // there is no direct capability flag for it
    struct ra_buf_params ssbo_params = {
        .type = RA_BUF_TYPE_SHADER_STORAGE,
        .size = 16,
    };

    struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params);
    if (ssbo) {
        ra->caps |= RA_CAP_BUF_RW;
        ra_buf_free(ra, &ssbo);
    }

    // To support clear() by region, we need to allocate a dummy 1x1 image that
    // will be used as the source of blit operations
    struct ra_tex_params clear_params = {
        .dimensions = 1, // no point in using a 2D image if height = 1
        .w = 1,
        .h = 1,
        .d = 1,
        .format = ra_find_float16_format(ra, 4),
        .blit_src = 1,
        .host_mutable = 1,
    };

    p->clear_tex = ra_tex_create(ra, &clear_params);
    if (!p->clear_tex) {
        MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n");
        goto error;
    }

    return ra;

error:
    vk_destroy_ra(ra);
    return NULL;
}
-
// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain
// compatible. The renderpass will automatically transition the image out of
// initialLayout and into finalLayout. `fmt` must be renderable; the pass
// always has exactly one color attachment and one subpass.
static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
                                      VkAttachmentLoadOp loadOp,
                                      VkImageLayout initialLayout,
                                      VkImageLayout finalLayout,
                                      VkRenderPass *out)
{
    struct vk_format *vk_fmt = fmt->priv;
    assert(fmt->renderable);

    VkRenderPassCreateInfo rinfo = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        .attachmentCount = 1,
        .pAttachments = &(VkAttachmentDescription) {
            .format = vk_fmt->iformat,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .loadOp = loadOp,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .initialLayout = initialLayout,
            .finalLayout = finalLayout,
        },
        .subpassCount = 1,
        .pSubpasses = &(VkSubpassDescription) {
            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
            .colorAttachmentCount = 1,
            .pColorAttachments = &(VkAttachmentReference) {
                .attachment = 0,
                .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            },
        },
    };

    return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out);
}
-
// For ra_tex.priv
struct ra_tex_vk {
    bool external_img;          // image is not owned (e.g. swapchain image)
    enum queue_type upload_queue;
    VkImageType type;
    VkImage img;
    struct vk_memslice mem;     // backing memory (unused if external_img)
    // for sampling
    VkImageView view;
    VkSampler sampler;
    // for rendering
    VkFramebuffer framebuffer;
    VkRenderPass dummyPass;     // skeleton pass the framebuffer was made with
    // for uploading
    struct ra_buf_pool pbo;
    // "current" metadata, can change during the course of execution
    VkImageLayout current_layout;
    VkAccessFlags current_access;
    // the signal guards reuse, and can be NULL
    struct vk_signal *sig;
    VkPipelineStageFlags sig_stage;
    VkSemaphore ext_dep; // external semaphore, not owned by the ra_tex
};
-
-void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep)
-{
- struct ra_tex_vk *tex_vk = tex->priv;
- assert(!tex_vk->ext_dep);
- tex_vk->ext_dep = dep;
-}
-
// Small helper to ease image barrier creation. if `discard` is set, the contents
// of the image will be undefined after the barrier. Also consumes any pending
// external semaphore dependency and waits on the texture's reuse signal.
// Updates tex_vk->current_layout/current_access to the new state.
static void tex_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex,
                        VkPipelineStageFlags stage, VkAccessFlags newAccess,
                        VkImageLayout newLayout, bool discard)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_tex_vk *tex_vk = tex->priv;

    // A registered external semaphore is a one-shot dependency: wait on it
    // at this stage and clear it
    if (tex_vk->ext_dep) {
        vk_cmd_dep(cmd, tex_vk->ext_dep, stage);
        tex_vk->ext_dep = NULL;
    }

    VkImageMemoryBarrier imgBarrier = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .oldLayout = tex_vk->current_layout,
        .newLayout = newLayout,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .srcAccessMask = tex_vk->current_access,
        .dstAccessMask = newAccess,
        .image = tex_vk->img,
        .subresourceRange = vk_range,
    };

    if (discard) {
        // UNDEFINED as oldLayout tells the driver it may discard contents
        imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        imgBarrier.srcAccessMask = 0;
    }

    // Wait on the previous signal (if any); may hand back an event usable
    // for a finer-grained intra-queue wait
    VkEvent event = NULL;
    vk_cmd_wait(vk, cmd, &tex_vk->sig, stage, &event);

    bool need_trans = tex_vk->current_layout != newLayout ||
                      tex_vk->current_access != newAccess;

    // Transitioning to VK_IMAGE_LAYOUT_UNDEFINED is a pseudo-operation
    // that for us means we don't need to perform the actual transition
    if (need_trans && newLayout != VK_IMAGE_LAYOUT_UNDEFINED) {
        if (event) {
            vkCmdWaitEvents(cmd->buf, 1, &event, tex_vk->sig_stage,
                            stage, 0, NULL, 0, NULL, 1, &imgBarrier);
        } else {
            // If we're not using an event, then the source stage is irrelevant
            // because we're coming from a different queue anyway, so we can
            // safely set it to TOP_OF_PIPE.
            imgBarrier.srcAccessMask = 0;
            vkCmdPipelineBarrier(cmd->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                 stage, 0, 0, NULL, 0, NULL, 1, &imgBarrier);
        }
    }

    tex_vk->current_layout = newLayout;
    tex_vk->current_access = newAccess;
}
-
-static void tex_signal(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex,
- VkPipelineStageFlags stage)
-{
- struct ra_tex_vk *tex_vk = tex->priv;
- struct mpvk_ctx *vk = ra_vk_get(ra);
- assert(!tex_vk->sig);
-
- tex_vk->sig = vk_cmd_signal(vk, cmd, stage);
- tex_vk->sig_stage = stage;
-}
-
// Immediately destroys a texture and all of its vulkan objects. Must only
// run once the GPU is done with the texture; use vk_tex_destroy_lazy for
// deferred destruction. NULL is a no-op.
static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex)
{
    if (!tex)
        return;

    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_tex_vk *tex_vk = tex->priv;

    ra_buf_pool_uninit(ra, &tex_vk->pbo);
    vk_signal_destroy(vk, &tex_vk->sig);
    vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR);
    vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR);
    vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR);
    vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR);
    // Wrapped (e.g. swapchain) images and their memory are not ours to free
    if (!tex_vk->external_img) {
        vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR);
        vk_free_memslice(vk, tex_vk->mem);
    }

    talloc_free(tex);
}

MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex);
-
// Initializes non-VkImage values like the image view, samplers, etc.
// Requires tex_vk->img to already be set (either created by us or wrapped).
// Returns false on failure; partially created objects are left for the
// caller's vk_tex_destroy to clean up.
static bool vk_init_image(struct ra *ra, struct ra_tex *tex)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);

    struct ra_tex_params *params = &tex->params;
    struct ra_tex_vk *tex_vk = tex->priv;
    assert(tex_vk->img);

    tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
    tex_vk->current_access = 0;

    if (params->render_src || params->render_dst) {
        static const VkImageViewType viewType[] = {
            [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
            [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
            [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
        };

        const struct vk_format *fmt = params->format->priv;
        VkImageViewCreateInfo vinfo = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = tex_vk->img,
            .viewType = viewType[tex_vk->type],
            .format = fmt->iformat,
            .subresourceRange = vk_range,
        };

        VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view));
    }

    if (params->render_src) {
        // src_linear may only be requested for formats supporting it
        assert(params->format->linear_filter || !params->src_linear);
        VkFilter filter = params->src_linear
            ? VK_FILTER_LINEAR
            : VK_FILTER_NEAREST;
        VkSamplerAddressMode wrap = params->src_repeat
            ? VK_SAMPLER_ADDRESS_MODE_REPEAT
            : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
        VkSamplerCreateInfo sinfo = {
            .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
            .magFilter = filter,
            .minFilter = filter,
            .addressModeU = wrap,
            .addressModeV = wrap,
            .addressModeW = wrap,
            .maxAnisotropy = 1.0,
        };

        VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler));
    }

    if (params->render_dst) {
        // Framebuffers need to be created against a specific render pass
        // layout, so we need to temporarily create a skeleton/dummy render
        // pass for vulkan to figure out the compatibility
        VK(vk_create_render_pass(vk->dev, params->format,
                                 VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                                 VK_IMAGE_LAYOUT_UNDEFINED,
                                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                 &tex_vk->dummyPass));

        VkFramebufferCreateInfo finfo = {
            .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
            .renderPass = tex_vk->dummyPass,
            .attachmentCount = 1,
            .pAttachments = &tex_vk->view,
            .width = tex->params.w,
            .height = tex->params.h,
            .layers = 1,
        };

        VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR,
                               &tex_vk->framebuffer));

        // NOTE: Normally we would free the dummyPass again here, but a bug
        // in the nvidia vulkan driver causes a segfault if you do.
    }

    return true;

error:
    return false;
}
-
-static struct ra_tex *vk_tex_create(struct ra *ra,
- const struct ra_tex_params *params)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
- assert(!params->format->dummy_format);
-
- struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
- tex->params = *params;
- tex->params.initial_data = NULL;
-
- struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
- tex_vk->upload_queue = GRAPHICS;
-
- const struct vk_format *fmt = params->format->priv;
- switch (params->dimensions) {
- case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
- case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
- case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
- default: abort();
- }
-
- VkImageUsageFlags usage = 0;
- if (params->render_src)
- usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
- if (params->render_dst)
- usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
- if (params->storage_dst)
- usage |= VK_IMAGE_USAGE_STORAGE_BIT;
- if (params->blit_src)
- usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
- if (params->host_mutable || params->blit_dst || params->initial_data)
- usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
-
- // Always use the transfer pool if available, for efficiency
- if (params->host_mutable && vk->pool_transfer)
- tex_vk->upload_queue = TRANSFER;
-
- // Double-check image usage support and fail immediately if invalid
- VkImageFormatProperties iprop;
- VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
- fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0,
- &iprop);
- if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
- return NULL;
- } else {
- VK_ASSERT(res, "Querying image format properties");
- }
-
- VkFormatProperties prop;
- vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
- VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
-
- bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT,
- has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
-
- if (params->w > iprop.maxExtent.width ||
- params->h > iprop.maxExtent.height ||
- params->d > iprop.maxExtent.depth ||
- (params->blit_src && !has_blit_src) ||
- (params->src_linear && !has_src_linear))
- {
- return NULL;
- }
-
- // FIXME: Since we can't keep track of queue family ownership properly,
- // and we don't know in advance what types of queue families this image
- // will belong to, we're forced to share all of our images between all
- // command pools.
- uint32_t qfs[3] = {0};
- for (int i = 0; i < vk->num_pools; i++)
- qfs[i] = vk->pools[i]->qf;
-
- VkImageCreateInfo iinfo = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .imageType = tex_vk->type,
- .format = fmt->iformat,
- .extent = (VkExtent3D) { params->w, params->h, params->d },
- .mipLevels = 1,
- .arrayLayers = 1,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = usage,
- .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT
- : VK_SHARING_MODE_EXCLUSIVE,
- .queueFamilyIndexCount = vk->num_pools,
- .pQueueFamilyIndices = qfs,
- };
-
- VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
-
- VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
- VkMemoryRequirements reqs;
- vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs);
-
- struct vk_memslice *mem = &tex_vk->mem;
- if (!vk_malloc_generic(vk, reqs, memFlags, mem))
- goto error;
-
- VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
-
- if (!vk_init_image(ra, tex))
- goto error;
-
- if (params->initial_data) {
- struct ra_tex_upload_params ul_params = {
- .tex = tex,
- .invalidate = true,
- .src = params->initial_data,
- .stride = params->w * fmt->bytes,
- };
- if (!ra->fns->tex_upload(ra, &ul_params))
- goto error;
- }
-
- return tex;
-
-error:
- vk_tex_destroy(ra, tex);
- return NULL;
-}
-
// Wraps a swapchain VkImage in an ra_tex without taking ownership of the
// image or its memory (tex_vk->external_img). Capabilities are derived from
// the swapchain's imageUsage flags. Returns NULL if no ra_format matches
// the surface format, or on error.
struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg,
                                        VkSwapchainCreateInfoKHR info)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);
    struct ra_tex *tex = NULL;

    // Find the ra_format whose vulkan format matches the surface format
    const struct ra_format *format = NULL;
    for (int i = 0; i < ra->num_formats; i++) {
        const struct vk_format *fmt = ra->formats[i]->priv;
        if (fmt->iformat == vk->surf_format.format) {
            format = ra->formats[i];
            break;
        }
    }

    if (!format) {
        MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image "
               "with surface format 0x%x\n", vk->surf_format.format);
        goto error;
    }

    tex = talloc_zero(NULL, struct ra_tex);
    tex->params = (struct ra_tex_params) {
        .format = format,
        .dimensions = 2,
        .w = info.imageExtent.width,
        .h = info.imageExtent.height,
        .d = 1,
        .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
        .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
        .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT),
        .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
        .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT),
    };

    struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
    tex_vk->type = VK_IMAGE_TYPE_2D;
    tex_vk->external_img = true;
    tex_vk->img = vkimg;

    if (!vk_init_image(ra, tex))
        goto error;

    return tex;

error:
    vk_tex_destroy(ra, tex);
    return NULL;
}
-
// For ra_buf.priv
struct ra_buf_vk {
    struct vk_bufslice slice;   // backing sub-allocation
    int refcount; // 1 = object allocated but not in use, > 1 = in use
    bool needsflush;            // host writes pending a HOST_WRITE barrier
    enum queue_type update_queue;
    // "current" metadata, can change during course of execution
    VkPipelineStageFlags current_stage;
    VkAccessFlags current_access;
    // Arbitrary user data for the creator of a buffer
    void *user_data;
};
-
-void ra_vk_buf_set_user_data(struct ra_buf *buf, void *user_data) {
- struct ra_buf_vk *vk_priv = buf->priv;
- vk_priv->user_data = user_data;
-}
-
-void *ra_vk_buf_get_user_data(struct ra_buf *buf) {
- struct ra_buf_vk *vk_priv = buf->priv;
- return vk_priv->user_data;
-}
-
-static void vk_buf_deref(struct ra *ra, struct ra_buf *buf)
-{
- if (!buf)
- return;
-
- struct mpvk_ctx *vk = ra_vk_get(ra);
- struct ra_buf_vk *buf_vk = buf->priv;
-
- if (--buf_vk->refcount == 0) {
- vk_free_memslice(vk, buf_vk->slice.mem);
- talloc_free(buf);
- }
-}
-
// Inserts a buffer memory barrier for the given byte range before a new
// access, flushing pending host writes first if needed. Also takes a
// reference on the buffer for the lifetime of `cmd` (released via callback
// when the command completes). Updates the buffer's current stage/access.
static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf,
                        VkPipelineStageFlags newStage,
                        VkAccessFlags newAccess, int offset, size_t size)
{
    struct ra_buf_vk *buf_vk = buf->priv;

    VkBufferMemoryBarrier buffBarrier = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .srcAccessMask = buf_vk->current_access,
        .dstAccessMask = newAccess,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = buf_vk->slice.buf,
        .offset = offset,
        .size = size,
    };

    // Host-mapped buffers (or ones with a pending memcpy from vk_buf_update)
    // must make the CPU writes visible to the device first
    if (buf_vk->needsflush || buf->params.host_mapped) {
        buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
        buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT;
        buf_vk->needsflush = false;
    }

    // Skip the barrier entirely when the access mask doesn't change
    if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) {
        vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0,
                             0, NULL, 1, &buffBarrier, 0, NULL);
    }

    buf_vk->current_stage = newStage;
    buf_vk->current_access = newAccess;
    // Keep the buffer alive until this command buffer has retired
    buf_vk->refcount++;
    vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf);
}
-
// Buffers are refcounted, so "destroying" one simply drops the creator's
// reference; the actual free happens in vk_buf_deref at refcount 0
#define vk_buf_destroy vk_buf_deref
MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf);
-
// Writes `size` bytes of `data` into the buffer at `offset`. Host-visible
// buffers are updated with a direct memcpy (flushed lazily on next use);
// device-local buffers are updated on the GPU via vkCmdUpdateBuffer, which
// requires 4-byte alignment of the destination offset.
static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
                          const void *data, size_t size)
{
    assert(buf->params.host_mutable || buf->params.initial_data);
    struct ra_buf_vk *buf_vk = buf->priv;

    // For host-mapped buffers, we can just directly memcpy the buffer contents.
    // Otherwise, we can update the buffer from the GPU using a command buffer.
    if (buf_vk->slice.data) {
        assert(offset + size <= buf->params.size);
        uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset;
        memcpy((void *)addr, data, size);
        buf_vk->needsflush = true; // visible to the GPU after buf_barrier
    } else {
        struct vk_cmd *cmd = vk_require_cmd(ra, buf_vk->update_queue);
        if (!cmd) {
            MP_ERR(ra, "Failed updating buffer!\n");
            return;
        }

        buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                    VK_ACCESS_TRANSFER_WRITE_BIT, offset, size);

        // vkCmdUpdateBuffer requires a 4-byte-aligned dstOffset; guaranteed
        // by the alignment chosen in vk_buf_create
        VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset;
        assert(bufOffset == MP_ALIGN_UP(bufOffset, 4));
        vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data);
    }
}
-
// Allocates a buffer of the requested type. Usage flags, memory properties
// and sub-allocation alignment are derived from the buffer type; the
// initial_data upload (if any) happens before returning. Returns NULL on
// allocation failure.
static struct ra_buf *vk_buf_create(struct ra *ra,
                                    const struct ra_buf_params *params)
{
    struct mpvk_ctx *vk = ra_vk_get(ra);

    struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
    buf->params = *params;

    struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk);
    buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
    buf_vk->current_access = 0;
    buf_vk->refcount = 1;

    VkBufferUsageFlags bufFlags = 0;
    VkMemoryPropertyFlags memFlags = 0;
    VkDeviceSize align = 4; // alignment 4 is needed for buf_update
    bool exportable = false;

    switch (params->type) {
    case RA_BUF_TYPE_TEX_UPLOAD:
        bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
        // Use TRANSFER-style updates for large enough buffers for efficiency
        if (params->size > 1024*1024) // 1 MB
            buf_vk->update_queue = TRANSFER;
        break;
    case RA_BUF_TYPE_UNIFORM:
        bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
        memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment);
        break;
    case RA_BUF_TYPE_SHADER_STORAGE:
        bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
        memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
        buf_vk->update_queue = COMPUTE;
        break;
    case RA_BUF_TYPE_VERTEX:
        bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
        memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        break;
    case RA_BUF_TYPE_SHARED_MEMORY:
        bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
        memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        exportable = true;
        break;
    default: abort();
    }

    if (params->host_mutable || params->initial_data) {
        bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
        align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment);
    }

    if (params->host_mapped) {
        memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                    VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                    VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    }

    if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
                          exportable, &buf_vk->slice))
    {
        goto error;
    }

    if (params->host_mapped)
        buf->data = buf_vk->slice.data;

    if (params->initial_data)
        vk_buf_update(ra, buf, 0, params->initial_data, params->size);

    buf->params.initial_data = NULL; // do this after vk_buf_update
    return buf;

error:
    vk_buf_destroy(ra, buf);
    return NULL;
}
-
-static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf)
-{
- struct ra_buf_vk *buf_vk = buf->priv;
- return buf_vk->refcount == 1;
-}
-
// Uploads pixel data into a texture. Uploads from host memory are staged
// through a PBO pool; uploads from an ra_buf record a buffer-to-image copy
// on the texture's upload queue. Returns false on error.
static bool vk_tex_upload(struct ra *ra,
                          const struct ra_tex_upload_params *params)
{
    struct ra_tex *tex = params->tex;
    struct ra_tex_vk *tex_vk = tex->priv;

    // Host-memory source: stage it through the texture's PBO pool, which
    // calls back into this function with a buffer
    if (!params->buf)
        return ra_tex_upload_pbo(ra, &tex_vk->pbo, params);

    assert(!params->src);
    assert(params->buf);
    struct ra_buf *buf = params->buf;
    struct ra_buf_vk *buf_vk = buf->priv;

    VkBufferImageCopy region = {
        .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset,
        .bufferRowLength = tex->params.w,
        .bufferImageHeight = tex->params.h,
        .imageSubresource = vk_layers,
        .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d},
    };

    if (tex->params.dimensions == 2) {
        // Vulkan expresses strides in texels, so they must divide evenly
        int pix_size = tex->params.format->pixel_size;
        region.bufferRowLength = params->stride / pix_size;
        if (region.bufferRowLength * pix_size != params->stride) {
            MP_ERR(ra, "Texture upload strides must be a multiple of the texel "
                   "size!\n");
            goto error;
        }

        if (params->rc) {
            struct mp_rect *rc = params->rc;
            region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0};
            region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1};
            region.bufferImageHeight = region.imageExtent.height;
        }
    }

    // NOTE(review): this product is in texels, not bytes, yet it is passed
    // as the byte size of the barrier range below — looks like it may
    // under-cover the accessed range for formats wider than 1 byte; confirm
    uint64_t size = region.bufferRowLength * region.bufferImageHeight *
                    region.imageExtent.depth;

    struct vk_cmd *cmd = vk_require_cmd(ra, tex_vk->upload_queue);
    if (!cmd)
        goto error;

    buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size);

    tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
                VK_ACCESS_TRANSFER_WRITE_BIT,
                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                params->invalidate);

    vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img,
                           tex_vk->current_layout, 1, &region);

    tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT);

    return true;

error:
    return false;
}
-
-static bool ra_vk_mem_get_external_info(struct ra *ra, struct vk_memslice *mem, struct vk_external_mem *ret)
-{
- struct mpvk_ctx *vk = ra_vk_get(ra);
-
-#if HAVE_WIN32_DESKTOP
- HANDLE mem_handle;
-
- VkMemoryGetWin32HandleInfoKHR info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
- .pNext = NULL,
- .memory = mem->vkmem,
- .handleType = IsWindows8OrGreater()
- ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
- : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
- };
-
- VK_LOAD_PFN(vkGetMemoryWin32HandleKHR);
- VK(pfn_vkGetMemoryWin32HandleKHR(vk->dev, &info, &mem_handle));
-
- ret->mem_handle = mem_handle;
-#else
- int mem_fd;
-
- VkMemoryGetFdInfoKHR info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
- .pNext = NULL,
- .memory = mem->