diff options
Diffstat (limited to 'video/out/vulkan/ra_vk.c')
-rw-r--r-- | video/out/vulkan/ra_vk.c | 1982 |
1 files changed, 0 insertions, 1982 deletions
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c deleted file mode 100644 index 3e4ba28ac4..0000000000 --- a/video/out/vulkan/ra_vk.c +++ /dev/null @@ -1,1982 +0,0 @@ -#include "video/out/gpu/utils.h" -#include "video/out/gpu/spirv.h" - -#include "ra_vk.h" -#include "malloc.h" - -#if HAVE_WIN32_DESKTOP -#include <versionhelpers.h> -#endif - -static struct ra_fns ra_fns_vk; - -enum queue_type { - GRAPHICS, - COMPUTE, - TRANSFER, -}; - -// For ra.priv -struct ra_vk { - struct mpvk_ctx *vk; - struct ra_tex *clear_tex; // stupid hack for clear() - struct vk_cmd *cmd; // currently recording cmd -}; - -struct mpvk_ctx *ra_vk_get(struct ra *ra) -{ - if (ra->fns != &ra_fns_vk) - return NULL; - - struct ra_vk *p = ra->priv; - return p->vk; -} - -static void vk_submit(struct ra *ra) -{ - struct ra_vk *p = ra->priv; - struct mpvk_ctx *vk = ra_vk_get(ra); - - if (p->cmd) { - vk_cmd_queue(vk, p->cmd); - p->cmd = NULL; - } -} - -// Returns a command buffer, or NULL on error -static struct vk_cmd *vk_require_cmd(struct ra *ra, enum queue_type type) -{ - struct ra_vk *p = ra->priv; - struct mpvk_ctx *vk = ra_vk_get(ra); - - struct vk_cmdpool *pool; - switch (type) { - case GRAPHICS: pool = vk->pool_graphics; break; - case COMPUTE: pool = vk->pool_compute; break; - - // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec) - case TRANSFER: - pool = vk->pool_transfer; - if (!pool) - pool = vk->pool_compute; - if (!pool) - pool = vk->pool_graphics; - break; - default: abort(); - } - - assert(pool); - if (p->cmd && p->cmd->pool == pool) - return p->cmd; - - vk_submit(ra); - p->cmd = vk_cmd_begin(vk, pool); - return p->cmd; -} - -#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \ - static void fun##_lazy(struct ra *ra, argtype *arg) { \ - struct ra_vk *p = ra->priv; \ - struct mpvk_ctx *vk = ra_vk_get(ra); \ - if (p->cmd) { \ - vk_cmd_callback(p->cmd, (vk_cb) fun, ra, arg); \ - } else { \ - vk_dev_callback(vk, (vk_cb) fun, ra, arg); \ - } \ - } - -static void vk_destroy_ra(struct ra *ra) -{ - struct ra_vk *p = ra->priv; - struct mpvk_ctx *vk = ra_vk_get(ra); - - vk_submit(ra); - mpvk_flush_commands(vk); - mpvk_poll_commands(vk, UINT64_MAX); - ra_tex_free(ra, &p->clear_tex); - - talloc_free(ra); -} - -static bool vk_setup_formats(struct ra *ra) -{ - struct mpvk_ctx *vk = ra_vk_get(ra); - - for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) { - VkFormatProperties prop; - vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop); - - // As a bare minimum, we need to sample from an allocated image - VkFormatFeatureFlags flags = prop.optimalTilingFeatures; - if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) - continue; - - VkFormatFeatureFlags linear_bits, render_bits; - linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - - struct ra_format *fmt = talloc_zero(ra, struct ra_format); - *fmt = (struct ra_format) { - .name = vk_fmt->name, - .priv = (void *)vk_fmt, - .ctype = vk_fmt->ctype, - .ordered = !vk_fmt->fucked_order, - .num_components = vk_fmt->components, - .pixel_size = vk_fmt->bytes, - .linear_filter = !!(flags & linear_bits), - .renderable = !!(flags & render_bits), - }; - - for (int i = 0; i < 4; i++) - fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i]; - - fmt->glsl_format = ra_fmt_glsl_format(fmt); - - MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt); - } - - // Populate some other capabilities related to formats while we're at it - VkImageType imgType[3] = { - VK_IMAGE_TYPE_1D, - VK_IMAGE_TYPE_2D, - VK_IMAGE_TYPE_3D - }; - - // R8_UNORM is supported on literally every single vulkan implementation - const VkFormat testfmt = VK_FORMAT_R8_UNORM; - - for (int d = 0; d < 3; d++) { - VkImageFormatProperties iprop; - VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd, - testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop); - - switch (imgType[d]) { - case VK_IMAGE_TYPE_1D: - if (res == VK_SUCCESS) - ra->caps |= RA_CAP_TEX_1D; - break; - case VK_IMAGE_TYPE_2D: - // 2D formats must be supported by RA, so ensure this is the case - VK_ASSERT(res, "Querying 2D format limits"); - ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height); - break; - case VK_IMAGE_TYPE_3D: - if (res == VK_SUCCESS) - ra->caps |= RA_CAP_TEX_3D; - break; - } - } - - // RA_CAP_BLIT implies both blitting between images as well as blitting - // directly to the swapchain image, so check for all three operations - bool blittable = true; - VkFormatProperties prop; - vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop); - if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT)) - blittable = false; - if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT)) - blittable = false; - - vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop); - if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT)) - blittable = false; - - if (blittable) - ra->caps |= RA_CAP_BLIT; - - return true; - -error: - return false; -} - -static struct ra_fns ra_fns_vk; - -struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log) -{ - assert(vk->dev); - assert(vk->alloc); - - struct ra *ra = talloc_zero(NULL, struct ra); - ra->log = log; - ra->fns = &ra_fns_vk; - - struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk); - p->vk = vk; - - ra->caps |= vk->spirv->ra_caps; - ra->glsl_version = vk->spirv->glsl_version; - ra->glsl_vulkan = true; - ra->max_shmem = vk->limits.maxComputeSharedMemorySize; - ra->max_pushc_size = vk->limits.maxPushConstantsSize; - - if (vk->pool_compute) { - ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS; - // If we have more compute queues than graphics queues, we probably - // want to be using them. (This seems mostly relevant for AMD) - if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues) - ra->caps |= RA_CAP_PARALLEL_COMPUTE; - } - - if (!vk_setup_formats(ra)) - goto error; - - // UBO support is required - ra->caps |= RA_CAP_BUF_RO | RA_CAP_FRAGCOORD; - - // textureGather requires the ImageGatherExtended capability - if (vk->features.shaderImageGatherExtended) - ra->caps |= RA_CAP_GATHER; - - // Try creating a shader storage buffer - struct ra_buf_params ssbo_params = { - .type = RA_BUF_TYPE_SHADER_STORAGE, - .size = 16, - }; - - struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params); - if (ssbo) { - ra->caps |= RA_CAP_BUF_RW; - ra_buf_free(ra, &ssbo); - } - - // To support clear() by region, we need to allocate a dummy 1x1 image that - // will be used as the source of blit operations - struct ra_tex_params clear_params = { - .dimensions = 1, // no point in using a 2D image if height = 1 - .w = 1, - .h = 1, - .d = 1, - .format = ra_find_float16_format(ra, 4), - .blit_src = 1, - .host_mutable = 1, - }; - - p->clear_tex = ra_tex_create(ra, &clear_params); - if (!p->clear_tex) { - MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n"); - goto error; - } - - return ra; - -error: - vk_destroy_ra(ra); - return NULL; -} - -// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain -// compatible. The renderpass will automatically transition the image out of -// initialLayout and into finalLayout. -static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt, - VkAttachmentLoadOp loadOp, - VkImageLayout initialLayout, - VkImageLayout finalLayout, - VkRenderPass *out) -{ - struct vk_format *vk_fmt = fmt->priv; - assert(fmt->renderable); - - VkRenderPassCreateInfo rinfo = { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = vk_fmt->iformat, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = loadOp, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = initialLayout, - .finalLayout = finalLayout, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - }, - }, - }; - - return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out); -} - -// For ra_tex.priv -struct ra_tex_vk { - bool external_img; - enum queue_type upload_queue; - VkImageType type; - VkImage img; - struct vk_memslice mem; - // for sampling - VkImageView view; - VkSampler sampler; - // for rendering - VkFramebuffer framebuffer; - VkRenderPass dummyPass; - // for uploading - struct ra_buf_pool pbo; - // "current" metadata, can change during the course of execution - VkImageLayout current_layout; - VkAccessFlags current_access; - // the signal guards reuse, and can be NULL - struct vk_signal *sig; - VkPipelineStageFlags sig_stage; - VkSemaphore ext_dep; // external semaphore, not owned by the ra_tex -}; - -void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep) -{ - struct ra_tex_vk *tex_vk = tex->priv; - assert(!tex_vk->ext_dep); - tex_vk->ext_dep = dep; -} - -// Small helper to ease image barrier creation. if `discard` is set, the contents -// of the image will be undefined after the barrier -static void tex_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex, - VkPipelineStageFlags stage, VkAccessFlags newAccess, - VkImageLayout newLayout, bool discard) -{ - struct mpvk_ctx *vk = ra_vk_get(ra); - struct ra_tex_vk *tex_vk = tex->priv; - - if (tex_vk->ext_dep) { - vk_cmd_dep(cmd, tex_vk->ext_dep, stage); - tex_vk->ext_dep = NULL; - } - - VkImageMemoryBarrier imgBarrier = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .oldLayout = tex_vk->current_layout, - .newLayout = newLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .srcAccessMask = tex_vk->current_access, - .dstAccessMask = newAccess, - .image = tex_vk->img, - .subresourceRange = vk_range, - }; - - if (discard) { - imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - imgBarrier.srcAccessMask = 0; - } - - VkEvent event = NULL; - vk_cmd_wait(vk, cmd, &tex_vk->sig, stage, &event); - - bool need_trans = tex_vk->current_layout != newLayout || - tex_vk->current_access != newAccess; - - // Transitioning to VK_IMAGE_LAYOUT_UNDEFINED is a pseudo-operation - // that for us means we don't need to perform the actual transition - if (need_trans && newLayout != VK_IMAGE_LAYOUT_UNDEFINED) { - if (event) { - vkCmdWaitEvents(cmd->buf, 1, &event, tex_vk->sig_stage, - stage, 0, NULL, 0, NULL, 1, &imgBarrier); - } else { - // If we're not using an event, then the source stage is irrelevant - // because we're coming from a different queue anyway, so we can - // safely set it to TOP_OF_PIPE. - imgBarrier.srcAccessMask = 0; - vkCmdPipelineBarrier(cmd->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - stage, 0, 0, NULL, 0, NULL, 1, &imgBarrier); - } - } - - tex_vk->current_layout = newLayout; - tex_vk->current_access = newAccess; -} - -static void tex_signal(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex, - VkPipelineStageFlags stage) -{ - struct ra_tex_vk *tex_vk = tex->priv; - struct mpvk_ctx *vk = ra_vk_get(ra); - assert(!tex_vk->sig); - - tex_vk->sig = vk_cmd_signal(vk, cmd, stage); - tex_vk->sig_stage = stage; -} - -static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex) -{ - if (!tex) - return; - - struct mpvk_ctx *vk = ra_vk_get(ra); - struct ra_tex_vk *tex_vk = tex->priv; - - ra_buf_pool_uninit(ra, &tex_vk->pbo); - vk_signal_destroy(vk, &tex_vk->sig); - vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR); - vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR); - vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR); - vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR); - if (!tex_vk->external_img) { - vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR); - vk_free_memslice(vk, tex_vk->mem); - } - - talloc_free(tex); -} - -MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex); - -// Initializes non-VkImage values like the image view, samplers, etc. -static bool vk_init_image(struct ra *ra, struct ra_tex *tex) -{ - struct mpvk_ctx *vk = ra_vk_get(ra); - - struct ra_tex_params *params = &tex->params; - struct ra_tex_vk *tex_vk = tex->priv; - assert(tex_vk->img); - - tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED; - tex_vk->current_access = 0; - - if (params->render_src || params->render_dst) { - static const VkImageViewType viewType[] = { - [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D, - [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D, - [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D, - }; - - const struct vk_format *fmt = params->format->priv; - VkImageViewCreateInfo vinfo = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = tex_vk->img, - .viewType = viewType[tex_vk->type], - .format = fmt->iformat, - .subresourceRange = vk_range, - }; - - VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view)); - } - - if (params->render_src) { - assert(params->format->linear_filter || !params->src_linear); - VkFilter filter = params->src_linear - ? VK_FILTER_LINEAR - : VK_FILTER_NEAREST; - VkSamplerAddressMode wrap = params->src_repeat - ? VK_SAMPLER_ADDRESS_MODE_REPEAT - : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - VkSamplerCreateInfo sinfo = { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = filter, - .minFilter = filter, - .addressModeU = wrap, - .addressModeV = wrap, - .addressModeW = wrap, - .maxAnisotropy = 1.0, - }; - - VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler)); - } - - if (params->render_dst) { - // Framebuffers need to be created against a specific render pass - // layout, so we need to temporarily create a skeleton/dummy render - // pass for vulkan to figure out the compatibility - VK(vk_create_render_pass(vk->dev, params->format, - VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - &tex_vk->dummyPass)); - - VkFramebufferCreateInfo finfo = { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .renderPass = tex_vk->dummyPass, - .attachmentCount = 1, - .pAttachments = &tex_vk->view, - .width = tex->params.w, - .height = tex->params.h, - .layers = 1, - }; - - VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR, - &tex_vk->framebuffer)); - - // NOTE: Normally we would free the dummyPass again here, but a bug - // in the nvidia vulkan driver causes a segfault if you do. - } - - return true; - -error: - return false; -} - -static struct ra_tex *vk_tex_create(struct ra *ra, - const struct ra_tex_params *params) -{ - struct mpvk_ctx *vk = ra_vk_get(ra); - assert(!params->format->dummy_format); - - struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); - tex->params = *params; - tex->params.initial_data = NULL; - - struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk); - tex_vk->upload_queue = GRAPHICS; - - const struct vk_format *fmt = params->format->priv; - switch (params->dimensions) { - case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break; - case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break; - case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break; - default: abort(); - } - - VkImageUsageFlags usage = 0; - if (params->render_src) - usage |= VK_IMAGE_USAGE_SAMPLED_BIT; - if (params->render_dst) - usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - if (params->storage_dst) - usage |= VK_IMAGE_USAGE_STORAGE_BIT; - if (params->blit_src) - usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; - if (params->host_mutable || params->blit_dst || params->initial_data) - usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; - - // Always use the transfer pool if available, for efficiency - if (params->host_mutable && vk->pool_transfer) - tex_vk->upload_queue = TRANSFER; - - // Double-check image usage support and fail immediately if invalid - VkImageFormatProperties iprop; - VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd, - fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0, - &iprop); - if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) { - return NULL; - } else { - VK_ASSERT(res, "Querying image format properties"); - } - - VkFormatProperties prop; - vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop); - VkFormatFeatureFlags flags = prop.optimalTilingFeatures; - - bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT, - has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - - if (params->w > iprop.maxExtent.width || - params->h > iprop.maxExtent.height || - params->d > iprop.maxExtent.depth || - (params->blit_src && !has_blit_src) || - (params->src_linear && !has_src_linear)) - { - return NULL; - } - - // FIXME: Since we can't keep track of queue family ownership properly, - // and we don't know in advance what types of queue families this image - // will belong to, we're forced to share all of our images between all - // command pools. - uint32_t qfs[3] = {0}; - for (int i = 0; i < vk->num_pools; i++) - qfs[i] = vk->pools[i]->qf; - - VkImageCreateInfo iinfo = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = tex_vk->type, - .format = fmt->iformat, - .extent = (VkExtent3D) { params->w, params->h, params->d }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = usage, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .sharingMode = vk->num_pools > 1 ? VK_SHARING_MODE_CONCURRENT - : VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = vk->num_pools, - .pQueueFamilyIndices = qfs, - }; - - VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img)); - - VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - VkMemoryRequirements reqs; - vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs); - - struct vk_memslice *mem = &tex_vk->mem; - if (!vk_malloc_generic(vk, reqs, memFlags, mem)) - goto error; - - VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset)); - - if (!vk_init_image(ra, tex)) - goto error; - - if (params->initial_data) { - struct ra_tex_upload_params ul_params = { - .tex = tex, - .invalidate = true, - .src = params->initial_data, - .stride = params->w * fmt->bytes, - }; - if (!ra->fns->tex_upload(ra, &ul_params)) - goto error; - } - - return tex; - -error: - vk_tex_destroy(ra, tex); - return NULL; -} - -struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg, - VkSwapchainCreateInfoKHR info) -{ - struct mpvk_ctx *vk = ra_vk_get(ra); - struct ra_tex *tex = NULL; - - const struct ra_format *format = NULL; - for (int i = 0; i < ra->num_formats; i++) { - const struct vk_format *fmt = ra->formats[i]->priv; - if (fmt->iformat == vk->surf_format.format) { - format = ra->formats[i]; - break; - } - } - - if (!format) { - MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image " - "with surface format 0x%x\n", vk->surf_format.format); - goto error; - } - - tex = talloc_zero(NULL, struct ra_tex); - tex->params = (struct ra_tex_params) { - .format = format, - .dimensions = 2, - .w = info.imageExtent.width, - .h = info.imageExtent.height, - .d = 1, - .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT), - .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT), - .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT), - .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT), - .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT), - }; - - struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk); - tex_vk->type = VK_IMAGE_TYPE_2D; - tex_vk->external_img = true; - tex_vk->img = vkimg; - - if (!vk_init_image(ra, tex)) - goto error; - - return tex; - -error: - vk_tex_destroy(ra, tex); - return NULL; -} - -// For ra_buf.priv -struct ra_buf_vk { - struct vk_bufslice slice; - int refcount; // 1 = object allocated but not in use, > 1 = in use - bool needsflush; - enum queue_type update_queue; - // "current" metadata, can change during course of execution - VkPipelineStageFlags current_stage; - VkAccessFlags current_access; - // Arbitrary user data for the creator of a buffer - void *user_data; -}; - -void ra_vk_buf_set_user_data(struct ra_buf *buf, void *user_data) { - struct ra_buf_vk *vk_priv = buf->priv; - vk_priv->user_data = user_data; -} - -void *ra_vk_buf_get_user_data(struct ra_buf *buf) { - struct ra_buf_vk *vk_priv = buf->priv; - return vk_priv->user_data; -} - -static void vk_buf_deref(struct ra *ra, struct ra_buf *buf) -{ - if (!buf) - return; - - struct mpvk_ctx *vk = ra_vk_get(ra); - struct ra_buf_vk *buf_vk = buf->priv; - - if (--buf_vk->refcount == 0) { - vk_free_memslice(vk, buf_vk->slice.mem); - talloc_free(buf); - } -} - -static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf, - VkPipelineStageFlags newStage, - VkAccessFlags newAccess, int offset, size_t size) -{ - struct ra_buf_vk *buf_vk = buf->priv; - - VkBufferMemoryBarrier buffBarrier = { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = buf_vk->current_access, - .dstAccessMask = newAccess, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = buf_vk->slice.buf, - .offset = offset, - .size = size, - }; - - if (buf_vk->needsflush || buf->params.host_mapped) { - buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT; - buf_vk->needsflush = false; - } - - if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) { - vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0, - 0, NULL, 1, &buffBarrier, 0, NULL); - } - - buf_vk->current_stage = newStage; - buf_vk->current_access = newAccess; - buf_vk->refcount++; - vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf); -} - -#define vk_buf_destroy vk_buf_deref -MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf); - -static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, - const void *data, size_t size) -{ - assert(buf->params.host_mutable || buf->params.initial_data); - struct ra_buf_vk *buf_vk = buf->priv; - - // For host-mapped buffers, we can just directly memcpy the buffer contents. - // Otherwise, we can update the buffer from the GPU using a command buffer. - if (buf_vk->slice.data) { - assert(offset + size <= buf->params.size); - uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset; - memcpy((void *)addr, data, size); - buf_vk->needsflush = true; - } else { - struct vk_cmd *cmd = vk_require_cmd(ra, buf_vk->update_queue); - if (!cmd) { - MP_ERR(ra, "Failed updating buffer!\n"); - return; - } - - buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, offset, size); - - VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset; - assert(bufOffset == MP_ALIGN_UP(bufOffset, 4)); - vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data); - } -} - -static struct ra_buf *vk_buf_create(struct ra *ra, - const struct ra_buf_params *params) -{ - struct mpvk_ctx *vk = ra_vk_get(ra); - - struct ra_buf *buf = talloc_zero(NULL, struct ra_buf); - buf->params = *params; - - struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk); - buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - buf_vk->current_access = 0; - buf_vk->refcount = 1; - - VkBufferUsageFlags bufFlags = 0; - VkMemoryPropertyFlags memFlags = 0; - VkDeviceSize align = 4; // alignment 4 is needed for buf_update - bool exportable = false; - - switch (params->type) { - case RA_BUF_TYPE_TEX_UPLOAD: - bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - // Use TRANSFER-style updates for large enough buffers for efficiency - if (params->size > 1024*1024) // 1 MB - buf_vk->update_queue = TRANSFER; - break; - case RA_BUF_TYPE_UNIFORM: - bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment); - break; - case RA_BUF_TYPE_SHADER_STORAGE: - bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment); - buf_vk->update_queue = COMPUTE; - break; - case RA_BUF_TYPE_VERTEX: - bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - break; - case RA_BUF_TYPE_SHARED_MEMORY: - bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - exportable = true; - break; - default: abort(); - } - - if (params->host_mutable || params->initial_data) { - bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; - align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment); - } - - if (params->host_mapped) { - memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - } - - if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align, - exportable, &buf_vk->slice)) - { - goto error; - } - - if (params->host_mapped) - buf->data = buf_vk->slice.data; - - if (params->initial_data) - vk_buf_update(ra, buf, 0, params->initial_data, params->size); - - buf->params.initial_data = NULL; // do this after vk_buf_update - return buf; - -error: - vk_buf_destroy(ra, buf); - return NULL; -} - -static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf) -{ - struct ra_buf_vk *buf_vk = buf->priv; - return buf_vk->refcount == 1; -} - -static bool vk_tex_upload(struct ra *ra, - const struct ra_tex_upload_params *params) -{ - struct ra_tex *tex = params->tex; - struct ra_tex_vk *tex_vk = tex->priv; - - if (!params->buf) - return ra_tex_upload_pbo(ra, &tex_vk->pbo, params); - - assert(!params->src); - assert(params->buf); - struct ra_buf *buf = params->buf; - struct ra_buf_vk *buf_vk = buf->priv; - - VkBufferImageCopy region = { - .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset, - .bufferRowLength = tex->params.w, - .bufferImageHeight = tex->params.h, - .imageSubresource = vk_layers, - .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d}, - }; - - if (tex->params.dimensions == 2) { - int pix_size = tex->params.format->pixel_size; - region.bufferRowLength = params->stride / pix_size; - if (region.bufferRowLength * pix_size != params->stride) { - MP_ERR(ra, "Texture upload strides must be a multiple of the texel " - "size!\n"); - goto error; - } - - if (params->rc) { - struct mp_rect *rc = params->rc; - region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0}; - region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1}; - region.bufferImageHeight = region.imageExtent.height; - } - } - - uint64_t size = region.bufferRowLength * region.bufferImageHeight * - region.imageExtent.depth; - - struct vk_cmd *cmd = vk_require_cmd(ra, tex_vk->upload_queue); - if (!cmd) - goto error; - - buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size); - - tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - params->invalidate); - - vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img, - tex_vk->current_layout, 1, ®ion); - - tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT); - - return true; - -error: - return false; -} - -static bool ra_vk_mem_get_external_info(struct ra *ra, struct vk_memslice *mem, struct vk_external_mem *ret) -{ - struct mpvk_ctx *vk = ra_vk_get(ra); - -#if HAVE_WIN32_DESKTOP - HANDLE mem_handle; - - VkMemoryGetWin32HandleInfoKHR info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, - .pNext = NULL, - .memory = mem->vkmem, - .handleType = IsWindows8OrGreater() - ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR - : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, - }; - - VK_LOAD_PFN(vkGetMemoryWin32HandleKHR); - VK(pfn_vkGetMemoryWin32HandleKHR(vk->dev, &info, &mem_handle)); - - ret->mem_handle = mem_handle; -#else - int mem_fd; - - VkMemoryGetFdInfoKHR info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, - .pNext = NULL, - .memory = mem->vkmem, - .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, - }; - - VK_LOAD_PFN(vkGetMemoryFdKHR); - VK(pfn_vkGetMemoryFdKHR(vk->dev, &info, &mem_fd)); - - ret->mem_fd = mem_fd; -#endif - ret->size = mem->size; - ret->offset = mem->offset; - ret->mem_size = mem->slab_size; - - return true; - -error: - return false; -} - -bool ra_vk_buf_get_external_info(struct ra *ra, struct ra_buf *buf, struct vk_external_mem *ret) -{ - if (buf->params.type != RA_BUF_TYPE_SHARED_MEMORY) { - MP_ERR(ra, "Buffer must be of TYPE_SHARED_MEMORY to be able to export it..."); - return false; - } - - struct ra_buf_vk *buf_vk = buf->priv; - struct vk_memslice *mem = &buf_vk->slice.mem; - - return ra_vk_mem_get_external_info(ra, mem, ret); -} - -#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH - -// For ra_renderpass.priv -struct ra_renderpass_vk { - // Pipeline / render pass - VkPipeline pipe; - VkPipelineLayout pipeLayout; - VkRenderPass renderPass; - VkImageLayout initialLayout; - VkImageLayout finalLayout; - // Descriptor set (bindings) - VkDescriptorSetLayout dsLayout; - VkDescriptorPool dsPool; - VkDescriptorSet dss[MPVK_NUM_DS]; - int |