From 258487370fd840b018a404225277d74f74899c59 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Wed, 13 Sep 2017 03:09:48 +0200 Subject: vo_gpu: vulkan: generalize SPIR-V compiler In addition to the built-in nvidia compiler, we now also support a backend based on libshaderc. shaderc is sort of like glslang except it has a C API and is available as a dynamic library. The generated SPIR-V is now cached alongside the VkPipeline in the cached_program. We use a special cache header to ensure validity of this cache before passing it blindly to the vulkan implementation, since passing invalid SPIR-V can cause all sorts of nasty behavior. It's also designed to self-invalidate if the compiler gets better, by offering a catch-all `int compiler_version` that implementations can use as a cache invalidation marker. --- DOCS/man/options.rst | 17 +++ options/options.c | 2 + options/options.h | 1 + video/out/gpu/context.c | 18 ++- video/out/gpu/context.h | 1 + video/out/gpu/spirv.c | 78 ++++++++++ video/out/gpu/spirv.h | 41 ++++++ video/out/gpu/spirv_shaderc.c | 123 ++++++++++++++++++ video/out/vulkan/common.h | 1 + video/out/vulkan/context.c | 16 +++ video/out/vulkan/context.h | 3 + video/out/vulkan/ra_vk.c | 275 +++++++++++++++++++++++++++++----------- video/out/vulkan/spirv_nvidia.c | 54 ++++++ video/out/vulkan/utils.c | 28 ++-- wscript | 4 + wscript_build.py | 3 + 16 files changed, 571 insertions(+), 94 deletions(-) create mode 100644 video/out/gpu/spirv.c create mode 100644 video/out/gpu/spirv.h create mode 100644 video/out/gpu/spirv_shaderc.c create mode 100644 video/out/vulkan/spirv_nvidia.c diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index 80e7350292..078981be24 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -4205,6 +4205,23 @@ The following video options are currently all specific to ``--vo=gpu`` and as mpv's vulkan implementation currently does not try and protect textures against concurrent access. +``--spirv-compiler=`` + Controls which compiler is used to translate GLSL to SPIR-V. This is + (currently) only relevant for ``--gpu-api=vulkan``. The possible choices + are: + + auto + Use the first available compiler. (Default) + shaderc + Use libshaderc, an API wrapper around glslang. This is generally the + preferred choice, if available. + nvidia + Use nvidia's built-in compiler. Only works on nvidia GPUs, and only + with vulkan. Supports some features glslang does not, but is buggy. + WARNING: It handles push constants incorrectly, which causes graphical + corruption with e.g. ``--temporal-dither``. Use only for testing. + ``--glsl-shaders=`` Custom GLSL hooks.
These are a flexible way to add custom fragment shaders, which can be injected at almost arbitrary points in the rendering pipeline, diff --git a/options/options.c b/options/options.c index 6467468691..b96ed5ef02 100644 --- a/options/options.c +++ b/options/options.c @@ -90,6 +90,7 @@ extern const struct m_obj_list ao_obj_list; extern const struct m_sub_options opengl_conf; extern const struct m_sub_options vulkan_conf; +extern const struct m_sub_options spirv_conf; extern const struct m_sub_options angle_conf; extern const struct m_sub_options cocoa_conf; @@ -686,6 +687,7 @@ const m_option_t mp_opts[] = { OPT_SUBSTRUCT("", demux_opts, demux_conf, 0), OPT_SUBSTRUCT("", gl_video_opts, gl_video_conf, 0), + OPT_SUBSTRUCT("", spirv_opts, spirv_conf, 0), #if HAVE_GL OPT_SUBSTRUCT("", opengl_opts, opengl_conf, 0), diff --git a/options/options.h b/options/options.h index 63dee03612..d2d0ea3cf9 100644 --- a/options/options.h +++ b/options/options.h @@ -330,6 +330,7 @@ typedef struct MPOpts { struct angle_opts *angle_opts; struct opengl_opts *opengl_opts; struct vulkan_opts *vulkan_opts; + struct spirv_opts *spirv_opts; struct cocoa_opts *cocoa_opts; struct dvd_opts *dvd_opts; diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c index 25e2a754bf..69f322c422 100644 --- a/video/out/gpu/context.c +++ b/video/out/gpu/context.c @@ -31,6 +31,7 @@ #include "video/out/vo.h" #include "context.h" +#include "spirv.h" extern const struct ra_ctx_fns ra_ctx_glx; extern const struct ra_ctx_fns ra_ctx_glx_probe; @@ -185,10 +186,17 @@ struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type, return NULL; } -void ra_ctx_destroy(struct ra_ctx **ctx) +void ra_ctx_destroy(struct ra_ctx **ctx_ptr) { - if (*ctx) - (*ctx)->fns->uninit(*ctx); - talloc_free(*ctx); - *ctx = NULL; + struct ra_ctx *ctx = *ctx_ptr; + if (!ctx) + return; + + if (ctx->spirv && ctx->spirv->fns->uninit) + ctx->spirv->fns->uninit(ctx); + + ctx->fns->uninit(ctx); + talloc_free(ctx); + + *ctx_ptr = NULL; } diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h index f74087592d..78c0441cdf 100644 --- a/video/out/gpu/context.h +++ b/video/out/gpu/context.h @@ -22,6 +22,7 @@ struct ra_ctx { struct ra_ctx_opts opts; const struct ra_ctx_fns *fns; struct ra_swapchain *swapchain; + struct spirv_compiler *spirv; void *priv; }; diff --git a/video/out/gpu/spirv.c b/video/out/gpu/spirv.c new file mode 100644 index 0000000000..9375fd9508 --- /dev/null +++ b/video/out/gpu/spirv.c @@ -0,0 +1,78 @@ +#include "common/msg.h" +#include "options/m_config.h" + +#include "spirv.h" +#include "config.h" + +extern const struct spirv_compiler_fns spirv_shaderc; +extern const struct spirv_compiler_fns spirv_nvidia_builtin; + +// in probe-order +enum { + SPIRV_AUTO = 0, + SPIRV_SHADERC, // generally preferred, but not packaged everywhere + SPIRV_NVIDIA, // can be useful for testing, only available on nvidia +}; + +static const struct spirv_compiler_fns *compilers[] = { +#if HAVE_SHADERC + [SPIRV_SHADERC] = &spirv_shaderc, +#endif +#if HAVE_VULKAN + [SPIRV_NVIDIA] = &spirv_nvidia_builtin, +#endif +}; + +static const struct m_opt_choice_alternatives compiler_choices[] = { + {"auto", SPIRV_AUTO}, +#if HAVE_SHADERC + {"shaderc", SPIRV_SHADERC}, +#endif +#if HAVE_VULKAN + {"nvidia", SPIRV_NVIDIA}, +#endif + {0} +}; + +struct spirv_opts { + int compiler; +}; + +#define OPT_BASE_STRUCT struct spirv_opts +const struct m_sub_options spirv_conf = { + .opts = (const struct m_option[]) { + OPT_CHOICE_C("spirv-compiler", compiler, 0, compiler_choices), + {0} + 
}, .size = sizeof(struct spirv_opts), }; + +bool spirv_compiler_init(struct ra_ctx *ctx) +{ + void *tmp = talloc_new(NULL); + struct spirv_opts *opts = mp_get_config_group(tmp, ctx->global, &spirv_conf); + int compiler = opts->compiler; + talloc_free(tmp); + + for (int i = SPIRV_AUTO+1; i < MP_ARRAY_SIZE(compilers); i++) { + if (compiler != SPIRV_AUTO && i != compiler) + continue; + if (!compilers[i]) + continue; + + ctx->spirv = talloc_zero(NULL, struct spirv_compiler); + ctx->spirv->log = ctx->log; + ctx->spirv->fns = compilers[i]; + + const char *name = m_opt_choice_str(compiler_choices, i); + strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name)); + MP_VERBOSE(ctx, "Initializing SPIR-V compiler '%s'\n", name); + if (ctx->spirv->fns->init(ctx)) + return true; + talloc_free(ctx->spirv); + ctx->spirv = NULL; + } + + MP_ERR(ctx, "Failed initializing SPIR-V compiler!\n"); + return false; +} diff --git a/video/out/gpu/spirv.h b/video/out/gpu/spirv.h new file mode 100644 index 0000000000..e3dbd4f52a --- /dev/null +++ b/video/out/gpu/spirv.h @@ -0,0 +1,41 @@ +#pragma once + +#include "common/msg.h" +#include "common/common.h" +#include "context.h" + +enum glsl_shader { + GLSL_SHADER_VERTEX, + GLSL_SHADER_FRAGMENT, + GLSL_SHADER_COMPUTE, +}; + +#define SPIRV_NAME_MAX_LEN 32 + +struct spirv_compiler { + char name[SPIRV_NAME_MAX_LEN]; + const struct spirv_compiler_fns *fns; + struct mp_log *log; + void *priv; + + const char *required_ext; // or NULL + int glsl_version; // GLSL version supported + int compiler_version; // for cache invalidation, may be left as 0 + int ra_caps; // RA_CAP_* provided by this implementation, if any +}; + +struct spirv_compiler_fns { + // Compile GLSL to SPIR-V, under GL_KHR_vulkan_glsl semantics. + bool (*compile_glsl)(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv); + + // Called by spirv_compiler_init / ra_ctx_destroy. These don't need to + // allocate/free ctx->spirv; that is done by the caller. + bool (*init)(struct ra_ctx *ctx); + void (*uninit)(struct ra_ctx *ctx); // optional +}; + +// Initializes ctx->spirv to a valid SPIR-V compiler, or returns false on +// failure. Cleanup will be handled by ra_ctx_destroy. +bool spirv_compiler_init(struct ra_ctx *ctx); diff --git a/video/out/gpu/spirv_shaderc.c b/video/out/gpu/spirv_shaderc.c new file mode 100644 index 0000000000..9b429ca2c2 --- /dev/null +++ b/video/out/gpu/spirv_shaderc.c @@ -0,0 +1,123 @@ +#include "common/msg.h" + +#include "context.h" +#include "spirv.h" + +#include <shaderc/shaderc.h> + +struct priv { + shaderc_compiler_t compiler; + shaderc_compile_options_t opts; +}; + +static void shaderc_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->spirv->priv; + if (!p) + return; + + shaderc_compile_options_release(p->opts); + shaderc_compiler_release(p->compiler); +} + +static bool shaderc_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->spirv->priv = talloc_zero(ctx->spirv, struct priv); + + p->compiler = shaderc_compiler_initialize(); + if (!p->compiler) + goto error; + p->opts = shaderc_compile_options_initialize(); + if (!p->opts) + goto error; + + shaderc_compile_options_set_optimization_level(p->opts, + shaderc_optimization_level_size); + + int ver, rev; + shaderc_get_spv_version(&ver, &rev); + ctx->spirv->compiler_version = ver * 100 + rev; // forwards compatibility + ctx->spirv->glsl_version = 450; // impossible to query?
+ return true; + +error: + shaderc_uninit(ctx); + return false; +} + +static shaderc_compilation_result_t compile(struct priv *p, + enum glsl_shader type, + const char *glsl, bool debug) +{ + static const shaderc_shader_kind kinds[] = { + [GLSL_SHADER_VERTEX] = shaderc_glsl_vertex_shader, + [GLSL_SHADER_FRAGMENT] = shaderc_glsl_fragment_shader, + [GLSL_SHADER_COMPUTE] = shaderc_glsl_compute_shader, + }; + + if (debug) { + return shaderc_compile_into_spv_assembly(p->compiler, glsl, strlen(glsl), + kinds[type], "input", "main", p->opts); + } else { + return shaderc_compile_into_spv(p->compiler, glsl, strlen(glsl), + kinds[type], "input", "main", p->opts); + } +} + +static bool shaderc_compile(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv) +{ + struct priv *p = spirv->priv; + + shaderc_compilation_result_t res = compile(p, type, glsl, false); + int errs = shaderc_result_get_num_errors(res), + warn = shaderc_result_get_num_warnings(res), + msgl = errs ? MSGL_ERR : warn ? MSGL_WARN : MSGL_V; + + const char *msg = shaderc_result_get_error_message(res); + if (msg[0]) + MP_MSG(spirv, msgl, "shaderc output:\n%s", msg); + + int s = shaderc_result_get_compilation_status(res); + bool success = s == shaderc_compilation_status_success; + + static const char *results[] = { + [shaderc_compilation_status_success] = "success", + [shaderc_compilation_status_invalid_stage] = "invalid stage", + [shaderc_compilation_status_compilation_error] = "error", + [shaderc_compilation_status_internal_error] = "internal error", + [shaderc_compilation_status_null_result_object] = "no result", + [shaderc_compilation_status_invalid_assembly] = "invalid assembly", + }; + + const char *status = s < MP_ARRAY_SIZE(results) ? results[s] : "unknown"; + MP_MSG(spirv, msgl, "shaderc compile status '%s' (%d errors, %d warnings)\n", + status, errs, warn); + + if (success) { + void *bytes = (void *) shaderc_result_get_bytes(res); + out_spirv->len = shaderc_result_get_length(res); + out_spirv->start = talloc_memdup(tactx, bytes, out_spirv->len); + } + + // Also print SPIR-V disassembly for debugging purposes. Unfortunately + // there doesn't seem to be a way to get this except compiling the shader + // a second time.. 
+ if (mp_msg_test(spirv->log, MSGL_TRACE)) { + shaderc_compilation_result_t dis = compile(p, type, glsl, true); + MP_TRACE(spirv, "Generated SPIR-V:\n%.*s", + (int)shaderc_result_get_length(dis), + shaderc_result_get_bytes(dis)); + shaderc_result_release(dis); + } + + shaderc_result_release(res); + return success; +} + +const struct spirv_compiler_fns spirv_shaderc = { + .compile_glsl = shaderc_compile, + .init = shaderc_init, + .uninit = shaderc_uninit, +}; diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h index 4c0e783f0e..d0d14e28c2 100644 --- a/video/out/vulkan/common.h +++ b/video/out/vulkan/common.h @@ -45,6 +45,7 @@ struct mpvk_ctx { struct vk_malloc *alloc; // memory allocator for this device struct vk_cmdpool *pool; // primary command pool for this device struct vk_cmd *last_cmd; // most recently submitted command + struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler // Cached capabilities VkPhysicalDeviceLimits limits; diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c index bd456d214c..d2445fbda7 100644 --- a/video/out/vulkan/context.c +++ b/video/out/vulkan/context.c @@ -16,6 +16,8 @@ */ #include "options/m_config.h" +#include "video/out/gpu/spirv.h" + #include "context.h" #include "ra_vk.h" #include "utils.h" @@ -125,6 +127,17 @@ struct priv { int last_imgidx; // the image index last acquired (for submit) }; +static const struct ra_swapchain_fns vulkan_swapchain; + +struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx) +{ + if (ctx->swapchain->fns != &vulkan_swapchain) + return NULL; + + struct priv *p = ctx->swapchain->priv; + return p->vk; +} + static bool update_swapchain_info(struct priv *p, VkSwapchainCreateInfoKHR *info) { @@ -265,6 +278,9 @@ bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw)) goto error; + if (!spirv_compiler_init(ctx)) + goto error; + vk->spirv = ctx->spirv; if (!mpvk_pick_surface_format(vk)) goto error; if (!mpvk_device_init(vk, p->opts->dev_opts)) diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h index 3f630bc10e..a64d39f125 100644 --- a/video/out/vulkan/context.h +++ b/video/out/vulkan/context.h @@ -8,3 +8,6 @@ void ra_vk_ctx_uninit(struct ra_ctx *ctx); bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, VkPresentModeKHR preferred_mode); bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h); + +// May be called on a ra_ctx of any type. +struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx); diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c index ce0cbc66e9..897b2e1ff1 100644 --- a/video/out/vulkan/ra_vk.c +++ b/video/out/vulkan/ra_vk.c @@ -1,6 +1,8 @@ +#include "video/out/gpu/utils.h" +#include "video/out/gpu/spirv.h" + #include "ra_vk.h" #include "malloc.h" -#include "video/out/opengl/utils.h" static struct ra_fns ra_fns_vk; @@ -185,13 +187,10 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log) struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk); p->vk = vk; - // There's no way to query the supported GLSL version from VK_NV_glsl_shader - // (thanks nvidia), so just pick the GL version that modern nvidia devices - // support.. 
- ra->glsl_version = 450; + ra->caps |= vk->spirv->ra_caps; + ra->glsl_version = vk->spirv->glsl_version; ra->glsl_vulkan = true; ra->max_shmem = vk->limits.maxComputeSharedMemorySize; - ra->caps = RA_CAP_NESTED_ARRAY; if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT) ra->caps |= RA_CAP_COMPUTE; @@ -821,14 +820,9 @@ error: // For ra_renderpass.priv struct ra_renderpass_vk { - // Compiled shaders - VkShaderModule vert; - VkShaderModule frag; - VkShaderModule comp; // Pipeline / render pass VkPipeline pipe; VkPipelineLayout pipeLayout; - VkPipelineCache pipeCache; VkRenderPass renderPass; // Descriptor set (bindings) VkDescriptorSetLayout dsLayout; @@ -854,14 +848,10 @@ static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) ra_buf_pool_uninit(ra, &pass_vk->vbo); vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR); - vkDestroyPipelineCache(vk->dev, pass_vk->pipeCache, MPVK_ALLOCATOR); vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR); vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR); vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR); vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR); - vkDestroyShaderModule(vk->dev, pass_vk->vert, MPVK_ALLOCATOR); - vkDestroyShaderModule(vk->dev, pass_vk->frag, MPVK_ALLOCATOR); - vkDestroyShaderModule(vk->dev, pass_vk->comp, MPVK_ALLOCATOR); talloc_free(pass); } @@ -909,6 +899,82 @@ static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp, return false; } +static const char vk_cache_magic[4] = {'R','A','V','K'}; +static const int vk_cache_version = 2; + +struct vk_cache_header { + char magic[sizeof(vk_cache_magic)]; + int cache_version; + char compiler[SPIRV_NAME_MAX_LEN]; + int compiler_version; + size_t vert_spirv_len; + size_t frag_spirv_len; + size_t comp_spirv_len; + size_t pipecache_len; +}; + +static bool vk_use_cached_program(const struct ra_renderpass_params *params, + const struct spirv_compiler *spirv, + struct bstr *vert_spirv, + struct bstr *frag_spirv, + struct bstr *comp_spirv, + struct bstr *pipecache) +{ + struct bstr cache = params->cached_program; + if (cache.len < sizeof(struct vk_cache_header)) + return false; + + struct vk_cache_header *header = (struct vk_cache_header *)cache.start; + cache = bstr_cut(cache, sizeof(*header)); + + if (strncmp(header->magic, vk_cache_magic, sizeof(vk_cache_magic)) != 0) + return false; + if (header->cache_version != vk_cache_version) + return false; + if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0) + return false; + if (header->compiler_version != spirv->compiler_version) + return false; + +#define GET(ptr) \ + if (cache.len < header->ptr##_len) \ + return false; \ + *ptr = bstr_splice(cache, 0, header->ptr##_len); \ + cache = bstr_cut(cache, ptr->len); + + GET(vert_spirv); + GET(frag_spirv); + GET(comp_spirv); + GET(pipecache); + return true; +} + +static VkResult vk_compile_glsl(struct ra *ra, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *spirv) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + VkResult ret = VK_SUCCESS; + int msgl = MSGL_DEBUG; + + if (!vk->spirv->fns->compile_glsl(vk->spirv, tactx, type, glsl, spirv)) { + ret = VK_ERROR_INVALID_SHADER_NV; + msgl = MSGL_ERR; + } + + static const char *shader_names[] = { + [GLSL_SHADER_VERTEX] = "vertex", + [GLSL_SHADER_FRAGMENT] = "fragment", + [GLSL_SHADER_COMPUTE] = "compute", + }; + + if (mp_msg_test(ra->log, msgl)) { + MP_MSG(ra, msgl, "%s shader source:\n", 
shader_names[type]); + mp_log_source(ra->log, msgl, glsl); + } + return ret; +} + static const VkPipelineStageFlagBits stageFlags[] = { [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT, [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT, @@ -918,6 +984,8 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, const struct ra_renderpass_params *params) { struct mpvk_ctx *vk = ra_vk_get(ra); + bool success = false; + assert(vk->spirv); struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); pass->params = *ra_renderpass_params_copy(pass, params); @@ -925,6 +993,13 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, struct ra_renderpass_vk *pass_vk = pass->priv = talloc_zero(pass, struct ra_renderpass_vk); + // temporary allocations/objects + void *tmp = talloc_new(NULL); + VkPipelineCache pipeCache = NULL; + VkShaderModule vert_shader = NULL; + VkShaderModule frag_shader = NULL; + VkShaderModule comp_shader = NULL; + static int dsCount[RA_VARTYPE_COUNT] = {0}; VkDescriptorSetLayoutBinding *bindings = NULL; int num_bindings = 0; @@ -943,7 +1018,7 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, .stageFlags = stageFlags[params->type], }; - MP_TARRAY_APPEND(pass, bindings, num_bindings, desc); + MP_TARRAY_APPEND(tmp, bindings, num_bindings, desc); dsCount[inp->type]++; break; } @@ -953,6 +1028,7 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, VkDescriptorPoolSize *dsPoolSizes = NULL; int poolSizeCount = 0; + for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) { if (dsCount[t] > 0) { VkDescriptorPoolSize dssize = { @@ -960,7 +1036,7 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, .descriptorCount = dsCount[t] * MPVK_NUM_DS, }; - MP_TARRAY_APPEND(pass, dsPoolSizes, poolSizeCount, dssize); + MP_TARRAY_APPEND(tmp, dsPoolSizes, poolSizeCount, dssize); } } @@ -972,7 +1048,6 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, }; VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool)); - talloc_free(dsPoolSizes); pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings); pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings); @@ -1009,13 +1084,35 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR, &pass_vk->pipeLayout)); + struct bstr vert = {0}, frag = {0}, comp = {0}, pipecache = {0}; + if (vk_use_cached_program(params, vk->spirv, &vert, &frag, &comp, &pipecache)) { + MP_VERBOSE(ra, "Using cached SPIR-V and VkPipeline.\n"); + } else { + pipecache.len = 0; + switch (params->type) { + case RA_RENDERPASS_TYPE_RASTER: + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_VERTEX, + params->vertex_shader, &vert)); + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_FRAGMENT, + params->frag_shader, &frag)); + comp.len = 0; + break; + case RA_RENDERPASS_TYPE_COMPUTE: + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_COMPUTE, + params->compute_shader, &comp)); + frag.len = 0; + vert.len = 0; + break; + } + } + VkPipelineCacheCreateInfo pcinfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, - .pInitialData = params->cached_program.start, - .initialDataSize = params->cached_program.len, + .pInitialData = pipecache.start, + .initialDataSize = pipecache.len, }; - VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pass_vk->pipeCache)); + VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pipeCache)); VkShaderModuleCreateInfo sinfo = { .sType = 
VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -1023,33 +1120,15 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, switch (params->type) { case RA_RENDERPASS_TYPE_RASTER: { - sinfo.pCode = (uint32_t *)params->vertex_shader; - sinfo.codeSize = strlen(params->vertex_shader); - VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->vert)); + sinfo.pCode = (uint32_t *)vert.start; + sinfo.codeSize = vert.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &vert_shader)); - sinfo.pCode = (uint32_t *)params->frag_shader; - sinfo.codeSize = strlen(params->frag_shader); - VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->frag)); + sinfo.pCode = (uint32_t *)frag.start; + sinfo.codeSize = frag.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &frag_shader)); - VK(vk_create_render_pass(vk->dev, params->target_format, - params->enable_blend, &pass_vk->renderPass)); - - VkPipelineShaderStageCreateInfo stages[] = { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = pass_vk->vert, - .pName = "main", - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = pass_vk->frag, - .pName = "main", - } - }; - - VkVertexInputAttributeDescription *attrs = talloc_array(pass, + VkVertexInputAttributeDescription *attrs = talloc_array(tmp, VkVertexInputAttributeDescription, params->num_vertex_attribs); for (int i = 0; i < params->num_vertex_attribs; i++) { @@ -1066,6 +1145,8 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, goto error; } } + VK(vk_create_render_pass(vk->dev, params->target_format, + params->enable_blend, &pass_vk->renderPass)); static const VkBlendFactor blendFactors[] = { [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO, @@ -1074,24 +1155,22 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, }; - VkPipelineColorBlendAttachmentState binfo = { - .blendEnable = params->enable_blend, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcColorBlendFactor = blendFactors[params->blend_src_rgb], - .dstColorBlendFactor = blendFactors[params->blend_dst_rgb], - .alphaBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha], - .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha], - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | - VK_COLOR_COMPONENT_A_BIT, - }; - VkGraphicsPipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = MP_ARRAY_SIZE(stages), - .pStages = &stages[0], + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vert_shader, + .pName = "main", + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = frag_shader, + .pName = "main", + } + }, .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 1, @@ -1125,7 +1204,19 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, - .pAttachments = &binfo, + 
.pAttachments = &(VkPipelineColorBlendAttachmentState) { + .blendEnable = params->enable_blend, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcColorBlendFactor = blendFactors[params->blend_src_rgb], + .dstColorBlendFactor = blendFactors[params->blend_dst_rgb], + .alphaBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha], + .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha], + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, @@ -1139,43 +1230,73 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, .renderPass = pass_vk->renderPass, }; - VK(vkCreateGraphicsPipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo, + VK(vkCreateGraphicsPipelines(vk->dev, pipeCache, 1, &cinfo, MPVK_ALLOCATOR, &pass_vk->pipe)); break; } case RA_RENDERPASS_TYPE_COMPUTE: { - sinfo.pCode = (uint32_t *)params->compute_shader; - sinfo.codeSize = strlen(params->compute_shader); - VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->comp)); + sinfo.pCode = (uint32_t *)comp.start; + sinfo.codeSize = comp.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &comp_shader)); VkComputePipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = pass_vk->comp, + .module = comp_shader, .pName = "main", }, .layout = pass_vk->pipeLayout, }; - VK(vkCreateComputePipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo, + VK(vkCreateComputePipelines(vk->dev, pipeCache, 1, &cinfo, MPVK_ALLOCATOR, &pass_vk->pipe)); break; } } - // Update cached program - bstr *prog = &pass->params.cached_program; - VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, NULL)); - prog->start = talloc_size(pass, prog->len); - VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, prog->start)); + // Update params->cached_program + struct bstr cache = {0}; + VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, NULL)); + cache.start = talloc_size(tmp, cache.len); + VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, cache.start)); + + struct vk_cache_header header = { + .cache_version = vk_cache_version, + .compiler_version = vk->spirv->compiler_version, + .vert_spirv_len = vert.len, + .frag_spirv_len = frag.len, + .comp_spirv_len = comp.len, + .pipecache_len = cache.len, + }; - return pass; + for (int i = 0; i < MP_ARRAY_SIZE(header.magic); i++) + header.magic[i] = vk_cache_magic[i]; + for (int i = 0; i < sizeof(vk->spirv->name); i++) + header.compiler[i] = vk->spirv->name[i]; + + struct bstr *prog = &pass->params.cached_program; + bstr_xappend(pass, prog, (struct bstr){ (char *) &header, sizeof(header) }); + bstr_xappend(pass, prog, vert); + bstr_xappend(pass, prog, frag); + bstr_xappend(pass, prog, comp); + bstr_xappend(pass, prog, cache); + + success = true; error: - vk_renderpass_destroy(ra, pass); - return NULL; + if (!success) { + vk_renderpass_destroy(ra, pass); + pass = NULL; + } + + vkDestroyShaderModule(vk->dev, vert_shader, MPVK_ALLOCATOR); + vkDestroyShaderModule(vk->dev, frag_shader, MPVK_ALLOCATOR); + vkDestroyShaderModule(vk->dev, comp_shader, MPVK_ALLOCATOR); + vkDestroyPipelineCache(vk->dev, pipeCache, MPVK_ALLOCATOR); + talloc_free(tmp); + return pass; } static void 
vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd, diff --git a/video/out/vulkan/spirv_nvidia.c b/video/out/vulkan/spirv_nvidia.c new file mode 100644 index 0000000000..6cc43a5619 --- /dev/null +++ b/video/out/vulkan/spirv_nvidia.c @@ -0,0 +1,54 @@ +#include "video/out/gpu/spirv.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +static bool nv_glsl_compile(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv) +{ + // The nvidia extension literally assumes your SPIRV is in fact valid GLSL + *out_spirv = bstr0(glsl); + return true; +} + +static bool nv_glsl_init(struct ra_ctx *ctx) +{ + struct mpvk_ctx *vk = ra_vk_ctx_get(ctx); + if (!vk) + return false; + + struct spirv_compiler *spv = ctx->spirv; + spv->required_ext = VK_NV_GLSL_SHADER_EXTENSION_NAME; + spv->glsl_version = 450; // impossible to query, so hard-code it.. + spv->ra_caps = RA_CAP_NESTED_ARRAY; + + // Make sure the extension is actually available, and fail gracefully + // if it isn't + VkExtensionProperties *props = NULL; + uint32_t extnum = 0; + VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL, &extnum, NULL)); + props = talloc_array(NULL, VkExtensionProperties, extnum); + VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL, &extnum, props)); + + bool ret = true; + for (int e = 0; e < extnum; e++) { + if (strncmp(props[e].extensionName, spv->required_ext, + VK_MAX_EXTENSION_NAME_SIZE) == 0) + goto done; + } + +error: + MP_VERBOSE(ctx, "Device doesn't support VK_NV_glsl_shader, skipping..\n"); + ret = false; + +done: + talloc_free(props); + return ret; +} + +const struct spirv_compiler_fns spirv_nvidia_builtin = { + .compile_glsl = nv_glsl_compile, + .init = nv_glsl_init, +}; diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c index 43e446bc36..659da9159a 100644 --- a/video/out/vulkan/utils.c +++ b/video/out/vulkan/utils.c @@ -1,5 +1,6 @@ #include +#include "video/out/gpu/spirv.h" #include "utils.h" #include "malloc.h" @@ -445,13 +446,12 @@ error: bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) { assert(vk->physd); - - VkQueueFamilyProperties *qfs = NULL; - int qfnum; + void *tmp = talloc_new(NULL); // Enumerate the queue families and find suitable families for each task + int qfnum; vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL); - qfs = talloc_array(NULL, VkQueueFamilyProperties, qfnum); + VkQueueFamilyProperties *qfs = talloc_array(tmp, VkQueueFamilyProperties, qfnum); vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs); MP_VERBOSE(vk, "Queue families supported by device:\n"); @@ -503,20 +503,24 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) .pQueuePriorities = priorities, }; - static const char *exts[] = { - VK_KHR_SWAPCHAIN_EXTENSION_NAME, - VK_NV_GLSL_SHADER_EXTENSION_NAME, - }; + const char **exts = NULL; + int num_exts = 0; + MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME); + if (vk->spirv->required_ext) + MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext); VkDeviceCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .queueCreateInfoCount = 1, .pQueueCreateInfos = &qinfo, .ppEnabledExtensionNames = exts, - .enabledExtensionCount = MP_ARRAY_SIZE(exts), + .enabledExtensionCount = num_exts, }; - MP_VERBOSE(vk, "Creating vulkan device...\n"); + MP_VERBOSE(vk, "Creating vulkan device with extensions:\n"); + for (int i = 0; i < num_exts; i++) + MP_VERBOSE(vk, " %s\n", exts[i]); + 
VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev)); vk_malloc_init(vk); @@ -525,12 +529,12 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) if (!vk_cmdpool_init(vk, qinfo, qfs[idx], &vk->pool)) goto error; - talloc_free(qfs); + talloc_free(tmp); return true; error: MP_ERR(vk, "Failed creating logical device!\n"); - talloc_free(qfs); + talloc_free(tmp); return false; } diff --git a/wscript b/wscript index 964b7878c7..694ead0736 100644 --- a/wscript +++ b/wscript @@ -806,6 +806,10 @@ video_output_features = [ 'name': '--vulkan', 'desc': 'Vulkan context support', 'func': check_cc(header_name='vulkan/vulkan.h', lib='vulkan'), + }, { + 'name': '--shaderc', + 'desc': 'libshaderc SPIR-V compiler', + 'func': check_cc(header_name='shaderc/shaderc.h', lib='shaderc_shared'), }, { 'name': 'egl-helpers', 'desc': 'EGL helper functions', diff --git a/wscript_build.py b/wscript_build.py index 86b51daaa2..9fd3280836 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -390,6 +390,8 @@ def build(ctx): ( "video/out/gpu/lcms.c" ), ( "video/out/gpu/osd.c" ), ( "video/out/gpu/ra.c" ), + ( "video/out/gpu/spirv.c" ), + ( "video/out/gpu/spirv_shaderc.c", "shaderc" ), ( "video/out/gpu/shader_cache.c" ), ( "video/out/gpu/user_shaders.c" ), ( "video/out/gpu/utils.c" ), @@ -451,6 +453,7 @@ def build(ctx): ( "video/out/vulkan/ra_vk.c", "vulkan" ), ( "video/out/vulkan/context.c", "vulkan" ), ( "video/out/vulkan/context_xlib.c", "vulkan && x11" ), + ( "video/out/vulkan/spirv_nvidia.c", "vulkan" ), ( "video/out/win32/exclusive_hack.c", "gl-win32" ), ( "video/out/wayland_common.c", "wayland" ), ( "video/out/wayland/buffer.c", "wayland" ), -- cgit v1.2.3