From 258487370fd840b018a404225277d74f74899c59 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Wed, 13 Sep 2017 03:09:48 +0200 Subject: vo_gpu: vulkan: generalize SPIR-V compiler In addition to the built-in nvidia compiler, we now also support a backend based on libshaderc. shaderc is sort of like glslang except it has a C API and is available as a dynamic library. The generated SPIR-V is now cached alongside the VkPipeline in the cached_program. We use a special cache header to ensure validity of this cache before passing it blindly to the vulkan implementation, since passing invalid SPIR-V can cause all sorts of nasty behavior. It's also designed to self-invalidate if the compiler gets better, by offering a catch-all `int compiler_version` that implementations can use as a cache invalidation marker. --- DOCS/man/options.rst | 17 +++ options/options.c | 2 + options/options.h | 1 + video/out/gpu/context.c | 18 ++- video/out/gpu/context.h | 1 + video/out/gpu/spirv.c | 78 ++++++++++ video/out/gpu/spirv.h | 41 ++++++ video/out/gpu/spirv_shaderc.c | 123 ++++++++++++++++++ video/out/vulkan/common.h | 1 + video/out/vulkan/context.c | 16 +++ video/out/vulkan/context.h | 3 + video/out/vulkan/ra_vk.c | 275 +++++++++++++++++++++++++++++----------- video/out/vulkan/spirv_nvidia.c | 54 ++++++ video/out/vulkan/utils.c | 28 ++-- wscript | 4 + wscript_build.py | 3 + 16 files changed, 571 insertions(+), 94 deletions(-) create mode 100644 video/out/gpu/spirv.c create mode 100644 video/out/gpu/spirv.h create mode 100644 video/out/gpu/spirv_shaderc.c create mode 100644 video/out/vulkan/spirv_nvidia.c diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst index 80e7350292..078981be24 100644 --- a/DOCS/man/options.rst +++ b/DOCS/man/options.rst @@ -4205,6 +4205,23 @@ The following video options are currently all specific to ``--vo=gpu`` and as mpv's vulkan implementation currently does not try and protect textures against concurrent access. +``--spirv-compiler=`` + Controls which compiler is used to translate GLSL to SPIR-V. This is + (currently) only relevant for ``--gpu-api=vulkan``. The possible choices + are: + + auto + Use the first available compiler. (Default) + shaderc + Use libshaderc, an API wrapper around glslang. This is generally the + preferred choice, if available. + nvidia + Use nvidia's built-in compiler. Only works on nvidia GPUs, and only + with vulkan. Supports some features glslang does not, but is buggy. + WARNING: It handles push constants incorrectly, which causes graphical + corruption with e.g. ``--temporal-dither``. Use only for testing. + ``--glsl-shaders=`` Custom GLSL hooks.
These are a flexible way to add custom fragment shaders, which can be injected at almost arbitrary points in the rendering pipeline, diff --git a/options/options.c b/options/options.c index 6467468691..b96ed5ef02 100644 --- a/options/options.c +++ b/options/options.c @@ -90,6 +90,7 @@ extern const struct m_obj_list ao_obj_list; extern const struct m_sub_options opengl_conf; extern const struct m_sub_options vulkan_conf; +extern const struct m_sub_options spirv_conf; extern const struct m_sub_options angle_conf; extern const struct m_sub_options cocoa_conf; @@ -686,6 +687,7 @@ const m_option_t mp_opts[] = { OPT_SUBSTRUCT("", demux_opts, demux_conf, 0), OPT_SUBSTRUCT("", gl_video_opts, gl_video_conf, 0), + OPT_SUBSTRUCT("", spirv_opts, spirv_conf, 0), #if HAVE_GL OPT_SUBSTRUCT("", opengl_opts, opengl_conf, 0), diff --git a/options/options.h b/options/options.h index 63dee03612..d2d0ea3cf9 100644 --- a/options/options.h +++ b/options/options.h @@ -330,6 +330,7 @@ typedef struct MPOpts { struct angle_opts *angle_opts; struct opengl_opts *opengl_opts; struct vulkan_opts *vulkan_opts; + struct spirv_opts *spirv_opts; struct cocoa_opts *cocoa_opts; struct dvd_opts *dvd_opts; diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c index 25e2a754bf..69f322c422 100644 --- a/video/out/gpu/context.c +++ b/video/out/gpu/context.c @@ -31,6 +31,7 @@ #include "video/out/vo.h" #include "context.h" +#include "spirv.h" extern const struct ra_ctx_fns ra_ctx_glx; extern const struct ra_ctx_fns ra_ctx_glx_probe; @@ -185,10 +186,17 @@ struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type, return NULL; } -void ra_ctx_destroy(struct ra_ctx **ctx) +void ra_ctx_destroy(struct ra_ctx **ctx_ptr) { - if (*ctx) - (*ctx)->fns->uninit(*ctx); - talloc_free(*ctx); - *ctx = NULL; + struct ra_ctx *ctx = *ctx_ptr; + if (!ctx) + return; + + if (ctx->spirv && ctx->spirv->fns->uninit) + ctx->spirv->fns->uninit(ctx); + + ctx->fns->uninit(ctx); + talloc_free(ctx); + + *ctx_ptr = NULL; } diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h index f74087592d..78c0441cdf 100644 --- a/video/out/gpu/context.h +++ b/video/out/gpu/context.h @@ -22,6 +22,7 @@ struct ra_ctx { struct ra_ctx_opts opts; const struct ra_ctx_fns *fns; struct ra_swapchain *swapchain; + struct spirv_compiler *spirv; void *priv; }; diff --git a/video/out/gpu/spirv.c b/video/out/gpu/spirv.c new file mode 100644 index 0000000000..9375fd9508 --- /dev/null +++ b/video/out/gpu/spirv.c @@ -0,0 +1,78 @@ +#include "common/msg.h" +#include "options/m_config.h" + +#include "spirv.h" +#include "config.h" + +extern const struct spirv_compiler_fns spirv_shaderc; +extern const struct spirv_compiler_fns spirv_nvidia_builtin; + +// in probe-order +enum { + SPIRV_AUTO = 0, + SPIRV_SHADERC, // generally preferred, but not packaged everywhere + SPIRV_NVIDIA, // can be useful for testing, only available on nvidia +}; + +static const struct spirv_compiler_fns *compilers[] = { +#if HAVE_SHADERC + [SPIRV_SHADERC] = &spirv_shaderc, +#endif +#if HAVE_VULKAN + [SPIRV_NVIDIA] = &spirv_nvidia_builtin, +#endif +}; + +static const struct m_opt_choice_alternatives compiler_choices[] = { + {"auto", SPIRV_AUTO}, +#if HAVE_SHADERC + {"shaderc", SPIRV_SHADERC}, +#endif +#if HAVE_VULKAN + {"nvidia", SPIRV_NVIDIA}, +#endif + {0} +}; + +struct spirv_opts { + int compiler; +}; + +#define OPT_BASE_STRUCT struct spirv_opts +const struct m_sub_options spirv_conf = { + .opts = (const struct m_option[]) { + OPT_CHOICE_C("spirv-compiler", compiler, 0, compiler_choices), + {0} + 
}, .size = sizeof(struct spirv_opts), }; + +bool spirv_compiler_init(struct ra_ctx *ctx) +{ + void *tmp = talloc_new(NULL); + struct spirv_opts *opts = mp_get_config_group(tmp, ctx->global, &spirv_conf); + int compiler = opts->compiler; + talloc_free(tmp); + + for (int i = SPIRV_AUTO+1; i < MP_ARRAY_SIZE(compilers); i++) { + if (compiler != SPIRV_AUTO && i != compiler) + continue; + if (!compilers[i]) + continue; + + ctx->spirv = talloc_zero(NULL, struct spirv_compiler); + ctx->spirv->log = ctx->log; + ctx->spirv->fns = compilers[i]; + + const char *name = m_opt_choice_str(compiler_choices, i); + strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name)); + MP_VERBOSE(ctx, "Initializing SPIR-V compiler '%s'\n", name); + if (ctx->spirv->fns->init(ctx)) + return true; + talloc_free(ctx->spirv); + ctx->spirv = NULL; + } + + MP_ERR(ctx, "Failed initializing SPIR-V compiler!\n"); + return false; +} diff --git a/video/out/gpu/spirv.h b/video/out/gpu/spirv.h new file mode 100644 index 0000000000..e3dbd4f52a --- /dev/null +++ b/video/out/gpu/spirv.h @@ -0,0 +1,41 @@ +#pragma once + +#include "common/msg.h" +#include "common/common.h" +#include "context.h" + +enum glsl_shader { + GLSL_SHADER_VERTEX, + GLSL_SHADER_FRAGMENT, + GLSL_SHADER_COMPUTE, +}; + +#define SPIRV_NAME_MAX_LEN 32 + +struct spirv_compiler { + char name[SPIRV_NAME_MAX_LEN]; + const struct spirv_compiler_fns *fns; + struct mp_log *log; + void *priv; + + const char *required_ext; // or NULL + int glsl_version; // GLSL version supported + int compiler_version; // for cache invalidation, may be left as 0 + int ra_caps; // RA_CAP_* provided by this implementation, if any +}; + +struct spirv_compiler_fns { + // Compile GLSL to SPIR-V, under GL_KHR_vulkan_glsl semantics. + bool (*compile_glsl)(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv); + + // Called by spirv_compiler_init / ra_ctx_destroy. These don't need to + // allocate/free ctx->spirv; that is done by the caller. + bool (*init)(struct ra_ctx *ctx); + void (*uninit)(struct ra_ctx *ctx); // optional +}; + +// Initializes ctx->spirv to a valid SPIR-V compiler, or returns false on +// failure. Cleanup will be handled by ra_ctx_destroy. +bool spirv_compiler_init(struct ra_ctx *ctx); diff --git a/video/out/gpu/spirv_shaderc.c b/video/out/gpu/spirv_shaderc.c new file mode 100644 index 0000000000..9b429ca2c2 --- /dev/null +++ b/video/out/gpu/spirv_shaderc.c @@ -0,0 +1,123 @@ +#include "common/msg.h" + +#include "context.h" +#include "spirv.h" + +#include <shaderc/shaderc.h> + +struct priv { + shaderc_compiler_t compiler; + shaderc_compile_options_t opts; +}; + +static void shaderc_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->spirv->priv; + if (!p) + return; + + shaderc_compile_options_release(p->opts); + shaderc_compiler_release(p->compiler); +} + +static bool shaderc_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->spirv->priv = talloc_zero(ctx->spirv, struct priv); + + p->compiler = shaderc_compiler_initialize(); + if (!p->compiler) + goto error; + p->opts = shaderc_compile_options_initialize(); + if (!p->opts) + goto error; + + shaderc_compile_options_set_optimization_level(p->opts, + shaderc_optimization_level_size); + + int ver, rev; + shaderc_get_spv_version(&ver, &rev); + ctx->spirv->compiler_version = ver * 100 + rev; // forwards compatibility + ctx->spirv->glsl_version = 450; // impossible to query?
+ return true; + +error: + shaderc_uninit(ctx); + return false; +} + +static shaderc_compilation_result_t compile(struct priv *p, + enum glsl_shader type, + const char *glsl, bool debug) +{ + static const shaderc_shader_kind kinds[] = { + [GLSL_SHADER_VERTEX] = shaderc_glsl_vertex_shader, + [GLSL_SHADER_FRAGMENT] = shaderc_glsl_fragment_shader, + [GLSL_SHADER_COMPUTE] = shaderc_glsl_compute_shader, + }; + + if (debug) { + return shaderc_compile_into_spv_assembly(p->compiler, glsl, strlen(glsl), + kinds[type], "input", "main", p->opts); + } else { + return shaderc_compile_into_spv(p->compiler, glsl, strlen(glsl), + kinds[type], "input", "main", p->opts); + } +} + +static bool shaderc_compile(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv) +{ + struct priv *p = spirv->priv; + + shaderc_compilation_result_t res = compile(p, type, glsl, false); + int errs = shaderc_result_get_num_errors(res), + warn = shaderc_result_get_num_warnings(res), + msgl = errs ? MSGL_ERR : warn ? MSGL_WARN : MSGL_V; + + const char *msg = shaderc_result_get_error_message(res); + if (msg[0]) + MP_MSG(spirv, msgl, "shaderc output:\n%s", msg); + + int s = shaderc_result_get_compilation_status(res); + bool success = s == shaderc_compilation_status_success; + + static const char *results[] = { + [shaderc_compilation_status_success] = "success", + [shaderc_compilation_status_invalid_stage] = "invalid stage", + [shaderc_compilation_status_compilation_error] = "error", + [shaderc_compilation_status_internal_error] = "internal error", + [shaderc_compilation_status_null_result_object] = "no result", + [shaderc_compilation_status_invalid_assembly] = "invalid assembly", + }; + + const char *status = s < MP_ARRAY_SIZE(results) ? results[s] : "unknown"; + MP_MSG(spirv, msgl, "shaderc compile status '%s' (%d errors, %d warnings)\n", + status, errs, warn); + + if (success) { + void *bytes = (void *) shaderc_result_get_bytes(res); + out_spirv->len = shaderc_result_get_length(res); + out_spirv->start = talloc_memdup(tactx, bytes, out_spirv->len); + } + + // Also print SPIR-V disassembly for debugging purposes. Unfortunately + // there doesn't seem to be a way to get this except compiling the shader + // a second time.. 
+ if (mp_msg_test(spirv->log, MSGL_TRACE)) { + shaderc_compilation_result_t dis = compile(p, type, glsl, true); + MP_TRACE(spirv, "Generated SPIR-V:\n%.*s", + (int)shaderc_result_get_length(dis), + shaderc_result_get_bytes(dis)); + shaderc_result_release(dis); + } + + shaderc_result_release(res); + return success; +} + +const struct spirv_compiler_fns spirv_shaderc = { + .compile_glsl = shaderc_compile, + .init = shaderc_init, + .uninit = shaderc_uninit, +}; diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h index 4c0e783f0e..d0d14e28c2 100644 --- a/video/out/vulkan/common.h +++ b/video/out/vulkan/common.h @@ -45,6 +45,7 @@ struct mpvk_ctx { struct vk_malloc *alloc; // memory allocator for this device struct vk_cmdpool *pool; // primary command pool for this device struct vk_cmd *last_cmd; // most recently submitted command + struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler // Cached capabilities VkPhysicalDeviceLimits limits; diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c index bd456d214c..d2445fbda7 100644 --- a/video/out/vulkan/context.c +++ b/video/out/vulkan/context.c @@ -16,6 +16,8 @@ */ #include "options/m_config.h" +#include "video/out/gpu/spirv.h" + #include "context.h" #include "ra_vk.h" #include "utils.h" @@ -125,6 +127,17 @@ struct priv { int last_imgidx; // the image index last acquired (for submit) }; +static const struct ra_swapchain_fns vulkan_swapchain; + +struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx) +{ + if (ctx->swapchain->fns != &vulkan_swapchain) + return NULL; + + struct priv *p = ctx->swapchain->priv; + return p->vk; +} + static bool update_swapchain_info(struct priv *p, VkSwapchainCreateInfoKHR *info) { @@ -265,6 +278,9 @@ bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw)) goto error; + if (!spirv_compiler_init(ctx)) + goto error; + vk->spirv = ctx->spirv; if (!mpvk_pick_surface_format(vk)) goto error; if (!mpvk_device_init(vk, p->opts->dev_opts)) diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h index 3f630bc10e..a64d39f125 100644 --- a/video/out/vulkan/context.h +++ b/video/out/vulkan/context.h @@ -8,3 +8,6 @@ void ra_vk_ctx_uninit(struct ra_ctx *ctx); bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, VkPresentModeKHR preferred_mode); bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h); + +// May be called on a ra_ctx of any type. +struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx); diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c index ce0cbc66e9..897b2e1ff1 100644 --- a/video/out/vulkan/ra_vk.c +++ b/video/out/vulkan/ra_vk.c @@ -1,6 +1,8 @@ +#include "video/out/gpu/utils.h" +#include "video/out/gpu/spirv.h" + #include "ra_vk.h" #include "malloc.h" -#include "video/out/opengl/utils.h" static struct ra_fns ra_fns_vk; @@ -185,13 +187,10 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log) struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk); p->vk = vk; - // There's no way to query the supported GLSL version from VK_NV_glsl_shader - // (thanks nvidia), so just pick the GL version that modern nvidia devices - // support.. 
- ra->glsl_version = 450; + ra->caps |= vk->spirv->ra_caps; + ra->glsl_version = vk->spirv->glsl_version; ra->glsl_vulkan = true; ra->max_shmem = vk->limits.maxComputeSharedMemorySize; - ra->caps = RA_CAP_NESTED_ARRAY; if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT) ra->caps |= RA_CAP_COMPUTE; @@ -821,14 +820,9 @@ error: // For ra_renderpass.priv struct ra_renderpass_vk { - // Compiled shaders - VkShaderModule vert; - VkShaderModule frag; - VkShaderModule comp; // Pipeline / render pass VkPipeline pipe; VkPipelineLayout pipeLayout; - VkPipelineCache pipeCache; VkRenderPass renderPass; // Descriptor set (bindings) VkDescriptorSetLayout dsLayout; @@ -854,14 +848,10 @@ static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) ra_buf_pool_uninit(ra, &pass_vk->vbo); vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR); - vkDestroyPipelineCache(vk->dev, pass_vk->pipeCache, MPVK_ALLOCATOR); vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR); vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR); vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR); vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR); - vkDestroyShaderModule(vk->dev, pass_vk->vert, MPVK_ALLOCATOR); - vkDestroyShaderModule(vk->dev, pass_vk->frag, MPVK_ALLOCATOR); - vkDestroyShaderModule(vk->dev, pass_vk->comp, MPVK_ALLOCATOR); talloc_free(pass); } @@ -909,6 +899,82 @@ static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp, return false; } +static const char vk_cache_magic[4] = {'R','A','V','K'}; +static const int vk_cache_version = 2; + +struct vk_cache_header { + char magic[sizeof(vk_cache_magic)]; + int cache_version; + char compiler[SPIRV_NAME_MAX_LEN]; + int compiler_version; + size_t vert_spirv_len; + size_t frag_spirv_len; + size_t comp_spirv_len; + size_t pipecache_len; +}; + +static bool vk_use_cached_program(const struct ra_renderpass_params *params, + const struct spirv_compiler *spirv, + struct bstr *vert_spirv, + struct bstr *frag_spirv, + struct bstr *comp_spirv, + struct bstr *pipecache) +{ + struct bstr cache = params->cached_program; + if (cache.len < sizeof(struct vk_cache_header)) + return false; + + struct vk_cache_header *header = (struct vk_cache_header *)cache.start; + cache = bstr_cut(cache, sizeof(*header)); + + if (strncmp(header->magic, vk_cache_magic, sizeof(vk_cache_magic)) != 0) + return false; + if (header->cache_version != vk_cache_version) + return false; + if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0) + return false; + if (header->compiler_version != spirv->compiler_version) + return false; + +#define GET(ptr) \ + if (cache.len < header->ptr##_len) \ + return false; \ + *ptr = bstr_splice(cache, 0, header->ptr##_len); \ + cache = bstr_cut(cache, ptr->len); + + GET(vert_spirv); + GET(frag_spirv); + GET(comp_spirv); + GET(pipecache); + return true; +} + +static VkResult vk_compile_glsl(struct ra *ra, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *spirv) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + VkResult ret = VK_SUCCESS; + int msgl = MSGL_DEBUG; + + if (!vk->spirv->fns->compile_glsl(vk->spirv, tactx, type, glsl, spirv)) { + ret = VK_ERROR_INVALID_SHADER_NV; + msgl = MSGL_ERR; + } + + static const char *shader_names[] = { + [GLSL_SHADER_VERTEX] = "vertex", + [GLSL_SHADER_FRAGMENT] = "fragment", + [GLSL_SHADER_COMPUTE] = "compute", + }; + + if (mp_msg_test(ra->log, msgl)) { + MP_MSG(ra, msgl, "%s shader source:\n", 
shader_names[type]); + mp_log_source(ra->log, msgl, glsl); + } + return ret; +} + static const VkPipelineStageFlagBits stageFlags[] = { [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT, [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT, @@ -918,6 +984,8 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, const struct ra_renderpass_params *params) { struct mpvk_ctx *vk = ra_vk_get(ra); + bool success = false; + assert(vk->spirv); struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); pass->params = *ra_renderpass_params_copy(pass, params); @@ -925,6 +993,13 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, struct ra_renderpass_vk *pass_vk = pass->priv = talloc_zero(pass, struct ra_renderpass_vk); + // temporary allocations/objects + void *tmp = talloc_new(NULL); + VkPipelineCache pipeCache = NULL; + VkShaderModule vert_shader = NULL; + VkShaderModule frag_shader = NULL; + VkShaderModule comp_shader = NULL; + static int dsCount[RA_VARTYPE_COUNT] = {0}; VkDescriptorSetLayoutBinding *bindings = NULL; int num_bindings = 0; @@ -943,7 +1018,7 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, .stageFlags = stageFlags[params->type], }; - MP_TARRAY_APPEND(pass, bindings, num_bindings, desc); + MP_TARRAY_APPEND(tmp, bindings, num_bindings, desc); dsCount[inp->type]++; break; } @@ -953,6 +1028,7 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, VkDescriptorPoolSize *dsPoolSizes = NULL; int poolSizeCount = 0; + for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) { if (dsCount[t] > 0) { VkDescriptorPoolSize dssize = { @@ -960,7 +1036,7 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, .descriptorCount = dsCount[t] * MPVK_NUM_DS, }; - MP_TARRAY_APPEND(pass, dsPoolSizes, poolSizeCount, dssize); + MP_TARRAY_APPEND(tmp, dsPoolSizes, poolSizeCount, dssize); } } @@ -972,7 +1048,6 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, }; VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool)); - talloc_free(dsPoolSizes); pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings); pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings); @@ -1009,13 +1084,35 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR, &pass_vk->pipeLayout)); + struct bstr vert = {0}, frag = {0}, comp = {0}, pipecache = {0}; + if (vk_use_cached_program(params, vk->spirv, &vert, &frag, &comp, &pipecache)) { + MP_VERBOSE(ra, "Using cached SPIR-V and VkPipeline.\n"); + } else { + pipecache.len = 0; + switch (params->type) { + case RA_RENDERPASS_TYPE_RASTER: + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_VERTEX, + params->vertex_shader, &vert)); + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_FRAGMENT, + params->frag_shader, &frag)); + comp.len = 0; + break; + case RA_RENDERPASS_TYPE_COMPUTE: + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_COMPUTE, + params->compute_shader, &comp)); + frag.len = 0; + vert.len = 0; + break; + } + } + VkPipelineCacheCreateInfo pcinfo = { .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, - .pInitialData = params->cached_program.start, - .initialDataSize = params->cached_program.len, + .pInitialData = pipecache.start, + .initialDataSize = pipecache.len, }; - VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pass_vk->pipeCache)); + VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pipeCache)); VkShaderModuleCreateInfo sinfo = { .sType = 
VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -1023,33 +1120,15 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, switch (params->type) { case RA_RENDERPASS_TYPE_RASTER: { - sinfo.pCode = (uint32_t *)params->vertex_shader; - sinfo.codeSize = strlen(params->vertex_shader); - VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->vert)); + sinfo.pCode = (uint32_t *)vert.start; + sinfo.codeSize = vert.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &vert_shader)); - sinfo.pCode = (uint32_t *)params->frag_shader; - sinfo.codeSize = strlen(params->frag_shader); - VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->frag)); + sinfo.pCode = (uint32_t *)frag.start; + sinfo.codeSize = frag.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &frag_shader)); - VK(vk_create_render_pass(vk->dev, params->target_format, - params->enable_blend, &pass_vk->renderPass)); - - VkPipelineShaderStageCreateInfo stages[] = { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = pass_vk->vert, - .pName = "main", - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = pass_vk->frag, - .pName = "main", - } - }; - - VkVertexInputAttributeDescription *attrs = talloc_array(pass, + VkVertexInputAttributeDescription *attrs = talloc_array(tmp, VkVertexInputAttributeDescription, params->num_vertex_attribs); for (int i = 0; i < params->num_vertex_attribs; i++) { @@ -1066,6 +1145,8 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, goto error; } } + VK(vk_create_render_pass(vk->dev, params->target_format, + params->enable_blend, &pass_vk->renderPass)); static const VkBlendFactor blendFactors[] = { [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO, @@ -1074,24 +1155,22 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, }; - VkPipelineColorBlendAttachmentState binfo = { - .blendEnable = params->enable_blend, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcColorBlendFactor = blendFactors[params->blend_src_rgb], - .dstColorBlendFactor = blendFactors[params->blend_dst_rgb], - .alphaBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha], - .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha], - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | - VK_COLOR_COMPONENT_A_BIT, - }; - VkGraphicsPipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = MP_ARRAY_SIZE(stages), - .pStages = &stages[0], + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vert_shader, + .pName = "main", + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = frag_shader, + .pName = "main", + } + }, .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, .vertexBindingDescriptionCount = 1, @@ -1125,7 +1204,19 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .attachmentCount = 1, - .pAttachments = &binfo, + 
.pAttachments = &(VkPipelineColorBlendAttachmentState) { + .blendEnable = params->enable_blend, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcColorBlendFactor = blendFactors[params->blend_src_rgb], + .dstColorBlendFactor = blendFactors[params->blend_dst_rgb], + .alphaBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha], + .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha], + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, }, .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, @@ -1139,43 +1230,73 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, .renderPass = pass_vk->renderPass, }; - VK(vkCreateGraphicsPipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo, + VK(vkCreateGraphicsPipelines(vk->dev, pipeCache, 1, &cinfo, MPVK_ALLOCATOR, &pass_vk->pipe)); break; } case RA_RENDERPASS_TYPE_COMPUTE: { - sinfo.pCode = (uint32_t *)params->compute_shader; - sinfo.codeSize = strlen(params->compute_shader); - VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->comp)); + sinfo.pCode = (uint32_t *)comp.start; + sinfo.codeSize = comp.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &comp_shader)); VkComputePipelineCreateInfo cinfo = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = pass_vk->comp, + .module = comp_shader, .pName = "main", }, .layout = pass_vk->pipeLayout, }; - VK(vkCreateComputePipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo, + VK(vkCreateComputePipelines(vk->dev, pipeCache, 1, &cinfo, MPVK_ALLOCATOR, &pass_vk->pipe)); break; } } - // Update cached program - bstr *prog = &pass->params.cached_program; - VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, NULL)); - prog->start = talloc_size(pass, prog->len); - VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, prog->start)); + // Update params->cached_program + struct bstr cache = {0}; + VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, NULL)); + cache.start = talloc_size(tmp, cache.len); + VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, cache.start)); + + struct vk_cache_header header = { + .cache_version = vk_cache_version, + .compiler_version = vk->spirv->compiler_version, + .vert_spirv_len = vert.len, + .frag_spirv_len = frag.len, + .comp_spirv_len = comp.len, + .pipecache_len = cache.len, + }; - return pass; + for (int i = 0; i < MP_ARRAY_SIZE(header.magic); i++) + header.magic[i] = vk_cache_magic[i]; + for (int i = 0; i < sizeof(vk->spirv->name); i++) + header.compiler[i] = vk->spirv->name[i]; + + struct bstr *prog = &pass->params.cached_program; + bstr_xappend(pass, prog, (struct bstr){ (char *) &header, sizeof(header) }); + bstr_xappend(pass, prog, vert); + bstr_xappend(pass, prog, frag); + bstr_xappend(pass, prog, comp); + bstr_xappend(pass, prog, cache); + + success = true; error: - vk_renderpass_destroy(ra, pass); - return NULL; + if (!success) { + vk_renderpass_destroy(ra, pass); + pass = NULL; + } + + vkDestroyShaderModule(vk->dev, vert_shader, MPVK_ALLOCATOR); + vkDestroyShaderModule(vk->dev, frag_shader, MPVK_ALLOCATOR); + vkDestroyShaderModule(vk->dev, comp_shader, MPVK_ALLOCATOR); + vkDestroyPipelineCache(vk->dev, pipeCache, MPVK_ALLOCATOR); + talloc_free(tmp); + return pass; } static void 
vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd, diff --git a/video/out/vulkan/spirv_nvidia.c b/video/out/vulkan/spirv_nvidia.c new file mode 100644 index 0000000000..6cc43a5619 --- /dev/null +++ b/video/out/vulkan/spirv_nvidia.c @@ -0,0 +1,54 @@ +#include "video/out/gpu/spirv.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +static bool nv_glsl_compile(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv) +{ + // The nvidia extension literally assumes your SPIRV is in fact valid GLSL + *out_spirv = bstr0(glsl); + return true; +} + +static bool nv_glsl_init(struct ra_ctx *ctx) +{ + struct mpvk_ctx *vk = ra_vk_ctx_get(ctx); + if (!vk) + return false; + + struct spirv_compiler *spv = ctx->spirv; + spv->required_ext = VK_NV_GLSL_SHADER_EXTENSION_NAME; + spv->glsl_version = 450; // impossible to query, so hard-code it.. + spv->ra_caps = RA_CAP_NESTED_ARRAY; + + // Make sure the extension is actually available, and fail gracefully + // if it isn't + VkExtensionProperties *props = NULL; + uint32_t extnum = 0; + VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL, &extnum, NULL)); + props = talloc_array(NULL, VkExtensionProperties, extnum); + VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL, &extnum, props)); + + bool ret = true; + for (int e = 0; e < extnum; e++) { + if (strncmp(props[e].extensionName, spv->required_ext, + VK_MAX_EXTENSION_NAME_SIZE) == 0) + goto done; + } + +error: + MP_VERBOSE(ctx, "Device doesn't support VK_NV_glsl_shader, skipping..\n"); + ret = false; + +done: + talloc_free(props); + return ret; +} + +const struct spirv_compiler_fns spirv_nvidia_builtin = { + .compile_glsl = nv_glsl_compile, + .init = nv_glsl_init, +}; diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c index 43e446bc36..659da9159a 100644 --- a/video/out/vulkan/utils.c +++ b/video/out/vulkan/utils.c @@ -1,5 +1,6 @@ #include +#include "video/out/gpu/spirv.h" #include "utils.h" #include "malloc.h" @@ -445,13 +446,12 @@ error: bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) { assert(vk->physd); - - VkQueueFamilyProperties *qfs = NULL; - int qfnum; + void *tmp = talloc_new(NULL); // Enumerate the queue families and find suitable families for each task + int qfnum; vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL); - qfs = talloc_array(NULL, VkQueueFamilyProperties, qfnum); + VkQueueFamilyProperties *qfs = talloc_array(tmp, VkQueueFamilyProperties, qfnum); vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs); MP_VERBOSE(vk, "Queue families supported by device:\n"); @@ -503,20 +503,24 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) .pQueuePriorities = priorities, }; - static const char *exts[] = { - VK_KHR_SWAPCHAIN_EXTENSION_NAME, - VK_NV_GLSL_SHADER_EXTENSION_NAME, - }; + const char **exts = NULL; + int num_exts = 0; + MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME); + if (vk->spirv->required_ext) + MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext); VkDeviceCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .queueCreateInfoCount = 1, .pQueueCreateInfos = &qinfo, .ppEnabledExtensionNames = exts, - .enabledExtensionCount = MP_ARRAY_SIZE(exts), + .enabledExtensionCount = num_exts, }; - MP_VERBOSE(vk, "Creating vulkan device...\n"); + MP_VERBOSE(vk, "Creating vulkan device with extensions:\n"); + for (int i = 0; i < num_exts; i++) + MP_VERBOSE(vk, " %s\n", exts[i]); + 
VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev)); vk_malloc_init(vk); @@ -525,12 +529,12 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) if (!vk_cmdpool_init(vk, qinfo, qfs[idx], &vk->pool)) goto error; - talloc_free(qfs); + talloc_free(tmp); return true; error: MP_ERR(vk, "Failed creating logical device!\n"); - talloc_free(qfs); + talloc_free(tmp); return false; } diff --git a/wscript b/wscript index 964b7878c7..694ead0736 100644 --- a/wscript +++ b/wscript @@ -806,6 +806,10 @@ video_output_features = [ 'name': '--vulkan', 'desc': 'Vulkan context support', 'func': check_cc(header_name='vulkan/vulkan.h', lib='vulkan'), + }, { + 'name': '--shaderc', + 'desc': 'libshaderc SPIR-V compiler', + 'func': check_cc(header_name='shaderc/shaderc.h', lib='shaderc_shared'), }, { 'name': 'egl-helpers', 'desc': 'EGL helper functions', diff --git a/wscript_build.py b/wscript_build.py index 86b51daaa2..9fd3280836 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -390,6 +390,8 @@ def build(ctx): ( "video/out/gpu/lcms.c" ), ( "video/out/gpu/osd.c" ), ( "video/out/gpu/ra.c" ), + ( "video/out/gpu/spirv.c" ), + ( "video/out/gpu/spirv_shaderc.c", "shaderc" ), ( "video/out/gpu/shader_cache.c" ), ( "video/out/gpu/user_shaders.c" ), ( "video/out/gpu/utils.c" ), @@ -451,6 +453,7 @@ def build(ctx): ( "video/out/vulkan/ra_vk.c", "vulkan" ), ( "video/out/vulkan/context.c", "vulkan" ), ( "video/out/vulkan/context_xlib.c", "vulkan && x11" ), + ( "video/out/vulkan/spirv_nvidia.c", "vulkan" ), ( "video/out/win32/exclusive_hack.c", "gl-win32" ), ( "video/out/wayland_common.c", "wayland" ), ( "video/out/wayland/buffer.c", "wayland" ), -- cgit v1.2.3