From 333cae74ef0fa62e0355e85d21f0f41ced3963e7 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Sat, 5 Aug 2017 14:20:14 +0200
Subject: vo_opengl: move shader handling to ra

Now all GL-specifics of shader compilation are abstracted through ra.
Of course we still have everything hardcoded to GLSL - that isn't going
to change.

Some things will probably change later - in particular, the way we pass
uniforms and textures to the shader. Currently, there is a confusing
mismatch between "primitive" uniforms like floats, and others like
textures.

Also, SSBOs are not abstracted yet.
---
 video/out/opengl/gl_utils.c     |  26 +-
 video/out/opengl/gl_utils.h     |  18 +-
 video/out/opengl/osd.c          |  39 +-
 video/out/opengl/osd.h          |   3 +-
 video/out/opengl/ra.c           |  45 +++
 video/out/opengl/ra.h           | 164 ++++++++
 video/out/opengl/ra_gl.c        | 382 ++++++++++++++++++-
 video/out/opengl/ra_gl.h        |  12 +
 video/out/opengl/shader_cache.c | 809 ++++++++++++++++------------------------
 video/out/opengl/shader_cache.h |  52 +--
 video/out/opengl/video.c        |  83 ++---
 11 files changed, 1016 insertions(+), 617 deletions(-)

(limited to 'video')

diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c
index c870756b1e..df6f0543ad 100644
--- a/video/out/opengl/gl_utils.c
+++ b/video/out/opengl/gl_utils.c
@@ -150,17 +150,32 @@ static void gl_vao_enable_attribs(struct gl_vao *vao)
 {
     GL *gl = vao->gl;
 
-    for (int n = 0; vao->entries[n].name; n++) {
-        const struct gl_vao_entry *e = &vao->entries[n];
+    for (int n = 0; n < vao->num_entries; n++) {
+        const struct ra_renderpass_input *e = &vao->entries[n];
+        GLenum type = 0;
+        bool normalized = false;
+        switch (e->type) {
+        case RA_VARTYPE_FLOAT:
+            type = GL_FLOAT;
+            break;
+        case RA_VARTYPE_BYTE_UNORM:
+            type = GL_UNSIGNED_BYTE;
+            normalized = true;
+            break;
+        default:
+            abort();
+        }
+        assert(e->dim_m == 1);
 
         gl->EnableVertexAttribArray(n);
-        gl->VertexAttribPointer(n, e->num_elems, e->type, e->normalized,
-                                vao->stride, (void *)(intptr_t)e->offset);
+        gl->VertexAttribPointer(n, e->dim_v, type, normalized,
+                                vao->stride, (void *)(intptr_t)e->binding);
     }
 }
 
 void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
-                 const struct gl_vao_entry *entries)
+                 const struct ra_renderpass_input *entries,
+                 int num_entries)
 {
     assert(!vao->vao);
     assert(!vao->buffer);
@@ -169,6 +184,7 @@ void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
         .gl = gl,
         .stride = stride,
         .entries = entries,
+        .num_entries = num_entries,
     };
 
     gl->GenBuffers(1, &vao->buffer);
diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h
index 5ae8d1590b..6192a6b312 100644
--- a/video/out/opengl/gl_utils.h
+++ b/video/out/opengl/gl_utils.h
@@ -34,32 +34,22 @@ void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type,
 
 mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h);
 
-const char* mp_sampler_type(GLenum texture_target);
-
 // print a multi line string with line numbers (e.g. for shader sources)
 // log, lev: module and log level, as in mp_msg()
 void mp_log_source(struct mp_log *log, int lev, const char *src);
 
-struct gl_vao_entry {
-    // used for shader / glBindAttribLocation
-    const char *name;
-    // glVertexAttribPointer() arguments
-    int num_elems;      // size (number of elements)
-    GLenum type;
-    bool normalized;
-    int offset;
-};
-
 struct gl_vao {
     GL *gl;
     GLuint vao;     // the VAO object, or 0 if unsupported by driver
     GLuint buffer;  // GL_ARRAY_BUFFER used for the data
     int stride;     // size of each element (interleaved elements are assumed)
-    const struct gl_vao_entry *entries;
+    const struct ra_renderpass_input *entries;
+    int num_entries;
 };
 
 void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
-                 const struct gl_vao_entry *entries);
+                 const struct ra_renderpass_input *entries,
+                 int num_entries);
 void gl_vao_uninit(struct gl_vao *vao);
 void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num);
 
diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c
index 89820693ab..a656451c2e 100644
--- a/video/out/opengl/osd.c
+++ b/video/out/opengl/osd.c
@@ -22,17 +22,16 @@
 #include <libavutil/common.h>
 
 #include "formats.h"
-#include "ra_gl.h"
 #include "osd.h"
 
 #define GLSL(x) gl_sc_add(sc, #x "\n");
 
 // glBlendFuncSeparate() arguments
 static const int blend_factors[SUBBITMAP_COUNT][4] = {
-    [SUBBITMAP_LIBASS] = {GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA,
-                          GL_ONE,       GL_ONE_MINUS_SRC_ALPHA},
-    [SUBBITMAP_RGBA] =   {GL_ONE,       GL_ONE_MINUS_SRC_ALPHA,
-                          GL_ONE,       GL_ONE_MINUS_SRC_ALPHA},
+    [SUBBITMAP_LIBASS] = {RA_BLEND_SRC_ALPHA, RA_BLEND_ONE_MINUS_SRC_ALPHA,
+                          RA_BLEND_ONE,       RA_BLEND_ONE_MINUS_SRC_ALPHA},
+    [SUBBITMAP_RGBA] =   {RA_BLEND_ONE,       RA_BLEND_ONE_MINUS_SRC_ALPHA,
+                          RA_BLEND_ONE,       RA_BLEND_ONE_MINUS_SRC_ALPHA},
 };
 
 struct vertex {
@@ -41,10 +40,10 @@ struct vertex {
     uint8_t ass_color[4];
 };
 
-static const struct gl_vao_entry vertex_vao[] = {
-    {"position",    2, GL_FLOAT,         false, offsetof(struct vertex, position)},
-    {"texcoord" ,   2, GL_FLOAT,         false, offsetof(struct vertex, texcoord)},
-    {"ass_color",   4, GL_UNSIGNED_BYTE, true,  offsetof(struct vertex, ass_color)},
+static const struct ra_renderpass_input vertex_vao[] = {
+    {"position",  RA_VARTYPE_FLOAT,      2, 1, offsetof(struct vertex, position)},
+    {"texcoord" , RA_VARTYPE_FLOAT,      2, 1, offsetof(struct vertex, texcoord)},
+    {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)},
     {0}
 };
 
@@ -53,7 +52,6 @@ struct mpgl_osd_part {
     int change_id;
     struct ra_tex *texture;
     int w, h;
-    struct gl_pbo_upload pbo;
     int num_subparts;
     int prev_num_subparts;
     struct sub_bitmap *subparts;
@@ -65,7 +63,6 @@ struct mpgl_osd {
     struct mp_log *log;
     struct osd_state *osd;
     struct ra *ra;
-    GL *gl;
     struct mpgl_osd_part *parts[MAX_OSD_PARTS];
     const struct ra_format *fmt_table[SUBBITMAP_COUNT];
     bool formats[SUBBITMAP_COUNT];
@@ -79,14 +76,11 @@ struct mpgl_osd {
 struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log,
                                struct osd_state *osd)
 {
-    struct ra_gl *ra_gl = ra->priv;
-
     struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx);
     *ctx = (struct mpgl_osd) {
         .log = log,
         .osd = osd,
         .ra = ra,
-        .gl = ra_gl->gl,
         .scratch = talloc_zero_size(ctx, 1),
     };
 
@@ -289,9 +283,8 @@ static void get_3d_side_by_side(int stereo_mode, int div[2])
 }
 
 void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index,
-                          struct gl_shader_cache *sc)
+                          struct gl_shader_cache *sc, struct ra_tex *target)
 {
-    GL *gl = ctx->gl;
     struct mpgl_osd_part *part = ctx->parts[index];
 
     int div[2];
@@ -313,20 +306,10 @@ void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index,
         }
     }
 
-    if (!part->num_vertices)
-        return;
-
-    gl->Enable(GL_BLEND);
-
     const int *factors = &blend_factors[part->format][0];
-    gl->BlendFuncSeparate(factors[0], factors[1], factors[2], factors[3]);
-
-    ctx->gl->Viewport(0, 0, vp_w, abs(vp_h));
-
-    gl_sc_draw_data(sc, GL_TRIANGLES, part->vertices, part->num_vertices);
+    gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]);
 
-    gl->BindTexture(GL_TEXTURE_2D, 0);
-    gl->Disable(GL_BLEND);
+    gl_sc_dispatch_draw(sc, target, part->vertices, part->num_vertices);
 }
 
 static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode)
diff --git a/video/out/opengl/osd.h b/video/out/opengl/osd.h
index a0aa104a48..08c143ac0e 100644
--- a/video/out/opengl/osd.h
+++ b/video/out/opengl/osd.h
@@ -5,7 +5,6 @@
 #include <inttypes.h>
 
 #include "utils.h"
-#include "gl_utils.h"
 #include "shader_cache.h"
 #include "sub/osd.h"
 
@@ -19,7 +18,7 @@ void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mod
 bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index,
                            struct gl_shader_cache *sc);
 void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int vp_w, int vp_h, int index,
-                          struct gl_shader_cache *sc);
+                          struct gl_shader_cache *sc, struct ra_tex *target);
 int64_t mpgl_get_change_counter(struct mpgl_osd *ctx);
 
 #endif
diff --git a/video/out/opengl/ra.c b/video/out/opengl/ra.c
index 9096a50148..e67dd6ebdf 100644
--- a/video/out/opengl/ra.c
+++ b/video/out/opengl/ra.c
@@ -16,6 +16,51 @@ void ra_tex_free(struct ra *ra, struct ra_tex **tex)
     *tex = NULL;
 }
 
+static size_t vartype_size(enum ra_vartype type)
+{
+    switch (type) {
+    case RA_VARTYPE_INT:        return sizeof(int);
+    case RA_VARTYPE_FLOAT:      return sizeof(float);
+    case RA_VARTYPE_BYTE_UNORM: return 1;
+    default: return 0;
+    }
+}
+
+// Return the size of the data ra_renderpass_input_val.data is going to point
+// to. This returns 0 for non-primitive types such as textures.
+size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input)
+{
+    size_t el_size = vartype_size(input->type);
+    return el_size * input->dim_v * input->dim_m;
+}
+
+static struct ra_renderpass_input *dup_inputs(void *ta_parent,
+            const struct ra_renderpass_input *inputs, int num_inputs)
+{
+    struct ra_renderpass_input *res =
+        talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0]));
+    for (int n = 0; n < num_inputs; n++)
+        res[n].name = talloc_strdup(res, res[n].name);
+    return res;
+}
+
+// Return a newly allocated deep-copy of params (all data owned by the copy).
+struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent,
+        const struct ra_renderpass_params *params)
+{
+    struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res);
+    *res = *params; // shallow copy first; every pointer is replaced below
+    res->inputs = dup_inputs(res, res->inputs, res->num_inputs);
+    res->vertex_attribs =
+        dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs);
+    res->cached_program = bstrdup(res, res->cached_program);
+    res->vertex_shader = talloc_strdup(res, res->vertex_shader);
+    res->frag_shader = talloc_strdup(res, res->frag_shader);
+    res->compute_shader = talloc_strdup(res, res->compute_shader);
+    return res;
+}
+
+
 // Return whether this is a tightly packed format with no external padding and
 // with the same bit size/depth in all components.
 static bool ra_format_is_regular(const struct ra_format *fmt)
diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h
index ab3027b78a..75ba0e5fe7 100644
--- a/video/out/opengl/ra.h
+++ b/video/out/opengl/ra.h
@@ -1,12 +1,16 @@
 #pragma once
 
 #include "common/common.h"
+#include "misc/bstr.h"
 
 // Handle for a rendering API backend.
 struct ra {
     struct ra_fns *fns;
     void *priv;
 
+    int glsl_version;       // GLSL version (e.g. 300 => 3.0)
+    bool glsl_es;           // use ES dialect
+
     struct mp_log *log;
 
     // RA_CAP_* bit field. The RA backend must set supported features at init
@@ -31,6 +35,9 @@ enum {
     RA_CAP_TEX_1D = 1 << 0,     // supports 1D textures (as shader source textures)
     RA_CAP_TEX_3D = 1 << 1,     // supports 3D textures (as shader source textures)
     RA_CAP_BLIT   = 1 << 2,     // supports ra_fns.blit
+    RA_CAP_COMPUTE = 1 << 3,    // supports compute shaders
+    RA_CAP_PBO    = 1 << 4,     // supports ra.use_pbo
+    RA_CAP_NESTED_ARRAY = 1 << 5,
 };
 
 enum ra_ctype {
@@ -81,6 +88,7 @@ struct ra_tex_params {
                             // if true, repeat texture coordinates
     bool non_normalized;    // hack for GL_TEXTURE_RECTANGLE OSX idiocy
                             // always set to false, except in OSX code
+    bool external_oes;      // hack for GL_TEXTURE_EXTERNAL_OES idiocy
     // If non-NULL, the texture will be created with these contents, and is
     // considered immutable afterwards (no upload, mapping, or rendering to it).
     void *initial_data;
@@ -108,6 +116,149 @@ struct ra_mapped_buffer {
     size_t size;            // total size of the mapping, starting at data
 };
 
+// Type of a shader uniform variable, or a vertex attribute. In all cases,
+// vectors and matrices are done by having more than 1 value.
+enum ra_vartype {
+    RA_VARTYPE_INVALID,
+    RA_VARTYPE_INT,             // C: int, GLSL: int, ivec*
+    RA_VARTYPE_FLOAT,           // C: float, GLSL: float, vec*, mat*
+    RA_VARTYPE_TEX,             // C: ra_tex*, GLSL: various sampler types
+                                // ra_tex.params.render_src must be true
+    RA_VARTYPE_IMG_W,           // C: ra_tex*, GLSL: various image types
+                                // write-only (W) image for compute shaders
+    RA_VARTYPE_BYTE_UNORM,      // C: uint8_t, GLSL: int, vec* (vertex data only)
+    RA_VARTYPE_SSBO,            // a hack for GL
+};
+
+// Represents a uniform, texture input parameter, and similar things.
+struct ra_renderpass_input {
+    const char *name;       // name as used in the shader
+    enum ra_vartype type;
+    // The total number of values is given by dim_v * dim_m.
+    int dim_v;              // vector dimension (1 for non-vector and non-matrix)
+    int dim_m;              // additional matrix dimension (dim_v x dim_m)
+    // Vertex data: byte offset of the attribute into the vertex struct
+    // RA_VARTYPE_TEX: texture unit
+    // RA_VARTYPE_IMG_W: image unit
+    // RA_VARTYPE_SSBO: whatever?
+    // Other uniforms: unused
+    int binding;
+};
+
+size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input);
+
+enum ra_blend {
+    RA_BLEND_ZERO,
+    RA_BLEND_ONE,
+    RA_BLEND_SRC_ALPHA,
+    RA_BLEND_ONE_MINUS_SRC_ALPHA,
+};
+
+enum ra_renderpass_type {
+    RA_RENDERPASS_TYPE_INVALID,
+    RA_RENDERPASS_TYPE_RASTER,  // vertex+fragment shader
+    RA_RENDERPASS_TYPE_COMPUTE, // compute shader
+};
+
+// Static part of a rendering pass. It conflates the following:
+//  - compiled shader and its list of uniforms
+//  - vertex attributes and its shader mappings
+//  - blending parameters
+// (For Vulkan, this would be shader module + pipeline state.)
+// Upon creation, the values of dynamic values such as uniform contents (whose
+// initial values are not provided here) are required to be 0.
+struct ra_renderpass_params {
+    enum ra_renderpass_type type;
+
+    // Uniforms, including texture/sampler inputs.
+    struct ra_renderpass_input *inputs;
+    int num_inputs;         // number of entries in inputs[]
+
+    // Highly implementation-specific byte array storing a compiled version
+    // of the program. Can be used to speed up shader compilation. A backend
+    // can read this in renderpass_create, or set this on the newly created
+    // ra_renderpass params field.
+    bstr cached_program;
+
+    // --- type==RA_RENDERPASS_TYPE_RASTER only
+
+    // Describes the format of the vertex data.
+    struct ra_renderpass_input *vertex_attribs;
+    int num_vertex_attribs; // number of entries in vertex_attribs[]
+    int vertex_stride;      // per-vertex stride; the GL backend uses it as VAO stride
+
+    // Shader text, in GLSL. (Yes, you need a GLSL compiler.)
+    // These are complete shaders, including prelude and declarations.
+    const char *vertex_shader;
+    const char *frag_shader;
+
+    // Target blending mode. If enable_blend is false, the blend_ fields can
+    // be ignored.
+    bool enable_blend;
+    enum ra_blend blend_src_rgb;
+    enum ra_blend blend_dst_rgb;
+    enum ra_blend blend_src_alpha;
+    enum ra_blend blend_dst_alpha;
+
+    // --- type==RA_RENDERPASS_TYPE_COMPUTE only
+
+    // Shader text, like vertex_shader/frag_shader.
+    const char *compute_shader;
+};
+
+struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent,
+        const struct ra_renderpass_params *params);
+
+// Conflates the following typical GPU API concepts:
+// - various kinds of shaders
+// - rendering pipelines
+// - descriptor sets, uniforms, other bindings
+// - all synchronization necessary
+// - the current values of all uniforms (this one makes it relatively stateful
+//   from an API perspective)
+struct ra_renderpass {
+    // All fields are read-only after creation.
+    struct ra_renderpass_params params;
+    void *priv;
+};
+
+// An input value (see ra_renderpass_input).
+struct ra_renderpass_input_val {
+    int index;  // index into ra_renderpass_params.inputs[]
+    void *data; // pointer to data according to ra_renderpass_input
+                // (e.g. type==RA_VARTYPE_FLOAT+dim_v=3,dim_m=3 => float[9])
+};
+
+// Parameters for performing a rendering pass (basically the dynamic params).
+// These change potentially every time.
+struct ra_renderpass_run_params {
+    struct ra_renderpass *pass;
+
+    // Generally this lists parameters only which changed since the last
+    // invocation and need to be updated. The ra_renderpass instance is
+    // supposed to keep unchanged values from the previous run.
+    // For non-primitive types like textures, these entries are always added,
+    // even if they do not change.
+    struct ra_renderpass_input_val *values;
+    int num_values;
+
+    // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only
+
+    // target->params.render_dst must be true.
+    struct ra_tex *target;
+    struct mp_rect viewport;
+    struct mp_rect scissors;
+
+    // (The primitive type is always a triangle list.)
+    void *vertex_data;
+    int vertex_count;   // number of vertex elements, not bytes
+
+    // --- pass->params.type==RA_RENDERPASS_TYPE_COMPUTE only
+
+    // Number of work groups to be run in X/Y/Z dimensions.
+    int compute_groups[3];
+};
+
 enum {
     // Flags for the texture_upload flags parameter.
     RA_TEX_UPLOAD_DISCARD = 1 << 0, // discard pre-existing data not in the region
@@ -183,6 +334,19 @@ struct ra_fns {
     // not be called, even if it's non-NULL).
     void (*blit)(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
                  int dst_x, int dst_y, struct mp_rect *src_rc);
+
+    // Compile a shader and create a pipeline. This is a rare operation.
+    // The params pointer and anything it points to must stay valid until
+    // renderpass_destroy.
+    struct ra_renderpass *(*renderpass_create)(struct ra *ra,
+                                    const struct ra_renderpass_params *params);
+
+    void (*renderpass_destroy)(struct ra *ra, struct ra_renderpass *pass);
+
+    // Perform a render pass, basically drawing a list of triangles to a FBO.
+    // This is an extremely common operation.
+    void (*renderpass_run)(struct ra *ra,
+                           const struct ra_renderpass_run_params *params);
 };
 
 struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params);
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index e1ecd337e4..7d52063295 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -1,5 +1,6 @@
-#include "formats.h"
+#include <libavutil/intreadwrite.h>
 
+#include "formats.h"
 #include "ra_gl.h"
 
 static struct ra_fns ra_fns_gl;
@@ -22,6 +23,14 @@ int ra_init_gl(struct ra *ra, GL *gl)
         ra->caps |= RA_CAP_TEX_3D;
     if (gl->BlitFramebuffer)
         ra->caps |= RA_CAP_BLIT;
+    if (gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER)
+        ra->caps |= RA_CAP_COMPUTE;
+    if (gl->MapBufferRange)
+        ra->caps |= RA_CAP_PBO;
+    if (gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY)
+        ra->caps |= RA_CAP_NESTED_ARRAY;
+    ra->glsl_version = gl->glsl_version;
+    ra->glsl_es = gl->es > 0;
 
     int gl_fmt_features = gl_format_feature_flags(gl);
 
@@ -271,6 +280,7 @@ static struct ra_tex *wrap_tex_fbo(struct ra *ra, GLuint gl_obj, bool is_fbo,
             .render_dst = is_fbo,
             .render_src = !is_fbo,
             .non_normalized = gl_target == GL_TEXTURE_RECTANGLE,
+            .external_oes = gl_target == GL_TEXTURE_EXTERNAL_OES,
         },
     };
 
@@ -310,6 +320,12 @@ struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h)
     return wrap_tex_fbo(ra, gl_fbo, true, 0, GL_RGBA, 0, 0, w, h);
 }
 
+GL *ra_gl_get(struct ra *ra)
+{
+    struct ra_gl *p = ra->priv;
+    return p->gl;
+}
+
 static void gl_tex_upload(struct ra *ra, struct ra_tex *tex,
                           const void *src, ptrdiff_t stride,
                           struct mp_rect *rc, uint64_t flags,
@@ -478,6 +494,367 @@ static void gl_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
     gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
 }
 
+static void gl_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+    struct ra_gl *p = ra->priv;
+    struct ra_renderpass_gl *pass_gl = pass->priv;
+    p->gl->DeleteProgram(pass_gl->program);
+    gl_vao_uninit(&pass_gl->vao);
+
+    talloc_free(pass_gl);
+    talloc_free(pass);
+}
+
+static const char *shader_typestr(GLenum type)
+{
+    switch (type) {
+    case GL_VERTEX_SHADER:   return "vertex";
+    case GL_FRAGMENT_SHADER: return "fragment";
+    case GL_COMPUTE_SHADER:  return "compute";
+    default: abort();
+    }
+}
+
+// Compile and attach one shader stage to program; clears *ok on failure.
+static void compile_attach_shader(struct ra *ra, GLuint program,
+                                  GLenum type, const char *source, bool *ok)
+{
+    GL *gl = ra_gl_get(ra);
+
+    GLuint shader = gl->CreateShader(type);
+    gl->ShaderSource(shader, 1, &source, NULL);
+    gl->CompileShader(shader);
+    GLint status = 0, log_length = 0;
+    gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status);
+    gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+
+    int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
+    const char *typestr = shader_typestr(type);
+    if (mp_msg_test(ra->log, pri)) {
+        MP_MSG(ra, pri, "%s shader source:\n", typestr);
+        mp_log_source(ra->log, pri, source);
+    }
+    if (log_length > 1) {
+        GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
+        gl->GetShaderInfoLog(shader, log_length, NULL, logstr);
+        MP_MSG(ra, pri, "%s shader compile log (status=%d):\n%s\n",
+               typestr, status, logstr);
+        talloc_free(logstr);
+    }
+    if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(ra->log, MSGL_DEBUG)) {
+        GLint len = 0;
+        gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len);
+        if (len > 0) {
+            GLchar *sstr = talloc_zero_size(NULL, len + 1);
+            gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr);
+            MP_DBG(ra, "Translated shader:\n");
+            mp_log_source(ra->log, MSGL_DEBUG, sstr);
+            talloc_free(sstr); // parentless allocation; free it like logstr
+        }
+    }
+
+    gl->AttachShader(program, shader);
+    gl->DeleteShader(shader); // the program keeps the attached shader alive
+
+    *ok &= status;
+}
+
+static void link_shader(struct ra *ra, GLuint program, bool *ok)
+{
+    struct ra_gl *p = ra->priv;
+    GL *gl = p->gl;
+
+    gl->LinkProgram(program);
+    GLint status = 0;
+    gl->GetProgramiv(program, GL_LINK_STATUS, &status);
+    GLint log_length = 0;
+    gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
+
+    int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
+    if (mp_msg_test(ra->log, pri)) {
+        GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
+        gl->GetProgramInfoLog(program, log_length, NULL, logstr);
+        MP_MSG(ra, pri, "shader link log (status=%d): %s\n", status, logstr);
+        talloc_free(logstr);
+    }
+
+    *ok &= status;
+}
+
+// either 'compute' or both 'vertex' and 'frag' are needed
+static GLuint compile_program(struct ra *ra, const struct ra_renderpass_params *p)
+{
+    struct ra_gl *priv = ra->priv;
+    GL *gl = priv->gl;
+
+    GLuint prog = gl->CreateProgram();
+    bool ok = true;
+    if (p->type == RA_RENDERPASS_TYPE_COMPUTE)
+        compile_attach_shader(ra, prog, GL_COMPUTE_SHADER, p->compute_shader, &ok);
+    if (p->type == RA_RENDERPASS_TYPE_RASTER) {
+        compile_attach_shader(ra, prog, GL_VERTEX_SHADER, p->vertex_shader, &ok);
+        compile_attach_shader(ra, prog, GL_FRAGMENT_SHADER, p->frag_shader, &ok);
+        for (int n = 0; n < p->num_vertex_attribs; n++)
+            gl->BindAttribLocation(prog, n, p->vertex_attribs[n].name);
+    }
+    link_shader(ra, prog, &ok);
+    if (!ok) {
+        gl->DeleteProgram(prog);
+        prog = 0;
+    }
+    return prog;
+}
+
+// Load p->cached_program if usable, else compile; may set *out_cached_data.
+static GLuint load_program(struct ra *ra, const struct ra_renderpass_params *p,
+                           bstr *out_cached_data)
+{
+    GL *gl = ra_gl_get(ra);
+    GLuint prog = 0;
+
+    if (gl->ProgramBinary && p->cached_program.len > 4) {
+        GLenum format = AV_RL32(p->cached_program.start);
+        prog = gl->CreateProgram();
+        gl_check_error(gl, ra->log, "before loading program");
+        gl->ProgramBinary(prog, format, p->cached_program.start + 4,
+                                        p->cached_program.len - 4);
+        gl->GetError(); // discard potential useless error
+        GLint status = 0;
+        gl->GetProgramiv(prog, GL_LINK_STATUS, &status);
+        if (status) {
+            MP_VERBOSE(ra, "Loading binary program succeeded.\n");
+        } else {
+            gl->DeleteProgram(prog);
+            prog = 0;
+        }
+    }
+
+    if (!prog) {
+        prog = compile_program(ra, p);
+        if (gl->GetProgramBinary && prog) {
+            GLint size = 0;
+            gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size);
+            uint8_t *buffer = talloc_size(NULL, size + 4);
+            GLsizei actual_size = 0;
+            GLenum binary_format = 0;
+            gl->GetProgramBinary(prog, size, &actual_size, &binary_format,
+                                 buffer + 4);
+            AV_WL32(buffer, binary_format); // 4-byte LE format tag precedes the blob
+            if (actual_size)
+                *out_cached_data = (bstr){buffer, actual_size + 4};
+            else
+                talloc_free(buffer); // no binary returned; don't leak the buffer
+        }
+    }
+
+    return prog;
+}
+
+static struct ra_renderpass *gl_renderpass_create(struct ra *ra,
+                                    const struct ra_renderpass_params *params)
+{
+    struct ra_gl *p = ra->priv;
+    GL *gl = p->gl;
+
+    struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+    pass->params = *ra_render_pass_params_copy(pass, params);
+    pass->params.cached_program = (bstr){0};
+    struct ra_renderpass_gl *pass_gl = pass->priv =
+        talloc_zero(NULL, struct ra_renderpass_gl);
+
+    bstr cached = {0};
+    pass_gl->program = load_program(ra, params, &cached);
+    if (!pass_gl->program) {
+        gl_renderpass_destroy(ra, pass);
+        return NULL;
+    }
+
+    talloc_steal(pass, cached.start);
+    pass->params.cached_program = cached;
+
+    for (int n = 0; n < params->num_inputs; n++) {
+        GLint loc =
+            gl->GetUniformLocation(pass_gl->program, params->inputs[n].name);
+        MP_TARRAY_APPEND(pass_gl, pass_gl->uniform_loc, pass_gl->num_uniform_loc,
+                         loc);
+    }
+
+    gl_vao_init(&pass_gl->vao, gl, params->vertex_stride, params->vertex_attribs,
+                params->num_vertex_attribs);
+
+    pass_gl->first_run = true;
+
+    return pass;
+}
+
+static GLenum map_blend(enum ra_blend blend)
+{
+    switch (blend) {
+    case RA_BLEND_ZERO:                 return GL_ZERO;
+    case RA_BLEND_ONE:                  return GL_ONE;
+    case RA_BLEND_SRC_ALPHA:            return GL_SRC_ALPHA;
+    case RA_BLEND_ONE_MINUS_SRC_ALPHA:  return GL_ONE_MINUS_SRC_ALPHA;
+    default: return 0;
+    }
+}
+
+// Assumes program is current (gl->UseProgram(program)).
+static void update_uniform(struct ra *ra, struct ra_renderpass *pass,
+                           struct ra_renderpass_input_val *val)
+{
+    struct ra_gl *p = ra->priv;
+    GL *gl = p->gl;
+    struct ra_renderpass_gl *pass_gl = pass->priv;
+
+    struct ra_renderpass_input *input = &pass->params.inputs[val->index];
+    assert(val->index >= 0 && val->index < pass_gl->num_uniform_loc);
+    GLint loc = pass_gl->uniform_loc[val->index];
+
+    switch (input->type) {
+    case RA_VARTYPE_INT: {
+        assert(input->dim_v * input->dim_m == 1);
+        if (loc < 0)
+            break;
+        gl->Uniform1i(loc, *(int *)val->data);
+        break;
+    }
+    case RA_VARTYPE_FLOAT: {
+        float *f = val->data;
+        if (loc < 0)
+            break;
+        if (input->dim_m == 1) {
+            switch (input->dim_v) {
+            case 1: gl->Uniform1f(loc, f[0]); break;
+            case 2: gl->Uniform2f(loc, f[0], f[1]); break;
+            case 3: gl->Uniform3f(loc, f[0], f[1], f[2]); break;
+            case 4: gl->Uniform4f(loc, f[0], f[1], f[2], f[3]); break;
+            default: abort();
+            }
+        } else if (input->dim_v == 2 && input->dim_m == 2) {
+            gl->UniformMatrix2fv(loc, 1, GL_FALSE, f);
+        } else if (input->dim_v == 3 && input->dim_m == 3) {
+            gl->UniformMatrix3fv(loc, 1, GL_FALSE, f);
+        } else {
+            abort();
+        }
+        break;
+    }
+    case RA_VARTYPE_IMG_W: /* fall through */
+    case RA_VARTYPE_TEX: {
+        struct ra_tex *tex = *(struct ra_tex **)val->data;
+        struct ra_tex_gl *tex_gl = tex->priv;
+        assert(tex->params.render_src);
+        if (pass_gl->first_run)
+            gl->Uniform1i(loc, input->binding);
+        if (input->type == RA_VARTYPE_TEX) {
+            gl->ActiveTexture(GL_TEXTURE0 + input->binding);
+            gl->BindTexture(tex_gl->target, tex_gl->texture);
+        } else {
+            gl->BindImageTexture(input->binding, tex_gl->texture, 0, GL_FALSE, 0,
+                                 GL_WRITE_ONLY, tex_gl->internal_format);
+        }
+        break;
+    }
+    case RA_VARTYPE_SSBO: {
+        gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding,
+                           *(int *)val->data);
+        break;
+    }
+    default:
+        abort();
+    }
+}
+
+static void disable_binding(struct ra *ra, struct ra_renderpass *pass,
+                           struct ra_renderpass_input_val *val)
+{
+    struct ra_gl *p = ra->priv;
+    GL *gl = p->gl;
+
+    struct ra_renderpass_input *input = &pass->params.inputs[val->index];
+
+    switch (input->type) {
+    case RA_VARTYPE_IMG_W: /* fall  through */
+    case RA_VARTYPE_TEX: {
+        struct ra_tex *tex = *(struct ra_tex **)val->data;
+        struct ra_tex_gl *tex_gl = tex->priv;
+        assert(tex->params.render_src);
+        if (input->type == RA_VARTYPE_TEX) {
+            gl->ActiveTexture(GL_TEXTURE0 + input->binding);
+            gl->BindTexture(tex_gl->target, 0);
+        } else {
+            gl->BindImageTexture(input->binding, 0, 0, GL_FALSE, 0,
+                                 GL_WRITE_ONLY, tex_gl->internal_format);
+        }
+        break;
+    }
+    case RA_VARTYPE_SSBO: {
+        gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, 0);
+        break;
+    }
+    }
+}
+
+static void gl_renderpass_run(struct ra *ra,
+                              const struct ra_renderpass_run_params *params)
+{
+    struct ra_gl *p = ra->priv;
+    GL *gl = p->gl;
+    struct ra_renderpass *pass = params->pass;
+    struct ra_renderpass_gl *pass_gl = pass->priv;
+
+    gl->UseProgram(pass_gl->program);
+
+    for (int n = 0; n < params->num_values; n++)
+        update_uniform(ra, pass, &params->values[n]);
+    gl->ActiveTexture(GL_TEXTURE0);
+
+    switch (pass->params.type) {
+    case RA_RENDERPASS_TYPE_RASTER: {
+        struct ra_tex_gl *target_gl = params->target->priv;
+        assert(params->target->params.render_dst);
+        gl->BindFramebuffer(GL_FRAMEBUFFER, target_gl->fbo);
+        gl->Viewport(params->viewport.x0, params->viewport.y0,
+                     mp_rect_w(params->viewport),
+                     mp_rect_h(params->viewport));
+        gl->Scissor(params->scissors.x0, params->scissors.y0,
+                    mp_rect_w(params->scissors),
+                    mp_rect_h(params->scissors));
+        gl->Enable(GL_SCISSOR_TEST);
+        if (pass->params.enable_blend) {
+            gl->BlendFuncSeparate(map_blend(pass->params.blend_src_rgb),
+                                  map_blend(pass->params.blend_dst_rgb),
+                                  map_blend(pass->params.blend_src_alpha),
+                                  map_blend(pass->params.blend_dst_alpha));
+            gl->Enable(GL_BLEND);
+        }
+        gl_vao_draw_data(&pass_gl->vao, GL_TRIANGLES, params->vertex_data,
+                         params->vertex_count);
+        gl->Disable(GL_SCISSOR_TEST);
+        gl->Disable(GL_BLEND);
+        gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+        break;
+    }
+    case RA_RENDERPASS_TYPE_COMPUTE: {
+        gl->DispatchCompute(params->compute_groups[0],
+                            params->compute_groups[1],
+                            params->compute_groups[2]);
+
+        gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
+        break;
+    }
+    default: abort();
+    }
+
+    for (int n = 0; n < params->num_values; n++)
+        disable_binding(ra, pass, &params->values[n]);
+    gl->ActiveTexture(GL_TEXTURE0);
+
+    gl->UseProgram(0);
+
+    pass_gl->first_run = false;
+}
+
 static struct ra_fns ra_fns_gl = {
     .destroy                = gl_destroy,
     .tex_create             = gl_tex_create,
@@ -488,4 +865,7 @@ static struct ra_fns ra_fns_gl = {
     .poll_mapped_buffer     = gl_poll_mapped_buffer,
     .clear                  = gl_clear,
     .blit                   = gl_blit,
+    .renderpass_create      = gl_renderpass_create,
+    .renderpass_destroy     = gl_renderpass_destroy,
+    .renderpass_run         = gl_renderpass_run,
 };
diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h
index 016ce13419..0d3828c978 100644
--- a/video/out/opengl/ra_gl.h
+++ b/video/out/opengl/ra_gl.h
@@ -28,9 +28,21 @@ struct ra_mapped_buffer_gl {
     GLsync fence;
 };
 
+// GL backend state of one render pass; stored in ra_renderpass.priv.
+struct ra_renderpass_gl {
+    GLuint program; // presumably the GL program object of this pass
+    // 1 entry for each ra_renderpass_params.inputs[] entry
+    GLint *uniform_loc;
+    int num_uniform_loc; // == ra_renderpass_params.num_inputs
+    struct gl_vao vao; // drawn via gl_vao_draw_data() for raster passes
+    bool first_run; // set to false at the end of gl_renderpass_run()
+};
+
 int ra_init_gl(struct ra *ra, GL *gl);
 struct ra_tex *ra_create_wrapped_texture(struct ra *ra, GLuint gl_texture,
                                          GLenum gl_target, GLint gl_iformat,
                                          GLenum gl_format, GLenum gl_type,
                                          int w, int h);
 struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h);
+
+GL *ra_gl_get(struct ra *ra);
diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c
index 7f8b37be64..fdfafbeb60 100644
--- a/video/out/opengl/shader_cache.c
+++ b/video/out/opengl/shader_cache.c
@@ -6,7 +6,6 @@
 #include <assert.h>
 
 #include <libavutil/sha.h>
-#include <libavutil/intreadwrite.h>
 #include <libavutil/mem.h>
 
 #include "osdep/io.h"
@@ -17,63 +16,37 @@
 #include "shader_cache.h"
 #include "formats.h"
 #include "ra_gl.h"
-#include "gl_utils.h"
 
 // Force cache flush if more than this number of shaders is created.
 #define SC_MAX_ENTRIES 48
 
-enum uniform_type {
-    UT_invalid,
-    UT_i,
-    UT_f,
-    UT_m,
-};
-
 union uniform_val {
-    GLfloat f[9];
-    GLint i[4];
+    float f[9];         // RA_VARTYPE_FLOAT
+    int i[4];           // RA_VARTYPE_INT, RA_VARTYPE_SSBO
+    struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_*
 };
 
 struct sc_uniform {
-    char *name;
-    enum uniform_type type;
+    struct ra_renderpass_input input;
     const char *glsl_type;
-    int size;
-    GLint loc;
     union uniform_val v;
-    // Set for sampler uniforms.
-    GLenum tex_target;
-    GLuint tex_handle;
-    // Set for image uniforms
-    GLuint img_handle;
-    GLenum img_access;
-    GLenum img_iformat;
-};
-
-struct sc_buffer {
-    char *name;
-    char *format;
-    GLuint binding;
-    GLuint ssbo;
+    char *ssbo_format;
 };
 
 struct sc_cached_uniform {
-    GLint loc;
     union uniform_val v;
 };
 
 struct sc_entry {
-    GLuint gl_shader;
-    struct sc_cached_uniform *uniforms;
-    int num_uniforms;
-    bstr frag;
-    bstr vert;
-    bstr comp;
+    struct ra_renderpass *pass;
+    struct sc_cached_uniform *cached_uniforms;
+    int num_cached_uniforms;
+    bstr total;
     struct gl_timer *timer;
-    struct gl_vao vao;
 };
 
 struct gl_shader_cache {
+    struct ra *ra;
     GL *gl;
     struct mp_log *log;
 
@@ -88,20 +61,19 @@ struct gl_shader_cache {
     int next_texture_unit;
     int next_image_unit;
     int next_buffer_binding;
-    struct gl_vao *vao; // deprecated
 
-    struct sc_entry *entries;
+    struct ra_renderpass_params params;
+
+    struct sc_entry **entries;
     int num_entries;
 
     struct sc_entry *current_shader; // set by gl_sc_generate()
 
     struct sc_uniform *uniforms;
     int num_uniforms;
-    struct sc_buffer *buffers;
-    int num_buffers;
 
-    const struct gl_vao_entry *vertex_entries;
-    size_t vertex_size;
+    struct ra_renderpass_input_val *values;
+    int num_values;
 
     // For checking that the user is calling gl_sc_reset() properly.
     bool needs_reset;
@@ -109,18 +81,23 @@ struct gl_shader_cache {
     bool error_state; // true if an error occurred
 
     // temporary buffers (avoids frequent reallocations)
-    bstr tmp[5];
+    bstr tmp[6];
 
     // For the disk-cache.
     char *cache_dir;
     struct mpv_global *global; // can be NULL
 };
 
-struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log)
+static void gl_sc_reset(struct gl_shader_cache *sc);
+
+struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global,
+                                     struct mp_log *log)
 {
     struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc);
     *sc = (struct gl_shader_cache){
-        .gl = gl,
+        .ra = ra,
+        .gl = ra_gl_get(ra),
+        .global = global,
         .log = log,
     };
     gl_sc_reset(sc);
@@ -129,50 +106,24 @@ struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log)
 
 // Reset the previous pass. This must be called after
 // Unbind all GL state managed by sc - the current program and texture units.
-void gl_sc_reset(struct gl_shader_cache *sc)
+static void gl_sc_reset(struct gl_shader_cache *sc)
 {
     GL *gl = sc->gl;
 
-    if (sc->needs_reset) {
+    if (sc->needs_reset)
         gl_timer_stop(gl);
-        gl->UseProgram(0);
-
-        for (int n = 0; n < sc->num_uniforms; n++) {
-            struct sc_uniform *u = &sc->uniforms[n];
-            if (u->type == UT_i && u->tex_target) {
-                gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]);
-                gl->BindTexture(u->tex_target, 0);
-            }
-            if (u->type == UT_i && u->img_access) {
-                gl->BindImageTexture(u->v.i[0], 0, 0, GL_FALSE, 0,
-                                     u->img_access, u->img_iformat);
-            }
-        }
-        gl->ActiveTexture(GL_TEXTURE0);
-
-        for (int n = 0; n < sc->num_buffers; n++) {
-            struct sc_buffer *b = &sc->buffers[n];
-            gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, b->binding, 0);
-        }
-    }
 
     sc->prelude_text.len = 0;
     sc->header_text.len = 0;
     sc->text.len = 0;
     for (int n = 0; n < sc->num_uniforms; n++)
-        talloc_free(sc->uniforms[n].name);
+        talloc_free((void *)sc->uniforms[n].input.name);
     sc->num_uniforms = 0;
-    for (int n = 0; n < sc->num_buffers; n++) {
-        talloc_free(sc->buffers[n].name);
-        talloc_free(sc->buffers[n].format);
-    }
-    sc->num_buffers = 0;
     sc->next_texture_unit = 1; // not 0, as 0 is "free for use"
     sc->next_image_unit = 1;
     sc->next_buffer_binding = 1;
-    sc->vertex_entries = NULL;
-    sc->vertex_size = 0;
     sc->current_shader = NULL;
+    sc->params = (struct ra_renderpass_params){0};
     sc->needs_reset = false;
 }
 
@@ -181,14 +132,11 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
     MP_VERBOSE(sc, "flushing shader cache\n");
 
     for (int n = 0; n < sc->num_entries; n++) {
-        struct sc_entry *e = &sc->entries[n];
-        sc->gl->DeleteProgram(e->gl_shader);
-        talloc_free(e->vert.start);
-        talloc_free(e->frag.start);
-        talloc_free(e->comp.start);
-        talloc_free(e->uniforms);
+        struct sc_entry *e = sc->entries[n];
+        if (e->pass)
+            sc->ra->fns->renderpass_destroy(sc->ra, e->pass);
         gl_timer_free(e->timer);
-        gl_vao_uninit(&e->vao);
+        talloc_free(e);
     }
     sc->num_entries = 0;
 }
@@ -265,144 +213,102 @@ void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
 static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
                                        const char *name)
 {
-    for (int n = 0; n < sc->num_uniforms; n++) {
-        if (strcmp(sc->uniforms[n].name, name) == 0)
-            return &sc->uniforms[n];
-    }
-    // not found -> add it
     struct sc_uniform new = {
-        .loc = -1,
-        .name = talloc_strdup(NULL, name),
-    };
-    MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new);
-    return &sc->uniforms[sc->num_uniforms - 1];
-}
-
-static struct sc_buffer *find_buffer(struct gl_shader_cache *sc,
-                                     const char *name)
-{
-    for (int n = 0; n < sc->num_buffers; n++) {
-        if (strcmp(sc->buffers[n].name, name) == 0)
-            return &sc->buffers[n];
-    }
-    // not found -> add it
-    struct sc_buffer new = {
-        .name = talloc_strdup(NULL, name),
+        .input = {
+            .dim_v = 1,
+            .dim_m = 1,
+        },
     };
-    MP_TARRAY_APPEND(sc, sc->buffers, sc->num_buffers, new);
-    return &sc->buffers[sc->num_buffers - 1];
-}
 
-const char *mp_sampler_type(GLenum texture_target)
-{
-    switch (texture_target) {
-    case GL_TEXTURE_1D:         return "sampler1D";
-    case GL_TEXTURE_2D:         return "sampler2D";
-    case GL_TEXTURE_RECTANGLE:  return "sampler2DRect";
-    case GL_TEXTURE_EXTERNAL_OES: return "samplerExternalOES";
-    case GL_TEXTURE_3D:         return "sampler3D";
-    default: abort();
+    for (int n = 0; n < sc->num_uniforms; n++) {
+        struct sc_uniform *u = &sc->uniforms[n];
+        if (strcmp(u->input.name, name) == 0) {
+            const char *allocname = u->input.name;
+            *u = new;
+            u->input.name = allocname;
+            return u;
+        }
     }
-}
 
-void gl_sc_uniform_tex(struct gl_shader_cache *sc, char *name, GLenum target,
-                       GLuint texture)
-{
-    struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_i;
-    u->size = 1;
-    u->glsl_type = mp_sampler_type(target);
-    u->v.i[0] = sc->next_texture_unit++;
-    u->tex_target = target;
-    u->tex_handle = texture;
-}
-
-void gl_sc_uniform_tex_ui(struct gl_shader_cache *sc, char *name, GLuint texture)
-{
-    struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_i;
-    u->size = 1;
-    u->glsl_type = sc->gl->es ? "highp usampler2D" : "usampler2D";
-    u->v.i[0] = sc->next_texture_unit++;
-    u->tex_target = GL_TEXTURE_2D;
-    u->tex_handle = texture;
+    // not found -> add it
+    new.input.name = talloc_strdup(NULL, name);
+    MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new);
+    return &sc->uniforms[sc->num_uniforms - 1];
 }
 
 void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
                            struct ra_tex *tex)
 {
-    struct ra_tex_gl *tex_gl = tex->priv;
-    if (tex->params.format->ctype == RA_CTYPE_UINT) {
-        gl_sc_uniform_tex_ui(sc, name, tex_gl->texture);
-    } else {
-        gl_sc_uniform_tex(sc, name, tex_gl->target, tex_gl->texture);
+    const char *glsl_type = "sampler2D";
+    if (tex->params.dimensions == 1) {
+        glsl_type = "sampler1D";
+    } else if (tex->params.dimensions == 3) {
+        glsl_type = "sampler3D";
+    } else if (tex->params.non_normalized) {
+        glsl_type = "sampler2DRect";
+    } else if (tex->params.external_oes) {
+        glsl_type = "samplerExternalOES";
+    } else if (tex->params.format->ctype == RA_CTYPE_UINT) {
+        glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D";
     }
-}
 
-static const char *mp_image2D_type(GLenum access)
-{
-    switch (access) {
-    case GL_WRITE_ONLY: return "writeonly image2D";
-    case GL_READ_ONLY:  return "readonly image2D";
-    case GL_READ_WRITE: return "image2D";
-    default: abort();
-    }
+    struct sc_uniform *u = find_uniform(sc, name);
+    u->input.type = RA_VARTYPE_TEX;
+    u->glsl_type = glsl_type;
+    u->input.binding = sc->next_texture_unit++;
+    u->v.tex = tex;
 }
 
-void gl_sc_uniform_image2D(struct gl_shader_cache *sc, const char *name,
-                           GLuint texture, GLuint iformat, GLenum access)
+void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name,
+                              struct ra_tex *tex)
 {
     gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store");
 
     struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_i;
-    u->size = 1;
-    u->glsl_type = mp_image2D_type(access);
-    u->v.i[0] = sc->next_image_unit++;
-    u->img_handle = texture;
-    u->img_access = access;
-    u->img_iformat = iformat;
+    u->input.type = RA_VARTYPE_IMG_W;
+    u->glsl_type = "writeonly image2D";
+    u->input.binding = sc->next_image_unit++;
+    u->v.tex = tex;
 }
 
-void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo,
+void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, int gl_ssbo,
                 char *format, ...)
 {
     gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object");
 
-    struct sc_buffer *b = find_buffer(sc, name);
-    b->binding = sc->next_buffer_binding++;
-    b->ssbo = ssbo;
-    b->format = format;
+    struct sc_uniform *u = find_uniform(sc, name);
+    u->input.type = RA_VARTYPE_SSBO;
+    u->glsl_type = "";
+    u->input.binding = sc->next_buffer_binding++;
+    u->v.i[0] = gl_ssbo;
 
     va_list ap;
     va_start(ap, format);
-    b->format = ta_vasprintf(sc, format, ap);
+    u->ssbo_format = ta_vasprintf(sc, format, ap);
     va_end(ap);
 }
 
-void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f)
+void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
 {
     struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_f;
-    u->size = 1;
+    u->input.type = RA_VARTYPE_FLOAT;
     u->glsl_type = "float";
     u->v.f[0] = f;
 }
 
-void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint i)
+void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
 {
     struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_i;
-    u->size = 1;
+    u->input.type = RA_VARTYPE_INT;
     u->glsl_type = "int";
     u->v.i[0] = i;
 }
 
-void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2])
+void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
 {
     struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_f;
-    u->size = 2;
+    u->input.type = RA_VARTYPE_FLOAT;
+    u->input.dim_v = 2;
     u->glsl_type = "vec2";
     u->v.f[0] = f[0];
     u->v.f[1] = f[1];
@@ -411,8 +317,8 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2])
 void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3])
 {
     struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_f;
-    u->size = 3;
+    u->input.type = RA_VARTYPE_FLOAT;
+    u->input.dim_v = 3;
     u->glsl_type = "vec3";
     u->v.f[0] = f[0];
     u->v.f[1] = f[1];
@@ -428,8 +334,9 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
                         bool transpose, GLfloat *v)
 {
     struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_m;
-    u->size = 2;
+    u->input.type = RA_VARTYPE_FLOAT;
+    u->input.dim_v = 2;
+    u->input.dim_m = 2;
     u->glsl_type = "mat2";
     for (int n = 0; n < 4; n++)
         u->v.f[n] = v[n];
@@ -448,8 +355,9 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
                         bool transpose, GLfloat *v)
 {
     struct sc_uniform *u = find_uniform(sc, name);
-    u->type = UT_m;
-    u->size = 3;
+    u->input.type = RA_VARTYPE_FLOAT;
+    u->input.dim_v = 3;
+    u->input.dim_m = 3;
     u->glsl_type = "mat3";
     for (int n = 0; n < 9; n++)
         u->v.f[n] = v[n];
@@ -461,17 +369,33 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
 // data layout and attribute names. The entries array is terminated with a {0}
 // entry. The array memory must remain valid indefinitely (for now).
 void gl_sc_set_vertex_format(struct gl_shader_cache *sc,
-                             const struct gl_vao_entry *entries,
-                             size_t vertex_size)
+                             const struct ra_renderpass_input *entries,
+                             int vertex_stride)
 {
-    sc->vertex_entries = entries;
-    sc->vertex_size = vertex_size;
+    sc->params.vertex_attribs = (struct ra_renderpass_input *)entries;
+    sc->params.num_vertex_attribs = 0;
+    while (entries[sc->params.num_vertex_attribs].name)
+        sc->params.num_vertex_attribs++;
+    sc->params.vertex_stride = vertex_stride;
 }
 
-static const char *vao_glsl_type(const struct gl_vao_entry *e)
+void gl_sc_blend(struct gl_shader_cache *sc,
+                 enum ra_blend blend_src_rgb,
+                 enum ra_blend blend_dst_rgb,
+                 enum ra_blend blend_src_alpha,
+                 enum ra_blend blend_dst_alpha)
+{
+    sc->params.enable_blend = true; // stays set until gl_sc_reset() zeroes params
+    sc->params.blend_src_rgb = blend_src_rgb;
+    sc->params.blend_dst_rgb = blend_dst_rgb;
+    sc->params.blend_src_alpha = blend_src_alpha;
+    sc->params.blend_dst_alpha = blend_dst_alpha;
+}
+
+static const char *vao_glsl_type(const struct ra_renderpass_input *e)
 {
     // pretty dumb... too dumb, but works for us
-    switch (e->num_elems) {
+    switch (e->dim_v) {
     case 1: return "float";
     case 2: return "vec2";
     case 3: return "vec3";
@@ -480,165 +404,36 @@ static const char *vao_glsl_type(const struct gl_vao_entry *e)
     }
 }
 
-// Assumes program is current (gl->UseProgram(program)).
-static void update_uniform(GL *gl, struct sc_entry *e, struct sc_uniform *u, int n)
+static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
+                           struct sc_uniform *u, int n)
 {
-    struct sc_cached_uniform *un = &e->uniforms[n];
-    GLint loc = un->loc;
-    if (loc < 0)
-        return;
-    switch (u->type) {
-    case UT_i:
-        assert(u->size == 1);
-        if (memcmp(un->v.i, u->v.i, sizeof(u->v.i)) != 0) {
-            memcpy(un->v.i, u->v.i, sizeof(u->v.i));
-            gl->Uniform1i(loc, u->v.i[0]);
-        }
-        // For samplers: set the actual texture.
-        if (u->tex_target) {
-            gl->ActiveTexture(GL_TEXTURE0 + u->v.i[0]);
-            gl->BindTexture(u->tex_target, u->tex_handle);
-        }
-        if (u->img_handle) {
-            gl->BindImageTexture(u->v.i[0], u->img_handle, 0, GL_FALSE, 0,
-                                 u->img_access, u->img_iformat);
-        }
-        break;
-    case UT_f:
-        if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) {
-            memcpy(un->v.f, u->v.f, sizeof(u->v.f));
-            switch (u->size) {
-            case 1: gl->Uniform1f(loc, u->v.f[0]); break;
-            case 2: gl->Uniform2f(loc, u->v.f[0], u->v.f[1]); break;
-            case 3: gl->Uniform3f(loc, u->v.f[0], u->v.f[1], u->v.f[2]); break;
-            case 4: gl->Uniform4f(loc, u->v.f[0], u->v.f[1], u->v.f[2],
-                                  u->v.f[3]); break;
-            default: abort();
-            }
-        }
-        break;
-    case UT_m:
-        if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) {
-            memcpy(un->v.f, u->v.f, sizeof(u->v.f));
-            switch (u->size) {
-            case 2: gl->UniformMatrix2fv(loc, 1, GL_FALSE, &u->v.f[0]); break;
-            case 3: gl->UniformMatrix3fv(loc, 1, GL_FALSE, &u->v.f[0]); break;
-            default: abort();
-            }
-        }
-        break;
-    default:
-        abort();
+    struct sc_cached_uniform *un = &e->cached_uniforms[n];
+    struct ra_renderpass_input *input = &e->pass->params.inputs[n];
+    size_t size = ra_render_pass_input_data_size(input);
+    bool changed = true;
+    if (size > 0)
+        changed = memcmp(&un->v, &u->v, size) != 0;
+
+    if (changed) {
+        un->v = u->v;
+        struct ra_renderpass_input_val value = {
+            .index = n,
+            .data = &un->v,
+        };
+        MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
     }
 }
 
-void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global,
-                         const char *dir)
+void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir)
 {
     talloc_free(sc->cache_dir);
     sc->cache_dir = talloc_strdup(sc, dir);
-    sc->global = global;
 }
 
-static const char *shader_typestr(GLenum type)
+static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
 {
-    switch (type) {
-    case GL_VERTEX_SHADER:   return "vertex";
-    case GL_FRAGMENT_SHADER: return "fragment";
-    case GL_COMPUTE_SHADER:  return "compute";
-    default: abort();
-    }
-}
-
-static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program,
-                                  GLenum type, const char *source)
-{
-    GL *gl = sc->gl;
-
-    GLuint shader = gl->CreateShader(type);
-    gl->ShaderSource(shader, 1, &source, NULL);
-    gl->CompileShader(shader);
-    GLint status = 0;
-    gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status);
-    GLint log_length = 0;
-    gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
-
-    int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
-    const char *typestr = shader_typestr(type);
-    if (mp_msg_test(sc->log, pri)) {
-        MP_MSG(sc, pri, "%s shader source:\n", typestr);
-        mp_log_source(sc->log, pri, source);
-    }
-    if (log_length > 1) {
-        GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
-        gl->GetShaderInfoLog(shader, log_length, NULL, logstr);
-        MP_MSG(sc, pri, "%s shader compile log (status=%d):\n%s\n",
-               typestr, status, logstr);
-        talloc_free(logstr);
-    }
-    if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(sc->log, MSGL_DEBUG)) {
-        GLint len = 0;
-        gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len);
-        if (len > 0) {
-            GLchar *sstr = talloc_zero_size(NULL, len + 1);
-            gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr);
-            MP_DBG(sc, "Translated shader:\n");
-            mp_log_source(sc->log, MSGL_DEBUG, sstr);
-        }
-    }
-
-    gl->AttachShader(program, shader);
-    gl->DeleteShader(shader);
-
-    if (!status)
-        sc->error_state = true;
-}
-
-static void link_shader(struct gl_shader_cache *sc, GLuint program)
-{
-    GL *gl = sc->gl;
-    gl->LinkProgram(program);
-    GLint status = 0;
-    gl->GetProgramiv(program, GL_LINK_STATUS, &status);
-    GLint log_length = 0;
-    gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
-
-    int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
-    if (mp_msg_test(sc->log, pri)) {
-        GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
-        gl->GetProgramInfoLog(program, log_length, NULL, logstr);
-        MP_MSG(sc, pri, "shader link log (status=%d): %s\n", status, logstr);
-        talloc_free(logstr);
-    }
-
-    if (!status)
-        sc->error_state = true;
-}
-
-// either 'compute' or both 'vertex' and 'frag' are needed
-static GLuint compile_program(struct gl_shader_cache *sc, struct bstr *vertex,
-                              struct bstr *frag, struct bstr *compute)
-{
-    GL *gl = sc->gl;
-    GLuint prog = gl->CreateProgram();
-    if (compute)
-        compile_attach_shader(sc, prog, GL_COMPUTE_SHADER, compute->start);
-    if (vertex && frag) {
-        compile_attach_shader(sc, prog, GL_VERTEX_SHADER, vertex->start);
-        compile_attach_shader(sc, prog, GL_FRAGMENT_SHADER, frag->start);
-        for (int n = 0; sc->vertex_entries[n].name; n++) {
-            char *vname = mp_tprintf(80, "vertex_%s", sc->vertex_entries[n].name);
-            gl->BindAttribLocation(prog, n, vname);
-        }
-    }
-    link_shader(sc, prog);
-    return prog;
-}
-
-static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex,
-                           struct bstr *frag, struct bstr *compute)
-{
-    GL *gl = sc->gl;
+    void *tmp = talloc_new(NULL);
+    struct ra_renderpass_params params = sc->params;
 
     MP_VERBOSE(sc, "new shader program:\n");
     if (sc->header_text.len) {
@@ -649,98 +444,94 @@ static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex,
     if (sc->text.len)
         mp_log_source(sc->log, MSGL_V, sc->text.start);
 
-    if (!sc->cache_dir || !sc->cache_dir[0] || !gl->ProgramBinary)
-        return compile_program(sc, vertex, frag, compute);
-
-    // Try to load it from a disk cache, or compiling + saving it.
-
-    GLuint prog = 0;
-    void *tmp = talloc_new(NULL);
-    char *dir = mp_get_user_path(tmp, sc->global, sc->cache_dir);
-
-    struct AVSHA *sha = av_sha_alloc();
-    if (!sha)
-        abort();
-    av_sha_init(sha, 256);
-
-    if (vertex)
-        av_sha_update(sha, vertex->start, vertex->len + 1);
-    if (frag)
-        av_sha_update(sha, frag->start, frag->len + 1);
-    if (compute)
-        av_sha_update(sha, compute->start, compute->len + 1);
-
-    // In theory, the array could change order, breaking old binaries.
-    for (int n = 0; sc->vertex_entries[n].name; n++) {
-        av_sha_update(sha, sc->vertex_entries[n].name,
-                      strlen(sc->vertex_entries[n].name) + 1);
+    // The vertex shader uses mangled ("vertex_"-prefixed) attribute names, so
+    // that the fragment shader can use the "real" names. The pass is therefore
+    // created with the mangled names (at least older GLSL targets need this).
+    params.vertex_attribs = talloc_memdup(tmp, params.vertex_attribs,
+                params.num_vertex_attribs * sizeof(params.vertex_attribs[0]));
+    for (int n = 0; n < params.num_vertex_attribs; n++) {
+        struct ra_renderpass_input *attrib = &params.vertex_attribs[n];
+        attrib->name = talloc_asprintf(tmp, "vertex_%s", attrib->name);
     }
 
-    uint8_t hash[256 / 8];
-    av_sha_final(sha, hash);
-    av_free(sha);
-
-    char hashstr[256 / 8 * 2 + 1];
-    for (int n = 0; n < 256 / 8; n++)
-        snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]);
-
-    const char *header = "mpv shader cache v1\n";
-    size_t header_size = strlen(header) + 4;
-
-    char *filename = mp_path_join(tmp, dir, hashstr);
-    if (stat(filename, &(struct stat){0}) == 0) {
-        MP_VERBOSE(sc, "Trying to load shader from disk...\n");
-        struct bstr cachedata = stream_read_file(filename, tmp, sc->global,
-                                                 1000000000); // 1 GB
-        if (cachedata.len > header_size) {
-            GLenum format = AV_RL32(cachedata.start + header_size - 4);
-            prog = gl->CreateProgram();
-            gl_check_error(gl, sc->log, "before loading program");
-            gl->ProgramBinary(prog, format, cachedata.start + header_size,
-                                            cachedata.len - header_size);
-            gl->GetError(); // discard potential useless error
-            GLint status = 0;
-            gl->GetProgramiv(prog, GL_LINK_STATUS, &status);
-            if (!status) {
-                gl->DeleteProgram(prog);
-                prog = 0;
-            }
+    const char *cache_header = "mpv shader cache v1\n";
+    char *cache_filename = NULL;
+    char *cache_dir = NULL;
+
+    if (sc->cache_dir && sc->cache_dir[0]) {
+        // Try to load it from a disk cache.
+        cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir);
+
+        struct AVSHA *sha = av_sha_alloc();
+        if (!sha)
+            abort();
+        av_sha_init(sha, 256);
+        av_sha_update(sha, entry->total.start, entry->total.len);
+
+        uint8_t hash[256 / 8];
+        av_sha_final(sha, hash);
+        av_free(sha);
+
+        char hashstr[256 / 8 * 2 + 1];
+        for (int n = 0; n < 256 / 8; n++)
+            snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]);
+
+        cache_filename = mp_path_join(tmp, cache_dir, hashstr);
+        if (stat(cache_filename, &(struct stat){0}) == 0) {
+            MP_VERBOSE(sc, "Trying to load shader from disk...\n");
+            struct bstr cachedata =
+                stream_read_file(cache_filename, tmp, sc->global, 1000000000);
+            if (bstr_eatstart0(&cachedata, cache_header))
+                params.cached_program = cachedata;
         }
-        MP_VERBOSE(sc, "Loading cached shader %s.\n", prog ? "ok" : "failed");
     }
 
-    if (!prog) {
-        prog = compile_program(sc, vertex, frag, compute);
+    entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params);
 
-        GLint size = 0;
-        gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size);
-        uint8_t *buffer = talloc_size(tmp, size + header_size);
-        GLsizei actual_size = 0;
-        GLenum binary_format = 0;
-        gl->GetProgramBinary(prog, size, &actual_size, &binary_format,
-                             buffer + header_size);
-        memcpy(buffer, header, header_size - 4);
-        AV_WL32(buffer + header_size - 4, binary_format);
+    if (!entry->pass)
+        sc->error_state = true;
 
-        if (actual_size) {
-            mp_mkdirp(dir);
+    if (entry->pass && cache_filename) {
+        bstr nc = entry->pass->params.cached_program;
+        if (nc.len && !bstr_equals(params.cached_program, nc)) {
+            mp_mkdirp(cache_dir);
 
-            MP_VERBOSE(sc, "Writing shader cache file: %s\n", filename);
-            FILE *out = fopen(filename, "wb");
+            MP_VERBOSE(sc, "Writing shader cache file: %s\n", cache_filename);
+            FILE *out = fopen(cache_filename, "wb");
             if (out) {
-                fwrite(buffer, header_size + actual_size, 1, out);
+                fwrite(cache_header, strlen(cache_header), 1, out);
+                fwrite(nc.start, nc.len, 1, out);
                 fclose(out);
             }
         }
     }
 
     talloc_free(tmp);
-    return prog;
 }
 
 #define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__)
 #define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s))
 
+static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
+{
+    for (int n = 0; n < sc->num_uniforms; n++) { // one GLSL decl per uniform
+        struct sc_uniform *u = &sc->uniforms[n];
+        switch (u->input.type) {
+        case RA_VARTYPE_INT:
+        case RA_VARTYPE_FLOAT:
+        case RA_VARTYPE_TEX:
+        case RA_VARTYPE_IMG_W:
+            ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name);
+            break;
+        case RA_VARTYPE_SSBO: // emitted as a std430 buffer block, not a uniform
+            ADD(dst, "layout(std430, binding=%d) buffer %s { %s };\n",
+                u->input.binding, u->input.name, u->ssbo_format);
+            break;
+        default: abort(); // no declaration form exists for other input types
+        }
+    }
+}
+
 // 1. Generate vertex and fragment shaders from the fragment shader text added
 //    with gl_sc_add(). The generated shader program is cached (based on the
 //    text), so actual compilation happens only the first time.
@@ -753,39 +544,44 @@ static GLuint load_program(struct gl_shader_cache *sc, struct bstr *vertex,
 // The return value is a mp_pass_perf containing performance metrics for the
 // execution of the generated shader. (Note: execution is measured up until
 // the corresponding gl_sc_reset call)
-// 'type' can be either GL_FRAGMENT_SHADER or GL_COMPUTE_SHADER
-struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type)
+// 'type' must be a valid render pass type; it is stored in sc->params.type.
+static struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc,
+                                          enum ra_renderpass_type type)
 {
-    GL *gl = sc->gl;
+    int glsl_version = sc->ra->glsl_version;
+    int glsl_es = sc->ra->glsl_es ? glsl_version : 0;
+
+    sc->params.type = type;
 
     // gl_sc_reset() must be called after ending the previous render process,
     // and before starting a new one.