summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--video/out/opengl/common.c2
-rw-r--r--video/out/opengl/ra.h1
-rw-r--r--video/out/opengl/ra_gl.c1
-rw-r--r--video/out/opengl/shader_cache.c177
4 files changed, 167 insertions, 14 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index 0b83555ad2..3d03c478b9 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -628,7 +628,7 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2)
gl->glsl_version = glsl_major * 100 + glsl_minor;
// restrict GLSL version to be forwards compatible
- gl->glsl_version = MPMIN(gl->glsl_version, 430);
+ gl->glsl_version = MPMIN(gl->glsl_version, 440);
}
if (is_software_gl(gl)) {
diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h
index bcd65e1027..4a728fbb5f 100644
--- a/video/out/opengl/ra.h
+++ b/video/out/opengl/ra.h
@@ -47,6 +47,7 @@ enum {
RA_CAP_BUF_RW = 1 << 6, // supports RA_VARTYPE_BUF_RW
RA_CAP_NESTED_ARRAY = 1 << 7, // supports nested arrays
RA_CAP_SHARED_BINDING = 1 << 8, // sampler/image/buffer namespaces are disjoint
+ RA_CAP_GLOBAL_UNIFORM = 1 << 9, // supports using "naked" uniforms (not UBO)
};
enum ra_ctype {
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index e18beb58f3..bcc3f57760 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -97,6 +97,7 @@ static int ra_init_gl(struct ra *ra, GL *gl)
static const int caps_map[][2] = {
{RA_CAP_DIRECT_UPLOAD, 0},
{RA_CAP_SHARED_BINDING, 0},
+ {RA_CAP_GLOBAL_UNIFORM, 0},
{RA_CAP_TEX_1D, MPGL_CAP_1D_TEX},
{RA_CAP_TEX_3D, MPGL_CAP_3D_TEX},
{RA_CAP_COMPUTE, MPGL_CAP_COMPUTE_SHADER},
diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c
index e5bddd7960..5612e8783a 100644
--- a/video/out/opengl/shader_cache.c
+++ b/video/out/opengl/shader_cache.c
@@ -32,10 +32,17 @@ struct sc_uniform {
const char *glsl_type;
union uniform_val v;
char *buffer_format;
+ // for UBO entries: these are all assumed to be arrays as far as updating
+ // is concerned. ("regular" values are treated like arrays of length 1)
+ size_t ubo_length; // number of array elements (or 0 if not using UBO)
+ size_t ubo_rowsize; // size of data in each array row
+ size_t ubo_stride; // stride of each array row
+ size_t ubo_offset; // byte offset within the uniform buffer
};
struct sc_cached_uniform {
union uniform_val v;
+ int index; // for ra_renderpass_input_val
};
struct sc_entry {
@@ -44,6 +51,8 @@ struct sc_entry {
int num_cached_uniforms;
bstr total;
struct timer_pool *timer;
+ struct ra_buf *ubo;
+ int ubo_index; // for ra_renderpass_input_val.index
};
struct gl_shader_cache {
@@ -73,6 +82,9 @@ struct gl_shader_cache {
struct sc_uniform *uniforms;
int num_uniforms;
+ int ubo_binding;
+ size_t ubo_size;
+
struct ra_renderpass_input_val *values;
int num_values;
@@ -114,6 +126,8 @@ static void gl_sc_reset(struct gl_shader_cache *sc)
for (int n = 0; n < sc->num_uniforms; n++)
talloc_free((void *)sc->uniforms[n].input.name);
sc->num_uniforms = 0;
+ sc->ubo_binding = 0;
+ sc->ubo_size = 0;
for (int i = 0; i < RA_VARTYPE_COUNT; i++)
sc->next_binding[i] = 0;
sc->current_shader = NULL;
@@ -127,6 +141,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
for (int n = 0; n < sc->num_entries; n++) {
struct sc_entry *e = sc->entries[n];
+ ra_buf_free(sc->ra, &e->ubo);
if (e->pass)
sc->ra->fns->renderpass_destroy(sc->ra, e->pass);
timer_pool_destroy(e->timer);
@@ -239,6 +254,50 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
}
}
+// Updates the UBO metadata for the given sc_uniform. Assumes type, dim_v and
+// dim_m are already set. Computes the correct alignment, size and array stride
+// as per the std140 specification.
+static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u)
+{
+ if (!(sc->ra->caps & RA_CAP_BUF_RO))
+ return;
+
+ // Using UBOs with explicit layout(offset) like we do requires GLSL version
+ // 440 or higher. In theory the UBO code can also use older versions, but
+ // just try and avoid potential headaches. This also ensures they're only
+ // used on drivers that are probably modern enough to actually support them
+ // correctly.
+ if (sc->ra->glsl_version < 440)
+ return;
+
+ size_t el_size;
+ switch (u->input.type) {
+ case RA_VARTYPE_INT: el_size = sizeof(int); break;
+ case RA_VARTYPE_FLOAT: el_size = sizeof(float); break;
+ default: abort();
+ }
+
+ u->ubo_rowsize = el_size * u->input.dim_v;
+
+ // std140 packing rules:
+ // 1. The alignment of generic values is their size in bytes
+ // 2. The alignment of vectors is the vector length * the base count, with
+ // the exception of vec3 which is always aligned like vec4
+ // 3. The alignment of arrays is that of the element size rounded up to
+ // the nearest multiple of vec4
+ // 4. Matrices are treated like arrays of vectors
+ // 5. Arrays/matrices are laid out with a stride equal to the alignment
+ u->ubo_stride = u->ubo_rowsize;
+ if (u->input.dim_v == 3)
+ u->ubo_stride += el_size;
+ if (u->input.dim_m > 1)
+ u->ubo_stride = MP_ALIGN_UP(u->ubo_stride, sizeof(float[4]));
+
+ u->ubo_offset = MP_ALIGN_UP(sc->ubo_size, u->ubo_stride);
+ u->ubo_length = u->input.dim_m;
+ sc->ubo_size = u->ubo_offset + u->ubo_stride * u->ubo_length;
+}
+
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
struct ra_tex *tex)
{
@@ -297,6 +356,7 @@ void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
struct sc_uniform *u = find_uniform(sc, name);
u->input.type = RA_VARTYPE_FLOAT;
u->glsl_type = "float";
+ update_ubo_params(sc, u);
u->v.f[0] = f;
}
@@ -305,6 +365,7 @@ void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
struct sc_uniform *u = find_uniform(sc, name);
u->input.type = RA_VARTYPE_INT;
u->glsl_type = "int";
+ update_ubo_params(sc, u);
u->v.i[0] = i;
}
@@ -314,6 +375,7 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
u->input.type = RA_VARTYPE_FLOAT;
u->input.dim_v = 2;
u->glsl_type = "vec2";
+ update_ubo_params(sc, u);
u->v.f[0] = f[0];
u->v.f[1] = f[1];
}
@@ -324,6 +386,7 @@ void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3])
u->input.type = RA_VARTYPE_FLOAT;
u->input.dim_v = 3;
u->glsl_type = "vec3";
+ update_ubo_params(sc, u);
u->v.f[0] = f[0];
u->v.f[1] = f[1];
u->v.f[2] = f[2];
@@ -342,6 +405,7 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
u->input.dim_v = 2;
u->input.dim_m = 2;
u->glsl_type = "mat2";
+ update_ubo_params(sc, u);
for (int n = 0; n < 4; n++)
u->v.f[n] = v[n];
if (transpose)
@@ -363,6 +427,7 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
u->input.dim_v = 3;
u->input.dim_m = 3;
u->glsl_type = "mat3";
+ update_ubo_params(sc, u);
for (int n = 0; n < 9; n++)
u->v.f[n] = v[n];
if (transpose)
@@ -408,6 +473,20 @@ static const char *vao_glsl_type(const struct ra_renderpass_input *e)
}
}
+static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
+{
+ uintptr_t src = (uintptr_t) &u->v;
+ size_t dst = u->ubo_offset;
+ size_t src_stride = u->ubo_rowsize;
+ size_t dst_stride = u->ubo_stride;
+
+ for (int i = 0; i < u->ubo_length; i++) {
+ ra->fns->buf_update(ra, ubo, dst, (void *)src, src_stride);
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
struct sc_uniform *u, int n)
{
@@ -420,11 +499,17 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
if (changed) {
un->v = u->v;
- struct ra_renderpass_input_val value = {
- .index = n,
- .data = &un->v,
- };
- MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
+
+ if (u->ubo_length) {
+ assert(e->ubo);
+ update_ubo(sc->ra, e->ubo, u);
+ } else {
+ struct ra_renderpass_input_val value = {
+ .index = un->index,
+ .data = &un->v,
+ };
+ MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
+ }
}
}
@@ -434,8 +519,10 @@ void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir)
sc->cache_dir = talloc_strdup(sc, dir);
}
-static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
+static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
{
+ bool ret = false;
+
void *tmp = talloc_new(NULL);
struct ra_renderpass_params params = sc->params;
@@ -490,10 +577,37 @@ static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
}
}
- entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params);
+ // If using a UBO, also make sure to add it as an input value so the RA
+ // can see it
+ if (sc->ubo_size) {
+ entry->ubo_index = sc->params.num_inputs;
+ struct ra_renderpass_input ubo_input = {
+ .name = "UBO",
+ .type = RA_VARTYPE_BUF_RO,
+ .dim_v = 1,
+ .dim_m = 1,
+ .binding = sc->ubo_binding,
+ };
+ MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input);
+ }
+ entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params);
if (!entry->pass)
- sc->error_state = true;
+ goto error;
+
+ if (sc->ubo_size) {
+ struct ra_buf_params ubo_params = {
+ .type = RA_BUF_TYPE_UNIFORM,
+ .size = sc->ubo_size,
+ .host_mutable = true,
+ };
+
+ entry->ubo = ra_buf_create(sc->ra, &ubo_params);
+ if (!entry->ubo) {
+ MP_ERR(sc, "Failed creating uniform buffer!\n");
+ goto error;
+ }
+ }
if (entry->pass && cache_filename) {
bstr nc = entry->pass->params.cached_program;
@@ -510,7 +624,11 @@ static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
}
}
+ ret = true;
+
+error:
talloc_free(tmp);
+ return ret;
}
#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__)
@@ -518,11 +636,28 @@ static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
{
+ // Add all of the UBO entries separately as members of their own buffer
+ if (sc->ubo_size > 0) {
+ ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding);
+ for (int n = 0; n < sc->num_uniforms; n++) {
+ struct sc_uniform *u = &sc->uniforms[n];
+ if (!u->ubo_length)
+ continue;
+ ADD(dst, "layout(offset=%zu) %s %s;\n", u->ubo_offset,
+ u->glsl_type, u->input.name);
+ }
+ ADD(dst, "};\n");
+ }
+
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_uniform *u = &sc->uniforms[n];
+ if (u->ubo_length)
+ continue;
switch (u->input.type) {
case RA_VARTYPE_INT:
case RA_VARTYPE_FLOAT:
+ assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
+ // fall through
case RA_VARTYPE_TEX:
case RA_VARTYPE_IMG_W:
ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name);
@@ -564,6 +699,10 @@ static void gl_sc_generate(struct gl_shader_cache *sc, enum ra_renderpass_type t
// gl_sc_set_vertex_format() must always be called
assert(sc->params.vertex_attribs);
+ // If using a UBO, pick a binding (needed for shader generation)
+ if (sc->ubo_size)
+ sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO);
+
for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++)
sc->tmp[n].len = 0;
@@ -710,24 +849,36 @@ static void gl_sc_generate(struct gl_shader_cache *sc, enum ra_renderpass_type t
};
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_cached_uniform u = {0};
+ if (!sc->uniforms[n].ubo_length) {
+ u.index = sc->params.num_inputs;
+ MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs,
+ sc->uniforms[n].input);
+ }
MP_TARRAY_APPEND(entry, entry->cached_uniforms,
entry->num_cached_uniforms, u);
- MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs,
- sc->uniforms[n].input);
}
- create_pass(sc, entry);
+ if (!create_pass(sc, entry))
+ sc->error_state = true;
MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry);
}
- if (!entry->pass)
+ if (sc->error_state)
return;
assert(sc->num_uniforms == entry->num_cached_uniforms);
- assert(sc->num_uniforms == entry->pass->params.num_inputs);
sc->num_values = 0;
for (int n = 0; n < sc->num_uniforms; n++)
update_uniform(sc, entry, &sc->uniforms[n], n);
+ // If we're using a UBO, make sure to bind it as well
+ if (sc->ubo_size) {
+ struct ra_renderpass_input_val ubo_val = {
+ .index = entry->ubo_index,
+ .data = &entry->ubo,
+ };
+ MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val);
+ }
+
sc->current_shader = entry;
}