author     Niklas Haas <git@haasn.xyz>  2017-08-26 05:58:50 +0200
committer  Niklas Haas <git@haasn.xyz>  2017-08-27 14:36:04 +0200
commit     1d47473a7bb49663c197efa9a6ff8d836a9c1693 (patch)
tree       0479d2a67982a0dcb1584d6d29be5662a2fa6c3a
parent     136cf2b770f08a435710f71fe2fa597c908cc8a0 (diff)
vo_opengl: use UBOs where supported/required
This also introduces RA_CAP_GLOBAL_UNIFORM. If this cap is not set, UBOs *must* be used for all non-binding uniforms. Currently the cap is ignored, though, and the shader_cache *always* generates UBO-using code where it can. This could be made an option in principle.

The UBO path is only enabled for drivers new enough to support explicit UBO offsets, just in case.

No change to performance, which is probably what we would expect.
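To make the change concrete, here is an illustrative sketch (not part of the commit) of the two declaration styles add_uniforms() can now emit, written as the kind of C string literals shader_cache builds. The uniform names and the binding/offset values are hypothetical; the offsets follow the std140 rules implemented in update_ubo_params() in the diff below.

/* Illustrative sketch only: roughly what add_uniforms() emits when
 * sc->ubo_size > 0. Names are made up; offsets follow std140. */
static const char *example_ubo_decl =
    "layout(std140, binding=0) uniform UBO {\n"
    "    layout(offset=0) vec2 pixel_size;\n"    /* vec2: 8-byte stride */
    "    layout(offset=16) mat3 colormatrix;\n"  /* mat3: rows padded to vec4 */
    "};\n";

/* The previous style, still used when the UBO path is not taken (and
 * requiring RA_CAP_GLOBAL_UNIFORM once the cap is actually honored): */
static const char *example_global_decl =
    "uniform vec2 pixel_size;\n"
    "uniform mat3 colormatrix;\n";

With the UBO block, per-frame uniform updates become buf_update() calls into a single buffer (see update_ubo() below) instead of one renderpass input value per uniform.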
-rw-r--r--  video/out/opengl/common.c       |   2
-rw-r--r--  video/out/opengl/ra.h           |   1
-rw-r--r--  video/out/opengl/ra_gl.c        |   1
-rw-r--r--  video/out/opengl/shader_cache.c | 177
4 files changed, 167 insertions(+), 14 deletions(-)
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index 0b83555ad2..3d03c478b9 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -628,7 +628,7 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2)
gl->glsl_version = glsl_major * 100 + glsl_minor;
// restrict GLSL version to be forwards compatible
- gl->glsl_version = MPMIN(gl->glsl_version, 430);
+ gl->glsl_version = MPMIN(gl->glsl_version, 440);
}
if (is_software_gl(gl)) {
diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h
index bcd65e1027..4a728fbb5f 100644
--- a/video/out/opengl/ra.h
+++ b/video/out/opengl/ra.h
@@ -47,6 +47,7 @@ enum {
RA_CAP_BUF_RW = 1 << 6, // supports RA_VARTYPE_BUF_RW
RA_CAP_NESTED_ARRAY = 1 << 7, // supports nested arrays
RA_CAP_SHARED_BINDING = 1 << 8, // sampler/image/buffer namespaces are disjoint
+ RA_CAP_GLOBAL_UNIFORM = 1 << 9, // supports using "naked" uniforms (not UBO)
};
enum ra_ctype {
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index e18beb58f3..bcc3f57760 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -97,6 +97,7 @@ static int ra_init_gl(struct ra *ra, GL *gl)
static const int caps_map[][2] = {
{RA_CAP_DIRECT_UPLOAD, 0},
{RA_CAP_SHARED_BINDING, 0},
+ {RA_CAP_GLOBAL_UNIFORM, 0},
{RA_CAP_TEX_1D, MPGL_CAP_1D_TEX},
{RA_CAP_TEX_3D, MPGL_CAP_3D_TEX},
{RA_CAP_COMPUTE, MPGL_CAP_COMPUTE_SHADER},
diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c
index e5bddd7960..5612e8783a 100644
--- a/video/out/opengl/shader_cache.c
+++ b/video/out/opengl/shader_cache.c
@@ -32,10 +32,17 @@ struct sc_uniform {
const char *glsl_type;
union uniform_val v;
char *buffer_format;
+ // for UBO entries: these are all assumed to be arrays as far as updating
+ // is concerned. ("regular" values are treated like arrays of length 1)
+ size_t ubo_length; // number of array elements (or 0 if not using UBO)
+ size_t ubo_rowsize; // size of data in each array row
+ size_t ubo_stride; // stride of each array row
+ size_t ubo_offset; // byte offset within the uniform buffer
};
struct sc_cached_uniform {
union uniform_val v;
+ int index; // for ra_renderpass_input_val
};
struct sc_entry {
@@ -44,6 +51,8 @@ struct sc_entry {
int num_cached_uniforms;
bstr total;
struct timer_pool *timer;
+ struct ra_buf *ubo;
+ int ubo_index; // for ra_renderpass_input_val.index
};
struct gl_shader_cache {
@@ -73,6 +82,9 @@ struct gl_shader_cache {
struct sc_uniform *uniforms;
int num_uniforms;
+ int ubo_binding;
+ size_t ubo_size;
+
struct ra_renderpass_input_val *values;
int num_values;
@@ -114,6 +126,8 @@ static void gl_sc_reset(struct gl_shader_cache *sc)
for (int n = 0; n < sc->num_uniforms; n++)
talloc_free((void *)sc->uniforms[n].input.name);
sc->num_uniforms = 0;
+ sc->ubo_binding = 0;
+ sc->ubo_size = 0;
for (int i = 0; i < RA_VARTYPE_COUNT; i++)
sc->next_binding[i] = 0;
sc->current_shader = NULL;
@@ -127,6 +141,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
for (int n = 0; n < sc->num_entries; n++) {
struct sc_entry *e = sc->entries[n];
+ ra_buf_free(sc->ra, &e->ubo);
if (e->pass)
sc->ra->fns->renderpass_destroy(sc->ra, e->pass);
timer_pool_destroy(e->timer);
@@ -239,6 +254,50 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
}
}
+// Updates the UBO metadata for the given sc_uniform. Assumes type, dim_v and
+// dim_m are already set. Computes the correct alignment, size and array stride
+// as per the std140 specification.
+static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u)
+{
+ if (!(sc->ra->caps & RA_CAP_BUF_RO))
+ return;
+
+ // Using UBOs with explicit layout(offset) like we do requires GLSL version
+ // 440 or higher. In theory the UBO code can also use older versions, but
+ // just try and avoid potential headaches. This also ensures they're only
+ // used on drivers that are probably modern enough to actually support them
+ // correctly.
+ if (sc->ra->glsl_version < 440)
+ return;
+
+ size_t el_size;
+ switch (u->input.type) {
+ case RA_VARTYPE_INT: el_size = sizeof(int); break;
+ case RA_VARTYPE_FLOAT: el_size = sizeof(float); break;
+ default: abort();
+ }
+
+ u->ubo_rowsize = el_size * u->input.dim_v;
+
+ // std140 packing rules:
+ // 1. The alignment of generic values is their size in bytes
+ // 2. The alignment of vectors is the vector length * the base count, with
+ // the exception of vec3 which is always aligned like vec4
+ // 3. The alignment of arrays is that of the element size rounded up to
+ // the nearest multiple of vec4
+ // 4. Matrices are treated like arrays of vectors
+ // 5. Arrays/matrices are laid out with a stride equal to the alignment
+ u->ubo_stride = u->ubo_rowsize;
+ if (u->input.dim_v == 3)
+ u->ubo_stride += el_size;
+ if (u->input.dim_m > 1)
+ u->ubo_stride = MP_ALIGN_UP(u->ubo_stride, sizeof(float[4]));
+
+ u->ubo_offset = MP_ALIGN_UP(sc->ubo_size, u->ubo_stride);
+ u->ubo_length = u->input.dim_m;
+ sc->ubo_size = u->ubo_offset + u->ubo_stride * u->ubo_length;
+}
+
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
struct ra_tex *tex)
{
@@ -297,6 +356,7 @@ void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
struct sc_uniform *u = find_uniform(sc, name);
u->input.type = RA_VARTYPE_FLOAT;
u->glsl_type = "float";
+ update_ubo_params(sc, u);
u->v.f[0] = f;
}
@@ -305,6 +365,7 @@ void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
struct sc_uniform *u = find_uniform(sc, name);
u->input.type = RA_VARTYPE_INT;
u->glsl_type = "int";
+ update_ubo_params(sc, u);
u->v.i[0] = i;
}
@@ -314,6 +375,7 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
u->input.type = RA_VARTYPE_FLOAT;
u->input.dim_v = 2;
u->glsl_type = "vec2";
+ update_ubo_params(sc, u);
u->v.f[0] = f[0];
u->v.f[1] = f[1];
}
@@ -324,6 +386,7 @@ void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3])
u->input.type = RA_VARTYPE_FLOAT;
u->input.dim_v = 3;
u->glsl_type = "vec3";
+ update_ubo_params(sc, u);
u->v.f[0] = f[0];
u->v.f[1] = f[1];
u->v.f[2] = f[2];
@@ -342,6 +405,7 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
u->input.dim_v = 2;
u->input.dim_m = 2;
u->glsl_type = "mat2";
+ update_ubo_params(sc, u);
for (int n = 0; n < 4; n++)
u->v.f[n] = v[n];
if (transpose)
@@ -363,6 +427,7 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
u->input.dim_v = 3;
u->input.dim_m = 3;
u->glsl_type = "mat3";
+ update_ubo_params(sc, u);
for (int n = 0; n < 9; n++)
u->v.f[n] = v[n];
if (transpose)
@@ -408,6 +473,20 @@ static const char *vao_glsl_type(const struct ra_renderpass_input *e)
}
}
+static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
+{
+ uintptr_t src = (uintptr_t) &u->v;
+ size_t dst = u->ubo_offset;
+ size_t src_stride = u->ubo_rowsize;
+ size_t dst_stride = u->ubo_stride;
+
+ for (int i = 0; i < u->ubo_length; i++) {
+ ra->fns->buf_update(ra, ubo, dst, (void *)src, src_stride);
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
struct sc_uniform *u, int n)
{
@@ -420,11 +499,17 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
if (changed) {
un->v = u->v;
- struct ra_renderpass_input_val value = {
- .index = n,
- .data = &un->v,
- };
- MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
+
+ if (u->ubo_length) {
+ assert(e->ubo);
+ update_ubo(sc->ra, e->ubo, u);
+ } else {
+ struct ra_renderpass_input_val value = {
+ .index = un->index,
+ .data = &un->v,
+ };
+ MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
+ }
}
}
@@ -434,8 +519,10 @@ void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir)
sc->cache_dir = talloc_strdup(sc, dir);
}
-static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
+static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
{
+ bool ret = false;
+
void *tmp = talloc_new(NULL);
struct ra_renderpass_params params = sc->params;
@@ -490,10 +577,37 @@ static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
}
}
- entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params);
+ // If using a UBO, also make sure to add it as an input value so the RA
+ // can see it
+ if (sc->ubo_size) {
+ entry->ubo_index = sc->params.num_inputs;
+ struct ra_renderpass_input ubo_input = {
+ .name = "UBO",
+ .type = RA_VARTYPE_BUF_RO,
+ .dim_v = 1,
+ .dim_m = 1,
+ .binding = sc->ubo_binding,
+ };
+ MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input);
+ }
+ entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params);
if (!entry->pass)
- sc->error_state = true;
+ goto error;
+
+ if (sc->ubo_size) {
+ struct ra_buf_params ubo_params = {
+ .type = RA_BUF_TYPE_UNIFORM,
+ .size = sc->ubo_size,
+ .host_mutable = true,
+ };
+
+ entry->ubo = ra_buf_create(sc->ra, &ubo_params);
+ if (!entry->ubo) {
+ MP_ERR(sc, "Failed creating uniform buffer!\n");
+ goto error;
+ }
+ }
if (entry->pass && cache_filename) {
bstr nc = entry->pass->params.cached_program;
@@ -510,7 +624,11 @@ static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
}
}
+ ret = true;
+
+error:
talloc_free(tmp);
+ return ret;
}
#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__)
@@ -518,11 +636,28 @@ static void create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
{
+ // Add all of the UBO entries separately as members of their own buffer
+ if (sc->ubo_size > 0) {
+ ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding);
+ for (int n = 0; n < sc->num_uniforms; n++) {
+ struct sc_uniform *u = &sc->uniforms[n];
+ if (!u->ubo_length)
+ continue;
+ ADD(dst, "layout(offset=%zu) %s %s;\n", u->ubo_offset,
+ u->glsl_type, u->input.name);
+ }
+ ADD(dst, "};\n");
+ }
+
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_uniform *u = &sc->uniforms[n];
+ if (u->ubo_length)
+ continue;
switch (u->input.type) {
case RA_VARTYPE_INT:
case RA_VARTYPE_FLOAT:
+ assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
+ // fall through
case RA_VARTYPE_TEX:
case RA_VARTYPE_IMG_W:
ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name);
@@ -564,6 +699,10 @@ static void gl_sc_generate(struct gl_shader_cache *sc, enum ra_renderpass_type t
// gl_sc_set_vertex_format() must always be called
assert(sc->params.vertex_attribs);
+ // If using a UBO, pick a binding (needed for shader generation)
+ if (sc->ubo_size)
+ sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO);
+
for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++)
sc->tmp[n].len = 0;
@@ -710,24 +849,36 @@ static void gl_sc_generate(struct gl_shader_cache *sc, enum ra_renderpass_type t
};
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_cached_uniform u = {0};
+ if (!sc->uniforms[n].ubo_length) {
+ u.index = sc->params.num_inputs;
+ MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs,
+ sc->uniforms[n].input);
+ }
MP_TARRAY_APPEND(entry, entry->cached_uniforms,
entry->num_cached_uniforms, u);
- MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs,
- sc->uniforms[n].input);
}
- create_pass(sc, entry);
+ if (!create_pass(sc, entry))
+ sc->error_state = true;
MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry);
}
- if (!entry->pass)
+ if (sc->error_state)
return;
assert(sc->num_uniforms == entry->num_cached_uniforms);
- assert(sc->num_uniforms == entry->pass->params.num_inputs);
sc->num_values = 0;
for (int n = 0; n < sc->num_uniforms; n++)
update_uniform(sc, entry, &sc->uniforms[n], n);
+ // If we're using a UBO, make sure to bind it as well
+ if (sc->ubo_size) {
+ struct ra_renderpass_input_val ubo_val = {
+ .index = entry->ubo_index,
+ .data = &entry->ubo,
+ };
+ MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val);
+ }
+
sc->current_shader = entry;
}
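As a quick sanity check of the layout logic, the following is a minimal standalone sketch (not part of the commit) of the std140 stride/offset computation that update_ubo_params() performs. MP_ALIGN_UP is open-coded as align_up(), and the helper name place_uniform() is hypothetical.

#include <stdio.h>
#include <stddef.h>

/* Hypothetical standalone restatement of the std140 math in
 * update_ubo_params(); align_up() stands in for mpv's MP_ALIGN_UP. */
static size_t align_up(size_t x, size_t a)
{
    return (x + a - 1) / a * a;
}

/* Returns the byte offset assigned to a float-based uniform with the given
 * vector/matrix dimensions and advances *ubo_size past it. */
static size_t place_uniform(size_t *ubo_size, int dim_v, int dim_m)
{
    size_t el_size = sizeof(float);
    size_t rowsize = el_size * dim_v;
    size_t stride = rowsize;
    if (dim_v == 3)                 /* vec3 rows are padded like vec4 */
        stride += el_size;
    if (dim_m > 1)                  /* matrix rows: stride rounded up to vec4 */
        stride = align_up(stride, sizeof(float[4]));
    size_t offset = align_up(*ubo_size, stride);
    *ubo_size = offset + stride * dim_m;
    return offset;
}

int main(void)
{
    size_t ubo_size = 0;
    printf("vec2  -> offset %zu\n", place_uniform(&ubo_size, 2, 1)); /* 0  */
    printf("mat3  -> offset %zu\n", place_uniform(&ubo_size, 3, 3)); /* 16 */
    printf("float -> offset %zu\n", place_uniform(&ubo_size, 1, 1)); /* 64 */
    printf("total UBO size: %zu bytes\n", ubo_size);                 /* 68 */
    return 0;
}

This matches the example declarations shown above: a vec2 lands at offset 0, and the following mat3 is padded out to three vec4-sized rows starting at offset 16.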