summary | refs | log | tree | commit | diff | stats
path: root/video/out
diff options
context:
space:
mode:
author Niklas Haas <git@haasn.xyz> 2017-09-08 04:27:53 +0200
committer Niklas Haas <git@haasn.xyz> 2017-09-12 02:57:45 +0200
commit 0c2cb69597f52592bdb58312d9987978cb86a9d3 (patch)
tree 59883f675f41c3302c5b8603f038754993037cd2 /video/out
parent 3faf1fb0a4482712b2177af2f72ef8877f8adc10 (diff)
download mpv-0c2cb69597f52592bdb58312d9987978cb86a9d3.tar.bz2
mpv-0c2cb69597f52592bdb58312d9987978cb86a9d3.tar.xz
vo_opengl: generalize UBO packing/handling
This is simultaneously generalized in two directions: (1) support more sc_uniform types (needed for SC_UNIFORM_TYPE_PUSHC); (2) support more flexible packing (needed for both PUSHC and ra_d3d11).
Diffstat (limited to 'video/out')
-rw-r--r-- video/out/opengl/ra.c 18
-rw-r--r-- video/out/opengl/ra.h 18
-rw-r--r-- video/out/opengl/ra_gl.c 1
-rw-r--r-- video/out/opengl/shader_cache.c 109
-rw-r--r-- video/out/opengl/utils.c 42
-rw-r--r-- video/out/opengl/utils.h 4
6 files changed, 122 insertions, 70 deletions
diff --git a/video/out/opengl/ra.c b/video/out/opengl/ra.c
index 7f0053955b..208507dfa0 100644
--- a/video/out/opengl/ra.c
+++ b/video/out/opengl/ra.c
@@ -36,7 +36,7 @@ void ra_free(struct ra **ra)
*ra = NULL;
}
-static size_t vartype_size(enum ra_vartype type)
+size_t ra_vartype_size(enum ra_vartype type)
{
switch (type) {
case RA_VARTYPE_INT: return sizeof(int);
@@ -46,12 +46,18 @@ static size_t vartype_size(enum ra_vartype type)
}
}
-// Return the size of the data ra_renderpass_input_val.data is going to point
-// to. This returns 0 for non-primitive types such as textures.
-size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input)
+struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input)
{
- size_t el_size = vartype_size(input->type);
- return el_size * input->dim_v * input->dim_m;
+ size_t el_size = ra_vartype_size(input->type);
+ if (!el_size) // vartype has no host size (ra_vartype_size() returned 0)
+ return (struct ra_layout){0};
+
+ // host data is always tightly packed: a row is dim_v elements, no padding
+ return (struct ra_layout) {
+ .align = 1, // no alignment constraint beyond a single byte
+ .stride = el_size * input->dim_v, // bytes in one row
+ .size = el_size * input->dim_v * input->dim_m, // dim_m rows
+ };
}
static struct ra_renderpass_input *dup_inputs(void *ta_parent,
diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h
index 81078da41f..46a69f2ff5 100644
--- a/video/out/opengl/ra.h
+++ b/video/out/opengl/ra.h
@@ -186,6 +186,9 @@ enum ra_vartype {
RA_VARTYPE_COUNT
};
+// Returns the host size of a ra_vartype, or 0 for abstract vartypes (e.g. tex)
+size_t ra_vartype_size(enum ra_vartype type);
+
// Represents a uniform, texture input parameter, and similar things.
struct ra_renderpass_input {
const char *name; // name as used in the shader
@@ -204,7 +207,16 @@ struct ra_renderpass_input {
int binding;
};
-size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input);
+// Represents the layout requirements of an input value (all fields in bytes)
+struct ra_layout {
+ size_t align; // the alignment requirements (always a power of two)
+ size_t stride; // the delta between the starts of two rows of an array/matrix
+ size_t size; // the total size of the input (all rows, including row padding)
+};
+
+// Returns the host layout of a render pass input. Returns {0} for renderpass
+// inputs without a corresponding host representation (e.g. textures/buffers)
+struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input);
enum ra_blend {
RA_BLEND_ZERO,
@@ -370,6 +382,10 @@ struct ra_fns {
// NULL then all buffers are always usable.
bool (*buf_poll)(struct ra *ra, struct ra_buf *buf);
+ // Returns the layout requirements of a uniform buffer element. Optional,
+ // but must be implemented if RA_CAP_BUF_RO is supported.
+ struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp);
+
// Clear the dst with the given color (rgba) and within the given scissor.
// dst must have dst->params.render_dst==true. Content outside of the
// scissor is preserved.
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index 401847e471..ab5c13242a 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -1122,6 +1122,7 @@ static struct ra_fns ra_fns_gl = {
.buf_poll = gl_buf_poll,
.clear = gl_clear,
.blit = gl_blit,
+ .uniform_layout = std140_layout,
.renderpass_create = gl_renderpass_create,
.renderpass_destroy = gl_renderpass_destroy,
.renderpass_run = gl_renderpass_run,
diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c
index e702d7e4a1..fe5ca3fdbd 100644
--- a/video/out/opengl/shader_cache.c
+++ b/video/out/opengl/shader_cache.c
@@ -27,17 +27,20 @@ union uniform_val {
struct ra_buf *buf; // RA_VARTYPE_BUF_*
};
+enum sc_uniform_type { // selects how update_uniform() delivers a uniform's value
+ SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM); value 0, so a zeroed sc_uniform is GLOBAL
+ SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO); assigned by update_ubo_params()
+};
+
struct sc_uniform {
+ enum sc_uniform_type type; // global uniform vs. uniform buffer entry
struct ra_renderpass_input input; // name, vartype and dimensions of the uniform
const char *glsl_type; // type name written into the generated GLSL declaration
union uniform_val v; // current host-side value
char *buffer_format;
- // for UBO entries: these are all assumed to be arrays as far as updating
- // is concerned. ("regular" values are treated like arrays of length 1)
- size_t ubo_length; // number of array elements (or 0 if not using UBO)
- size_t ubo_rowsize; // size of data in each array row
- size_t ubo_stride; // stride of each array row
- size_t ubo_offset; // byte offset within the uniform buffer
+ // for SC_UNIFORM_TYPE_UBO:
+ struct ra_layout layout; // as returned by ra->fns->uniform_layout(&input)
+ size_t offset; // byte offset within the buffer (aligned to layout.align)
};
struct sc_cached_uniform {
@@ -254,9 +257,8 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
}
}
-// Updates the UBO metadata for the given sc_uniform. Assumes type, dim_v and
-// dim_m are already set. Computes the correct alignment, size and array stride
-// as per the std140 specification.
+// Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input
+// is already set. Also updates sc_uniform->type.
static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u)
{
if (!(sc->ra->caps & RA_CAP_BUF_RO))
@@ -270,32 +272,10 @@ static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u)
if (sc->ra->glsl_version < 440)
return;
- size_t el_size;
- switch (u->input.type) {
- case RA_VARTYPE_INT: el_size = sizeof(int); break;
- case RA_VARTYPE_FLOAT: el_size = sizeof(float); break;
- default: abort();
- }
-
- u->ubo_rowsize = el_size * u->input.dim_v;
-
- // std140 packing rules:
- // 1. The alignment of generic values is their size in bytes
- // 2. The alignment of vectors is the vector length * the base count, with
- // the exception of vec3 which is always aligned like vec4
- // 3. The alignment of arrays is that of the element size rounded up to
- // the nearest multiple of vec4
- // 4. Matrices are treated like arrays of vectors
- // 5. Arrays/matrices are laid out with a stride equal to the alignment
- u->ubo_stride = u->ubo_rowsize;
- if (u->input.dim_v == 3)
- u->ubo_stride += el_size;
- if (u->input.dim_m > 1)
- u->ubo_stride = MP_ALIGN_UP(u->ubo_stride, sizeof(float[4]));
-
- u->ubo_offset = MP_ALIGN_UP(sc->ubo_size, u->ubo_stride);
- u->ubo_length = u->input.dim_m;
- sc->ubo_size = u->ubo_offset + u->ubo_stride * u->ubo_length;
+ u->type = SC_UNIFORM_TYPE_UBO;
+ u->layout = sc->ra->fns->uniform_layout(&u->input);
+ u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
+ sc->ubo_size = u->offset + u->layout.size;
}
void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
@@ -476,14 +456,14 @@ static const char *vao_glsl_type(const struct ra_renderpass_input *e)
static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
{
uintptr_t src = (uintptr_t) &u->v; // read cursor into the host-side value
- size_t dst = u->ubo_offset;
- size_t src_stride = u->ubo_rowsize;
- size_t dst_stride = u->ubo_stride;
-
- for (int i = 0; i < u->ubo_length; i++) {
- ra->fns->buf_update(ra, ubo, dst, (void *)src, src_stride);
- src += src_stride;
- dst += dst_stride;
+ size_t dst = u->offset; // write cursor: byte offset inside ubo
+ struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); // tightly packed host layout
+ struct ra_layout dst_layout = u->layout; // layout chosen by the ra backend
+
+ for (int i = 0; i < u->input.dim_m; i++) { // one upload per row, since the two strides may differ
+ ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride); // copies exactly one host row
+ src += src_layout.stride;
+ dst += dst_layout.stride;
}
}
@@ -491,24 +471,26 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
struct sc_uniform *u, int n)
{
struct sc_cached_uniform *un = &e->cached_uniforms[n];
- size_t size = ra_render_pass_input_data_size(&u->input);
- bool changed = true;
- if (size > 0)
- changed = memcmp(&un->v, &u->v, size) != 0;
+ struct ra_layout layout = ra_renderpass_input_layout(&u->input);
+ if (layout.size > 0 && memcmp(&un->v, &u->v, layout.size) == 0)
+ return;
- if (changed) {
- un->v = u->v;
+ un->v = u->v;
- if (u->ubo_length) {
- assert(e->ubo);
- update_ubo(sc->ra, e->ubo, u);
- } else {
- struct ra_renderpass_input_val value = {
- .index = un->index,
- .data = &un->v,
- };
- MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
- }
+ switch (u->type) {
+ case SC_UNIFORM_TYPE_GLOBAL: {
+ struct ra_renderpass_input_val value = {
+ .index = un->index,
+ .data = &un->v,
+ };
+ MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
+ break;
+ }
+ case SC_UNIFORM_TYPE_UBO:
+ assert(e->ubo);
+ update_ubo(sc->ra, e->ubo, u);
+ break;
+ default: abort();
}
}
@@ -640,9 +622,9 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding);
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_uniform *u = &sc->uniforms[n];
- if (!u->ubo_length)
+ if (u->type != SC_UNIFORM_TYPE_UBO)
continue;
- ADD(dst, "layout(offset=%zu) %s %s;\n", u->ubo_offset,
+ ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset,
u->glsl_type, u->input.name);
}
ADD(dst, "};\n");
@@ -650,7 +632,7 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_uniform *u = &sc->uniforms[n];
- if (u->ubo_length)
+ if (u->type != SC_UNIFORM_TYPE_GLOBAL)
continue;
switch (u->input.type) {
case RA_VARTYPE_INT:
@@ -875,7 +857,8 @@ static void gl_sc_generate(struct gl_shader_cache *sc,
};
for (int n = 0; n < sc->num_uniforms; n++) {
struct sc_cached_uniform u = {0};
- if (!sc->uniforms[n].ubo_length) {
+ if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) {
+ // global uniforms need to be made visible to the ra_renderpass
u.index = sc->params.num_inputs;
MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs,
sc->uniforms[n].input);
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index b13035697c..b8fc24a52e 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -124,6 +124,48 @@ bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo,
return ra->fns->tex_upload(ra, &newparams);
}
+struct ra_layout std140_layout(struct ra_renderpass_input *inp)
+{
+ size_t el_size = ra_vartype_size(inp->type); // host size of one element
+
+ // Computes the layout of inp under these std140 packing rules:
+ // 1. The alignment of generic values is their size in bytes
+ // 2. The alignment of vectors is the vector length * the element size, with
+ // the exception of vec3 which is always aligned like vec4
+ // 3. The alignment of arrays is that of the element size rounded up to
+ // the nearest multiple of vec4
+ // 4. Matrices are treated like arrays of vectors
+ // 5. Arrays/matrices are laid out with a stride equal to the alignment
+ size_t size = el_size * inp->dim_v; // unpadded size of one row
+ if (inp->dim_v == 3)
+ size += el_size; // rule 2: pad a 3-vector to the size of a 4-vector
+ if (inp->dim_m > 1)
+ size = MP_ALIGN_UP(size, sizeof(float[4])); // rules 3/4: matrix rows are vec4-aligned
+
+ return (struct ra_layout) {
+ .align = size, // rule 5: alignment equals the padded row size
+ .stride = size,
+ .size = size * inp->dim_m, // dim_m padded rows; a lone vec3 reports 4 elements
+ };
+}
+
+struct ra_layout std430_layout(struct ra_renderpass_input *inp)
+{
+ size_t el_size = ra_vartype_size(inp->type); // host size of one element
+
+ // std430 packing rules: like std140, except arrays/matrices are always
+ // "tightly" packed, even arrays/matrices of vec3s
+ size_t align = el_size * inp->dim_v; // unpadded size of one row
+ if (inp->dim_v == 3 && inp->dim_m == 1) // only a lone vec3 is padded to 4 elements
+ align += el_size;
+
+ return (struct ra_layout) {
+ .align = align,
+ .stride = align, // NOTE(review): 3-element stride when dim_v == 3 && dim_m > 1; the std430 rules appear to keep vec3 array elements vec4-aligned - confirm against the spec
+ .size = align * inp->dim_m,
+ };
+}
+
// Create a texture and a FBO using the texture as color attachments.
// fmt: texture internal format
// If the parameters are the same as the previous call, do not touch it.
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 34e459f34c..7d00d26cf5 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -89,6 +89,10 @@ struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool,
bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo,
const struct ra_tex_upload_params *params);
+// Layout rules for GLSL's packing modes
+struct ra_layout std140_layout(struct ra_renderpass_input *inp);
+struct ra_layout std430_layout(struct ra_renderpass_input *inp);
+
struct fbotex {
struct ra *ra;
struct ra_tex *tex;