From 0c2cb69597f52592bdb58312d9987978cb86a9d3 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Fri, 8 Sep 2017 04:27:53 +0200 Subject: vo_opengl: generalize UBO packing/handling This is simultaneously generalized into two directions: 1. Support more sc_uniform types (needed for SC_UNIFORM_TYPE_PUSHC) 2. Support more flexible packing (needed for both PUSHC and ra_d3d11) --- video/out/opengl/ra.c | 18 ++++--- video/out/opengl/ra.h | 18 ++++++- video/out/opengl/ra_gl.c | 1 + video/out/opengl/shader_cache.c | 109 +++++++++++++++++----------------------- video/out/opengl/utils.c | 42 ++++++++++++++++ video/out/opengl/utils.h | 4 ++ 6 files changed, 122 insertions(+), 70 deletions(-) (limited to 'video/out') diff --git a/video/out/opengl/ra.c b/video/out/opengl/ra.c index 7f0053955b..208507dfa0 100644 --- a/video/out/opengl/ra.c +++ b/video/out/opengl/ra.c @@ -36,7 +36,7 @@ void ra_free(struct ra **ra) *ra = NULL; } -static size_t vartype_size(enum ra_vartype type) +size_t ra_vartype_size(enum ra_vartype type) { switch (type) { case RA_VARTYPE_INT: return sizeof(int); @@ -46,12 +46,18 @@ static size_t vartype_size(enum ra_vartype type) } } -// Return the size of the data ra_renderpass_input_val.data is going to point -// to. This returns 0 for non-primitive types such as textures. -size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input) +struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input) { - size_t el_size = vartype_size(input->type); - return el_size * input->dim_v * input->dim_m; + size_t el_size = ra_vartype_size(input->type); + if (!el_size) + return (struct ra_layout){0}; + + // host data is always tightly packed + return (struct ra_layout) { + .align = 1, + .stride = el_size * input->dim_v, + .size = el_size * input->dim_v * input->dim_m, + }; } static struct ra_renderpass_input *dup_inputs(void *ta_parent, diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h index 81078da41f..46a69f2ff5 100644 --- a/video/out/opengl/ra.h +++ b/video/out/opengl/ra.h @@ -186,6 +186,9 @@ enum ra_vartype { RA_VARTYPE_COUNT }; +// Returns the host size of a ra_vartype, or 0 for abstract vartypes (e.g. tex) +size_t ra_vartype_size(enum ra_vartype type); + // Represents a uniform, texture input parameter, and similar things. struct ra_renderpass_input { const char *name; // name as used in the shader @@ -204,7 +207,16 @@ struct ra_renderpass_input { int binding; }; -size_t ra_render_pass_input_data_size(struct ra_renderpass_input *input); +// Represents the layout requirements of an input value +struct ra_layout { + size_t align; // the alignment requirements (always a power of two) + size_t stride; // the delta between two rows of an array/matrix + size_t size; // the total size of the input +}; + +// Returns the host layout of a render pass input. Returns {0} for renderpass +// inputs without a corresponding host representation (e.g. textures/buffers) +struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input); enum ra_blend { RA_BLEND_ZERO, @@ -370,6 +382,10 @@ struct ra_fns { // NULL then all buffers are always usable. bool (*buf_poll)(struct ra *ra, struct ra_buf *buf); + // Returns the layout requirements of a uniform buffer element. Optional, + // but must be implemented if RA_CAP_BUF_RO is supported. + struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp); + // Clear the dst with the given color (rgba) and within the given scissor. // dst must have dst->params.render_dst==true. Content outside of the // scissor is preserved. diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c index 401847e471..ab5c13242a 100644 --- a/video/out/opengl/ra_gl.c +++ b/video/out/opengl/ra_gl.c @@ -1122,6 +1122,7 @@ static struct ra_fns ra_fns_gl = { .buf_poll = gl_buf_poll, .clear = gl_clear, .blit = gl_blit, + .uniform_layout = std140_layout, .renderpass_create = gl_renderpass_create, .renderpass_destroy = gl_renderpass_destroy, .renderpass_run = gl_renderpass_run, diff --git a/video/out/opengl/shader_cache.c b/video/out/opengl/shader_cache.c index e702d7e4a1..fe5ca3fdbd 100644 --- a/video/out/opengl/shader_cache.c +++ b/video/out/opengl/shader_cache.c @@ -27,17 +27,20 @@ union uniform_val { struct ra_buf *buf; // RA_VARTYPE_BUF_* }; +enum sc_uniform_type { + SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM) + SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO) +}; + struct sc_uniform { + enum sc_uniform_type type; struct ra_renderpass_input input; const char *glsl_type; union uniform_val v; char *buffer_format; - // for UBO entries: these are all assumed to be arrays as far as updating - // is concerned. ("regular" values are treated like arrays of length 1) - size_t ubo_length; // number of array elements (or 0 if not using UBO) - size_t ubo_rowsize; // size of data in each array row - size_t ubo_stride; // stride of each array row - size_t ubo_offset; // byte offset within the uniform buffer + // for SC_UNIFORM_TYPE_UBO: + struct ra_layout layout; + size_t offset; // byte offset within the buffer }; struct sc_cached_uniform { @@ -254,9 +257,8 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) } } -// Updates the UBO metadata for the given sc_uniform. Assumes type, dim_v and -// dim_m are already set. Computes the correct alignment, size and array stride -// as per the std140 specification. +// Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input +// is already set. Also updates sc_uniform->type. static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u) { if (!(sc->ra->caps & RA_CAP_BUF_RO)) @@ -270,32 +272,10 @@ static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u) if (sc->ra->glsl_version < 440) return; - size_t el_size; - switch (u->input.type) { - case RA_VARTYPE_INT: el_size = sizeof(int); break; - case RA_VARTYPE_FLOAT: el_size = sizeof(float); break; - default: abort(); - } - - u->ubo_rowsize = el_size * u->input.dim_v; - - // std140 packing rules: - // 1. The alignment of generic values is their size in bytes - // 2. The alignment of vectors is the vector length * the base count, with - // the exception of vec3 which is always aligned like vec4 - // 3. The alignment of arrays is that of the element size rounded up to - // the nearest multiple of vec4 - // 4. Matrices are treated like arrays of vectors - // 5. Arrays/matrices are laid out with a stride equal to the alignment - u->ubo_stride = u->ubo_rowsize; - if (u->input.dim_v == 3) - u->ubo_stride += el_size; - if (u->input.dim_m > 1) - u->ubo_stride = MP_ALIGN_UP(u->ubo_stride, sizeof(float[4])); - - u->ubo_offset = MP_ALIGN_UP(sc->ubo_size, u->ubo_stride); - u->ubo_length = u->input.dim_m; - sc->ubo_size = u->ubo_offset + u->ubo_stride * u->ubo_length; + u->type = SC_UNIFORM_TYPE_UBO; + u->layout = sc->ra->fns->uniform_layout(&u->input); + u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); + sc->ubo_size = u->offset + u->layout.size; } void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, @@ -476,14 +456,14 @@ static const char *vao_glsl_type(const struct ra_renderpass_input *e) static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u) { uintptr_t src = (uintptr_t) &u->v; - size_t dst = u->ubo_offset; - size_t src_stride = u->ubo_rowsize; - size_t dst_stride = u->ubo_stride; - - for (int i = 0; i < u->ubo_length; i++) { - ra->fns->buf_update(ra, ubo, dst, (void *)src, src_stride); - src += src_stride; - dst += dst_stride; + size_t dst = u->offset; + struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); + struct ra_layout dst_layout = u->layout; + + for (int i = 0; i < u->input.dim_m; i++) { + ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride); + src += src_layout.stride; + dst += dst_layout.stride; } } @@ -491,24 +471,26 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, struct sc_uniform *u, int n) { struct sc_cached_uniform *un = &e->cached_uniforms[n]; - size_t size = ra_render_pass_input_data_size(&u->input); - bool changed = true; - if (size > 0) - changed = memcmp(&un->v, &u->v, size) != 0; + struct ra_layout layout = ra_renderpass_input_layout(&u->input); + if (layout.size > 0 && memcmp(&un->v, &u->v, layout.size) == 0) + return; - if (changed) { - un->v = u->v; + un->v = u->v; - if (u->ubo_length) { - assert(e->ubo); - update_ubo(sc->ra, e->ubo, u); - } else { - struct ra_renderpass_input_val value = { - .index = un->index, - .data = &un->v, - }; - MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); - } + switch (u->type) { + case SC_UNIFORM_TYPE_GLOBAL: { + struct ra_renderpass_input_val value = { + .index = un->index, + .data = &un->v, + }; + MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); + break; + } + case SC_UNIFORM_TYPE_UBO: + assert(e->ubo); + update_ubo(sc->ra, e->ubo, u); + break; + default: abort(); } } @@ -640,9 +622,9 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding); for (int n = 0; n < sc->num_uniforms; n++) { struct sc_uniform *u = &sc->uniforms[n]; - if (!u->ubo_length) + if (u->type != SC_UNIFORM_TYPE_UBO) continue; - ADD(dst, "layout(offset=%zu) %s %s;\n", u->ubo_offset, + ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, u->input.name); } ADD(dst, "};\n"); @@ -650,7 +632,7 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) for (int n = 0; n < sc->num_uniforms; n++) { struct sc_uniform *u = &sc->uniforms[n]; - if (u->ubo_length) + if (u->type != SC_UNIFORM_TYPE_GLOBAL) continue; switch (u->input.type) { case RA_VARTYPE_INT: @@ -875,7 +857,8 @@ static void gl_sc_generate(struct gl_shader_cache *sc, }; for (int n = 0; n < sc->num_uniforms; n++) { struct sc_cached_uniform u = {0}; - if (!sc->uniforms[n].ubo_length) { + if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) { + // global uniforms need to be made visible to the ra_renderpass u.index = sc->params.num_inputs; MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs, sc->uniforms[n].input); diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index b13035697c..b8fc24a52e 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -124,6 +124,48 @@ bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, return ra->fns->tex_upload(ra, &newparams); } +struct ra_layout std140_layout(struct ra_renderpass_input *inp) +{ + size_t el_size = ra_vartype_size(inp->type); + + // std140 packing rules: + // 1. The alignment of generic values is their size in bytes + // 2. The alignment of vectors is the vector length * the base count, with + // the exception of vec3 which is always aligned like vec4 + // 3. The alignment of arrays is that of the element size rounded up to + // the nearest multiple of vec4 + // 4. Matrices are treated like arrays of vectors + // 5. Arrays/matrices are laid out with a stride equal to the alignment + size_t size = el_size * inp->dim_v; + if (inp->dim_v == 3) + size += el_size; + if (inp->dim_m > 1) + size = MP_ALIGN_UP(size, sizeof(float[4])); + + return (struct ra_layout) { + .align = size, + .stride = size, + .size = size * inp->dim_m, + }; +} + +struct ra_layout std430_layout(struct ra_renderpass_input *inp) +{ + size_t el_size = ra_vartype_size(inp->type); + + // std430 packing rules: like std140, except arrays/matrices are always + // "tightly" packed, even arrays/matrices of vec3s + size_t align = el_size * inp->dim_v; + if (inp->dim_v == 3 && inp->dim_m == 1) + align += el_size; + + return (struct ra_layout) { + .align = align, + .stride = align, + .size = align * inp->dim_m, + }; +} + // Create a texture and a FBO using the texture as color attachments. // fmt: texture internal format // If the parameters are the same as the previous call, do not touch it. diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 34e459f34c..7d00d26cf5 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -89,6 +89,10 @@ struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, const struct ra_tex_upload_params *params); +// Layout rules for GLSL's packing modes +struct ra_layout std140_layout(struct ra_renderpass_input *inp); +struct ra_layout std430_layout(struct ra_renderpass_input *inp); + struct fbotex { struct ra *ra; struct ra_tex *tex; -- cgit v1.2.3