diff options
author | Niklas Haas <git@haasn.xyz> | 2017-09-14 08:04:55 +0200 |
---|---|---|
committer | Niklas Haas <git@haasn.xyz> | 2017-09-21 15:00:55 +0200 |
commit | 65979986a923a8f08019b257c3fe72cd5e8ecf68 (patch) | |
tree | b8f4b8c17d583594aef0ca509064f8b2ff7128d4 /video/out/gpu/shader_cache.c | |
parent | 20f958c9775652c3213588c2a0824f5353276adc (diff) | |
download | mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.bz2 mpv-65979986a923a8f08019b257c3fe72cd5e8ecf68.tar.xz |
vo_opengl: refactor into vo_gpu
This is done in several steps:
1. refactor MPGLContext -> struct ra_ctx
2. move GL-specific stuff in vo_opengl into opengl/context.c
3. generalize context creation to support other APIs, and add --gpu-api
4. rename all of the --opengl- options that are no longer opengl-specific
5. move all of the stuff from opengl/* that isn't GL-specific into gpu/
(note: opengl/gl_utils.h became opengl/utils.h)
6. rename vo_opengl to vo_gpu
7. to handle window screenshots, the short-term approach was to just add
it to ra_swchain_fns. Long term (and for vulkan) this has to be moved to
ra itself (and vo_gpu altered to compensate), but this was a stop-gap
measure to prevent this commit from getting too big
8. move ra->fns->flush to ra_gl_ctx instead
9. some other minor changes that I've probably already forgotten
Note: This is one half of a major refactor, the other half of which is
provided by rossy's following commit. This commit enables support for
all linux platforms, while his version enables support for all non-linux
platforms.
Note 2: vo_opengl_cb.c also re-uses ra_gl_ctx so it benefits from the
--opengl- options like --opengl-early-flush, --opengl-finish etc. Should
be a strict superset of the old functionality.
Disclaimer: Since I have no way of compiling mpv on all platforms, some
of these ports were done blindly. Specifically, the blind ports included
context_mali_fbdev.c and context_rpi.c. Since they're both based on
egl_helpers, the port should have gone smoothly without any major
changes required. But if somebody complains about a compile error on
those platforms (assuming anybody actually uses them), you know where to
complain.
Diffstat (limited to 'video/out/gpu/shader_cache.c')
-rw-r--r-- | video/out/gpu/shader_cache.c | 954 |
1 files changed, 954 insertions, 0 deletions
diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c new file mode 100644 index 0000000000..afda9cc036 --- /dev/null +++ b/video/out/gpu/shader_cache.c @@ -0,0 +1,954 @@ +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> + +#include <libavutil/sha.h> +#include <libavutil/mem.h> + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "shader_cache.h" +#include "utils.h" + +// Force cache flush if more than this number of shaders is created. +#define SC_MAX_ENTRIES 48 + +union uniform_val { + float f[9]; // RA_VARTYPE_FLOAT + int i[4]; // RA_VARTYPE_INT + struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_* + struct ra_buf *buf; // RA_VARTYPE_BUF_* +}; + +enum sc_uniform_type { + SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM) + SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO) +}; + +struct sc_uniform { + enum sc_uniform_type type; + struct ra_renderpass_input input; + const char *glsl_type; + union uniform_val v; + char *buffer_format; + // for SC_UNIFORM_TYPE_UBO: + struct ra_layout layout; + size_t offset; // byte offset within the buffer +}; + +struct sc_cached_uniform { + union uniform_val v; + int index; // for ra_renderpass_input_val + bool set; // whether the uniform has ever been set +}; + +struct sc_entry { + struct ra_renderpass *pass; + struct sc_cached_uniform *cached_uniforms; + int num_cached_uniforms; + bstr total; + struct timer_pool *timer; + struct ra_buf *ubo; + int ubo_index; // for ra_renderpass_input_val.index +}; + +struct gl_shader_cache { + struct ra *ra; + struct mp_log *log; + + // permanent + char **exts; + int num_exts; + + // this is modified during use (gl_sc_add() etc.) and reset for each shader + bstr prelude_text; + bstr header_text; + bstr text; + + // Next binding point (texture unit, image unit, buffer binding, etc.) + // In OpenGL these are separate for each input type + int next_binding[RA_VARTYPE_COUNT]; + + struct ra_renderpass_params params; + + struct sc_entry **entries; + int num_entries; + + struct sc_entry *current_shader; // set by gl_sc_generate() + + struct sc_uniform *uniforms; + int num_uniforms; + + int ubo_binding; + size_t ubo_size; + + struct ra_renderpass_input_val *values; + int num_values; + + // For checking that the user is calling gl_sc_reset() properly. + bool needs_reset; + + bool error_state; // true if an error occurred + + // temporary buffers (avoids frequent reallocations) + bstr tmp[6]; + + // For the disk-cache. + char *cache_dir; + struct mpv_global *global; // can be NULL +}; + +static void gl_sc_reset(struct gl_shader_cache *sc); + +struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, + struct mp_log *log) +{ + struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc); + *sc = (struct gl_shader_cache){ + .ra = ra, + .global = global, + .log = log, + }; + gl_sc_reset(sc); + return sc; +} + +// Reset the previous pass. This must be called after gl_sc_generate and before +// starting a new shader. +static void gl_sc_reset(struct gl_shader_cache *sc) +{ + sc->prelude_text.len = 0; + sc->header_text.len = 0; + sc->text.len = 0; + for (int n = 0; n < sc->num_uniforms; n++) + talloc_free((void *)sc->uniforms[n].input.name); + sc->num_uniforms = 0; + sc->ubo_binding = 0; + sc->ubo_size = 0; + for (int i = 0; i < RA_VARTYPE_COUNT; i++) + sc->next_binding[i] = 0; + sc->current_shader = NULL; + sc->params = (struct ra_renderpass_params){0}; + sc->needs_reset = false; +} + +static void sc_flush_cache(struct gl_shader_cache *sc) +{ + MP_VERBOSE(sc, "flushing shader cache\n"); + + for (int n = 0; n < sc->num_entries; n++) { + struct sc_entry *e = sc->entries[n]; + ra_buf_free(sc->ra, &e->ubo); + if (e->pass) + sc->ra->fns->renderpass_destroy(sc->ra, e->pass); + timer_pool_destroy(e->timer); + talloc_free(e); + } + sc->num_entries = 0; +} + +void gl_sc_destroy(struct gl_shader_cache *sc) +{ + if (!sc) + return; + gl_sc_reset(sc); + sc_flush_cache(sc); + talloc_free(sc); +} + +bool gl_sc_error_state(struct gl_shader_cache *sc) +{ + return sc->error_state; +} + +void gl_sc_reset_error(struct gl_shader_cache *sc) +{ + sc->error_state = false; +} + +void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name) +{ + for (int n = 0; n < sc->num_exts; n++) { + if (strcmp(sc->exts[n], name) == 0) + return; + } + MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name)); +} + +#define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s)) + +void gl_sc_add(struct gl_shader_cache *sc, const char *text) +{ + bstr_xappend0(sc, &sc->text, text); +} + +void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->text, textf, ap); + va_end(ap); +} + +void gl_sc_hadd(struct gl_shader_cache *sc, const char *text) +{ + bstr_xappend0(sc, &sc->header_text, text); +} + +void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap); + va_end(ap); +} + +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text) +{ + bstr_xappend(sc, &sc->header_text, text); +} + +void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap); + va_end(ap); +} + +static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, + const char *name) +{ + struct sc_uniform new = { + .input = { + .dim_v = 1, + .dim_m = 1, + }, + }; + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (strcmp(u->input.name, name) == 0) { + const char *allocname = u->input.name; + *u = new; + u->input.name = allocname; + return u; + } + } + + // not found -> add it + new.input.name = talloc_strdup(NULL, name); + MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); + return &sc->uniforms[sc->num_uniforms - 1]; +} + +static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) +{ + if (sc->ra->caps & RA_CAP_SHARED_BINDING) { + return sc->next_binding[type]++; + } else { + return sc->next_binding[0]++; + } +} + +// Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input +// is already set. Also updates sc_uniform->type. +static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u) +{ + if (!(sc->ra->caps & RA_CAP_BUF_RO)) + return; + + // Using UBOs with explicit layout(offset) like we do requires GLSL version + // 440 or higher. In theory the UBO code can also use older versions, but + // just try and avoid potential headaches. This also ensures they're only + // used on drivers that are probably modern enough to actually support them + // correctly. + if (sc->ra->glsl_version < 440) + return; + + u->type = SC_UNIFORM_TYPE_UBO; + u->layout = sc->ra->fns->uniform_layout(&u->input); + u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); + sc->ubo_size = u->offset + u->layout.size; +} + +void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, + struct ra_tex *tex) +{ + const char *glsl_type = "sampler2D"; + if (tex->params.dimensions == 1) { + glsl_type = "sampler1D"; + } else if (tex->params.dimensions == 3) { + glsl_type = "sampler3D"; + } else if (tex->params.non_normalized) { + glsl_type = "sampler2DRect"; + } else if (tex->params.external_oes) { + glsl_type = "samplerExternalOES"; + } else if (tex->params.format->ctype == RA_CTYPE_UINT) { + glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D"; + } + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_TEX; + u->glsl_type = glsl_type; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.tex = tex; +} + +void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, + struct ra_tex *tex) +{ + gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store"); + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_IMG_W; + u->glsl_type = "writeonly image2D"; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.tex = tex; +} + +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, + char *format, ...) +{ + assert(sc->ra->caps & RA_CAP_BUF_RW); + gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_BUF_RW; + u->glsl_type = ""; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.buf = buf; + + va_list ap; + va_start(ap, format); + u->buffer_format = ta_vasprintf(sc, format, ap); + va_end(ap); +} + +void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->glsl_type = "float"; + update_ubo_params(sc, u); + u->v.f[0] = f; +} + +void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_INT; + u->glsl_type = "int"; + update_ubo_params(sc, u); + u->v.i[0] = i; +} + +void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 2; + u->glsl_type = "vec2"; + update_ubo_params(sc, u); + u->v.f[0] = f[0]; + u->v.f[1] = f[1]; +} + +void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 3; + u->glsl_type = "vec3"; + update_ubo_params(sc, u); + u->v.f[0] = f[0]; + u->v.f[1] = f[1]; + u->v.f[2] = f[2]; +} + +static void transpose2x2(float r[2 * 2]) +{ + MPSWAP(float, r[0+2*1], r[1+2*0]); +} + +void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, + bool transpose, float *v) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 2; + u->input.dim_m = 2; + u->glsl_type = "mat2"; + update_ubo_params(sc, u); + for (int n = 0; n < 4; n++) + u->v.f[n] = v[n]; + if (transpose) + transpose2x2(&u->v.f[0]); +} + +static void transpose3x3(float r[3 * 3]) +{ + MPSWAP(float, r[0+3*1], r[1+3*0]); + MPSWAP(float, r[0+3*2], r[2+3*0]); + MPSWAP(float, r[1+3*2], r[2+3*1]); +} + +void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, + bool transpose, float *v) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 3; + u->input.dim_m = 3; + u->glsl_type = "mat3"; + update_ubo_params(sc, u); + for (int n = 0; n < 9; n++) + u->v.f[n] = v[n]; + if (transpose) + transpose3x3(&u->v.f[0]); +} + +// Tell the shader generator (and later gl_sc_draw_data()) about the vertex +// data layout and attribute names. The entries array is terminated with a {0} +// entry. The array memory must remain valid indefinitely (for now). +void gl_sc_set_vertex_format(struct gl_shader_cache *sc, + const struct ra_renderpass_input *entries, + int vertex_stride) +{ + sc->params.vertex_attribs = (struct ra_renderpass_input *)entries; + sc->params.num_vertex_attribs = 0; + while (entries[sc->params.num_vertex_attribs].name) + sc->params.num_vertex_attribs++; + sc->params.vertex_stride = vertex_stride; +} + +void gl_sc_blend(struct gl_shader_cache *sc, + enum ra_blend blend_src_rgb, + enum ra_blend blend_dst_rgb, + enum ra_blend blend_src_alpha, + enum ra_blend blend_dst_alpha) +{ + sc->params.enable_blend = true; + sc->params.blend_src_rgb = blend_src_rgb; + sc->params.blend_dst_rgb = blend_dst_rgb; + sc->params.blend_src_alpha = blend_src_alpha; + sc->params.blend_dst_alpha = blend_dst_alpha; +} + +static const char *vao_glsl_type(const struct ra_renderpass_input *e) +{ + // pretty dumb... too dumb, but works for us + switch (e->dim_v) { + case 1: return "float"; + case 2: return "vec2"; + case 3: return "vec3"; + case 4: return "vec4"; + default: abort(); + } +} + +static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u) +{ + uintptr_t src = (uintptr_t) &u->v; + size_t dst = u->offset; + struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); + struct ra_layout dst_layout = u->layout; + + for (int i = 0; i < u->input.dim_m; i++) { + ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride); + src += src_layout.stride; + dst += dst_layout.stride; + } +} + +static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, + struct sc_uniform *u, int n) +{ + struct sc_cached_uniform *un = &e->cached_uniforms[n]; + struct ra_layout layout = ra_renderpass_input_layout(&u->input); + if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0) + return; + + un->v = u->v; + un->set = true; + + switch (u->type) { + case SC_UNIFORM_TYPE_GLOBAL: { + struct ra_renderpass_input_val value = { + .index = un->index, + .data = &un->v, + }; + MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); + break; + } + case SC_UNIFORM_TYPE_UBO: + assert(e->ubo); + update_ubo(sc->ra, e->ubo, u); + break; + default: abort(); + } +} + +void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir) +{ + talloc_free(sc->cache_dir); + sc->cache_dir = talloc_strdup(sc, dir); +} + +static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) +{ + bool ret = false; + + void *tmp = talloc_new(NULL); + struct ra_renderpass_params params = sc->params; + + MP_VERBOSE(sc, "new shader program:\n"); + if (sc->header_text.len) { + MP_VERBOSE(sc, "header:\n"); + mp_log_source(sc->log, MSGL_V, sc->header_text.start); + MP_VERBOSE(sc, "body:\n"); + } + if (sc->text.len) + mp_log_source(sc->log, MSGL_V, sc->text.start); + + // The vertex shader uses mangled names for the vertex attributes, so that + // the fragment shader can use the "real" names. But the shader is expecting + // the vertex attribute names (at least with older GLSL targets for GL). + params.vertex_attribs = talloc_memdup(tmp, params.vertex_attribs, + params.num_vertex_attribs * sizeof(params.vertex_attribs[0])); + for (int n = 0; n < params.num_vertex_attribs; n++) { + struct ra_renderpass_input *attrib = ¶ms.vertex_attribs[n]; + attrib->name = talloc_asprintf(tmp, "vertex_%s", attrib->name); + } + + const char *cache_header = "mpv shader cache v1\n"; + char *cache_filename = NULL; + char *cache_dir = NULL; + + if (sc->cache_dir && sc->cache_dir[0]) { + // Try to load it from a disk cache. + cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); + + struct AVSHA *sha = av_sha_alloc(); + if (!sha) + abort(); + av_sha_init(sha, 256); + av_sha_update(sha, entry->total.start, entry->total.len); + + uint8_t hash[256 / 8]; + av_sha_final(sha, hash); + av_free(sha); + + char hashstr[256 / 8 * 2 + 1]; + for (int n = 0; n < 256 / 8; n++) + snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]); + + cache_filename = mp_path_join(tmp, cache_dir, hashstr); + if (stat(cache_filename, &(struct stat){0}) == 0) { + MP_VERBOSE(sc, "Trying to load shader from disk...\n"); + struct bstr cachedata = + stream_read_file(cache_filename, tmp, sc->global, 1000000000); + if (bstr_eatstart0(&cachedata, cache_header)) + params.cached_program = cachedata; + } + } + + // If using a UBO, also make sure to add it as an input value so the RA + // can see it + if (sc->ubo_size) { + entry->ubo_index = sc->params.num_inputs; + struct ra_renderpass_input ubo_input = { + .name = "UBO", + .type = RA_VARTYPE_BUF_RO, + .dim_v = 1, + .dim_m = 1, + .binding = sc->ubo_binding, + }; + MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input); + } + + entry->pass = sc->ra->fns->renderpass_create(sc->ra, ¶ms); + if (!entry->pass) + goto error; + + if (sc->ubo_size) { + struct ra_buf_params ubo_params = { + .type = RA_BUF_TYPE_UNIFORM, + .size = sc->ubo_size, + .host_mutable = true, + }; + + entry->ubo = ra_buf_create(sc->ra, &ubo_params); + if (!entry->ubo) { + MP_ERR(sc, "Failed creating uniform buffer!\n"); + goto error; + } + } + + if (entry->pass && cache_filename) { + bstr nc = entry->pass->params.cached_program; + if (nc.len && !bstr_equals(params.cached_program, nc)) { + mp_mkdirp(cache_dir); + + MP_VERBOSE(sc, "Writing shader cache file: %s\n", cache_filename); + FILE *out = fopen(cache_filename, "wb"); + if (out) { + fwrite(cache_header, strlen(cache_header), 1, out); + fwrite(nc.start, nc.len, 1, out); + fclose(out); + } + } + } + + ret = true; + +error: + talloc_free(tmp); + return ret; +} + +#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__) +#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s)) + +static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) +{ + // Add all of the UBO entries separately as members of their own buffer + if (sc->ubo_size > 0) { + ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding); + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type != SC_UNIFORM_TYPE_UBO) + continue; + ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, + u->glsl_type, u->input.name); + } + ADD(dst, "};\n"); + } + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type != SC_UNIFORM_TYPE_GLOBAL) + continue; + switch (u->input.type) { + case RA_VARTYPE_INT: + case RA_VARTYPE_FLOAT: + assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); + // fall through + case RA_VARTYPE_TEX: + case RA_VARTYPE_IMG_W: + // Vulkan requires explicitly assigning the bindings in the shader + // source. For OpenGL it's optional, but requires higher GL version + // so we don't do it (and instead have ra_gl update the bindings + // after program creation). + if (sc->ra->glsl_vulkan) + ADD(dst, "layout(binding=%d) ", u->input.binding); + ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name); + break; + case RA_VARTYPE_BUF_RO: + ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n", + u->input.binding, u->input.name, u->buffer_format); + break; + case RA_VARTYPE_BUF_RW: + ADD(dst, "layout(std430, binding=%d) buffer %s { %s };\n", + u->input.binding, u->input.name, u->buffer_format); + break; + } + } +} + +// 1. Generate vertex and fragment shaders from the fragment shader text added +// with gl_sc_add(). The generated shader program is cached (based on the +// text), so actual compilation happens only the first time. +// 2. Update the uniforms and textures set with gl_sc_uniform_*. +// 3. Make the new shader program current (glUseProgram()). +// After that, you render, and then you call gc_sc_reset(), which does: +// 1. Unbind the program and all textures. +// 2. Reset the sc state and prepare for a new shader program. (All uniforms +// and fragment operations needed for the next program have to be re-added.) +static void gl_sc_generate(struct gl_shader_cache *sc, + enum ra_renderpass_type type, + const struct ra_format *target_format) +{ + int glsl_version = sc->ra->glsl_version; + int glsl_es = sc->ra->glsl_es ? glsl_version : 0; + + sc->params.type = type; + + // gl_sc_reset() must be called after ending the previous render process, + // and before starting a new one. + assert(!sc->needs_reset); + sc->needs_reset = true; + + // gl_sc_set_vertex_format() must always be called + assert(sc->params.vertex_attribs); + + // If using a UBO, pick a binding (needed for shader generation) + if (sc->ubo_size) + sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO); + + for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++) + sc->tmp[n].len = 0; + + // set up shader text (header + uniforms + body) + bstr *header = &sc->tmp[0]; + ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : ""); + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + // This extension cannot be enabled in fragment shader. Enable it as + // an exception for compute shader. + ADD(header, "#extension GL_ARB_compute_shader : enable\n"); + } + for (int n = 0; n < sc->num_exts; n++) + ADD(header, "#extension %s : enable\n", sc->exts[n]); + if (glsl_es) { + ADD(header, "precision mediump float;\n"); + ADD(header, "precision mediump sampler2D;\n"); + if (sc->ra->caps & RA_CAP_TEX_3D) + ADD(header, "precision mediump sampler3D;\n"); + } + + if (glsl_version >= 130) { + ADD(header, "#define tex1D texture\n"); + ADD(header, "#define tex3D texture\n"); + } else { + ADD(header, "#define tex1D texture1D\n"); + ADD(header, "#define tex3D texture3D\n"); + ADD(header, "#define texture texture2D\n"); + } + + if (sc->ra->glsl_vulkan && type == RA_RENDERPASS_TYPE_COMPUTE) { + ADD(header, "#define gl_GlobalInvocationIndex " + "(gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID)\n"); + } + + // Additional helpers. + ADD(header, "#define LUT_POS(x, lut_size)" + " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n"); + + char *vert_in = glsl_version >= 130 ? "in" : "attribute"; + char *vert_out = glsl_version >= 130 ? "out" : "varying"; + char *frag_in = glsl_version >= 130 ? "in" : "varying"; + + struct bstr *vert = NULL, *frag = NULL, *comp = NULL; + + if (type == RA_RENDERPASS_TYPE_RASTER) { + // vertex shader: we don't use the vertex shader, so just setup a + // dummy, which passes through the vertex array attributes. + bstr *vert_head = &sc->tmp[1]; + ADD_BSTR(vert_head, *header); + bstr *vert_body = &sc->tmp[2]; + ADD(vert_body, "void main() {\n"); + bstr *frag_vaos = &sc->tmp[3]; + for (int n = 0; n < sc->params.num_vertex_attribs; n++) { + const struct ra_renderpass_input *e = &sc->params.vertex_attribs[n]; + const char *glsl_type = vao_glsl_type(e); + char loc[32] = {0}; + if (sc->ra->glsl_vulkan) + snprintf(loc, sizeof(loc), "layout(location=%d) ", n); + if (strcmp(e->name, "position") == 0) { + // setting raster pos. requires setting gl_Position magic variable + assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT); + ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in); + ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n"); + } else { + ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name); + ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name); + ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name); + ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name); + } + } + ADD(vert_body, "}\n"); + vert = vert_head; + ADD_BSTR(vert, *vert_body); + + // fragment shader; still requires adding used uniforms and VAO elements + frag = &sc->tmp[4]; + ADD_BSTR(frag, *header); + if (glsl_version >= 130) { + ADD(frag, "%sout vec4 out_color;\n", + sc->ra->glsl_vulkan ? "layout(location=0) " : ""); + } + ADD_BSTR(frag, *frag_vaos); + add_uniforms(sc, frag); + + ADD_BSTR(frag, sc->prelude_text); + ADD_BSTR(frag, sc->header_text); + + ADD(frag, "void main() {\n"); + // we require _all_ frag shaders to write to a "vec4 color" + ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); + ADD_BSTR(frag, sc->text); + if (glsl_version >= 130) { + ADD(frag, "out_color = color;\n"); + } else { + ADD(frag, "gl_FragColor = color;\n"); + } + ADD(frag, "}\n"); + + // We need to fix the format of the render dst at renderpass creation + // time + assert(target_format); + sc->params.target_format = target_format; + } + + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + comp = &sc->tmp[4]; + ADD_BSTR(comp, *header); + + add_uniforms(sc, comp); + + ADD_BSTR(comp, sc->prelude_text); + ADD_BSTR(comp, sc->header_text); + + ADD(comp, "void main() {\n"); + ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience + ADD_BSTR(comp, sc->text); + ADD(comp, "}\n"); + } + + bstr *hash_total = &sc->tmp[5]; + + ADD(hash_total, "type %d\n", sc->params.type); + + if (frag) { + ADD_BSTR(hash_total, *frag); + sc->params.frag_shader = frag->start; + } + ADD(hash_total, "\n"); + if (vert) { + ADD_BSTR(hash_total, *vert); + sc->params.vertex_shader = vert->start; + } + ADD(hash_total, "\n"); + if (comp) { + ADD_BSTR(hash_total, *comp); + sc->params.compute_shader = comp->start; + } + ADD(hash_total, "\n"); + + if (sc->params.enable_blend) { + ADD(hash_total, "blend %d %d %d %d\n", + sc->params.blend_src_rgb, sc->params.blend_dst_rgb, + sc->params.blend_src_alpha, sc->params.blend_dst_alpha); + } + + if (sc->params.target_format) + ADD(hash_total, "format %s\n", sc->params.target_format->name); + + struct sc_entry *entry = NULL; + for (int n = 0; n < sc->num_entries; n++) { + struct sc_entry *cur = sc->entries[n]; + if (bstr_equals(cur->total, *hash_total)) { + entry = cur; + break; + } + } + if (!entry) { + if (sc->num_entries == SC_MAX_ENTRIES) + sc_flush_cache(sc); + entry = talloc_ptrtype(NULL, entry); + *entry = (struct sc_entry){ + .total = bstrdup(entry, *hash_total), + .timer = timer_pool_create(sc->ra), + }; + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_cached_uniform u = {0}; + if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) { + // global uniforms need to be made visible to the ra_renderpass + u.index = sc->params.num_inputs; + MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs, + sc->uniforms[n].input); + } + MP_TARRAY_APPEND(entry, entry->cached_uniforms, + entry->num_cached_uniforms, u); + } + if (!create_pass(sc, entry)) + sc->error_state = true; + MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry); + } + if (sc->error_state) + return; + + assert(sc->num_uniforms == entry->num_cached_uniforms); + + sc->num_values = 0; + for (int n = 0; n < sc->num_uniforms; n++) + update_uniform(sc, entry, &sc->uniforms[n], n); + + // If we're using a UBO, make sure to bind it as well + if (sc->ubo_size) { + struct ra_renderpass_input_val ubo_val = { + .index = entry->ubo_index, + .data = &entry->ubo, + }; + MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val); + } + + sc->current_shader = entry; +} + +struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, + struct ra_tex *target, + void *ptr, size_t num) +{ + struct timer_pool *timer = NULL; + + gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format); + if (!sc->current_shader) + goto error; + + timer = sc->current_shader->timer; + + struct mp_rect full_rc = {0, 0, target->params.w, target->params.h}; + + struct ra_renderpass_run_params run = { + .pass = sc->current_shader->pass, + .values = sc->values, + .num_values = sc->num_values, + .target = target, + .vertex_data = ptr, + .vertex_count = num, + .viewport = full_rc, + .scissors = full_rc, + }; + + timer_pool_start(timer); + sc->ra->fns->renderpass_run(sc->ra, &run); + timer_pool_stop(timer); + +error: + gl_sc_reset(sc); + return timer_pool_measure(timer); +} + +struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, + int w, int h, int d) +{ + struct timer_pool *timer = NULL; + + gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL); + if (!sc->current_shader) + goto error; + + timer = sc->current_shader->timer; + + struct ra_renderpass_run_params run = { + .pass = sc->current_shader->pass, + .values = sc->values, + .num_values = sc->num_values, + .compute_groups = {w, h, d}, + }; + + timer_pool_start(timer); + sc->ra->fns->renderpass_run(sc->ra, &run); + timer_pool_stop(timer); + +error: + gl_sc_reset(sc); + return timer_pool_measure(timer); +} |