From 47af509e1fad3e9ce30e3e339d8cd705fcd44ef2 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Sun, 17 Sep 2017 10:55:43 +0200 Subject: vo_gpu: attempt to avoid UBOs for dynamic variables This makes the radeon driver shut up about frequently updating STATIC_DRAW UBOs (--opengl-debug), and also reduces the amount of synchronization necessary for vulkan uniform buffers. Also add some extra debugging/tracing code paths. I went with a flags-based approach in case we ever want to extend this. --- video/out/gpu/shader_cache.c | 24 ++++++++++++++++++++++-- video/out/gpu/shader_cache.h | 4 ++++ video/out/gpu/video.c | 5 +++++ video/out/gpu/video_shaders.c | 1 + 4 files changed, 32 insertions(+), 2 deletions(-) (limited to 'video') diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c index ff386546d3..4e9acc9e3e 100644 --- a/video/out/gpu/shader_cache.c +++ b/video/out/gpu/shader_cache.c @@ -76,6 +76,7 @@ struct gl_shader_cache { // Next binding point (texture unit, image unit, buffer binding, etc.) // In OpenGL these are separate for each input type int next_binding[RA_VARTYPE_COUNT]; + bool next_uniform_dynamic; struct ra_renderpass_params params; @@ -135,6 +136,7 @@ void gl_sc_reset(struct gl_shader_cache *sc) sc->pushc_size = 0; for (int i = 0; i < RA_VARTYPE_COUNT; i++) sc->next_binding[i] = 0; + sc->next_uniform_dynamic = false; sc->current_shader = NULL; sc->params = (struct ra_renderpass_params){0}; sc->needs_reset = false; @@ -259,14 +261,22 @@ static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) } } +void gl_sc_uniform_dynamic(struct gl_shader_cache *sc) +{ + sc->next_uniform_dynamic = true; +} + // Updates the metadata for the given sc_uniform. Assumes sc_uniform->input // and glsl_type/buffer_format are already set. static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u) { + bool dynamic = sc->next_uniform_dynamic; + sc->next_uniform_dynamic = false; + // Try not using push constants for "large" values like matrices, since // this is likely to both exceed the VGPR budget as well as the pushc size // budget - bool try_pushc = u->input.dim_m == 1; + bool try_pushc = u->input.dim_m == 1 || dynamic; // Attempt using push constants first if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) { @@ -287,7 +297,10 @@ static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform // to explicit offsets on UBO entries. In theory we could leave away // the offsets and support UBOs for older GL as well, but this is a nice // safety net for driver bugs (and also rules out potentially buggy drivers) - if (sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) { + // Also avoid UBOs for highly dynamic stuff since that requires synchronizing + // the UBO writes every frame + bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic; + if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) { u->type = SC_UNIFORM_TYPE_UBO; u->layout = sc->ra->fns->uniform_layout(&u->input); u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); @@ -514,6 +527,13 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, un->v = u->v; un->set = true; + static const char *desc[] = { + [SC_UNIFORM_TYPE_UBO] = "UBO", + [SC_UNIFORM_TYPE_PUSHC] = "PC", + [SC_UNIFORM_TYPE_GLOBAL] = "global", + }; + MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name); + switch (u->type) { case SC_UNIFORM_TYPE_GLOBAL: { struct ra_renderpass_input_val value = { diff --git a/video/out/gpu/shader_cache.h b/video/out/gpu/shader_cache.h index 377293391c..d64bb3a0f4 100644 --- a/video/out/gpu/shader_cache.h +++ b/video/out/gpu/shader_cache.h @@ -25,6 +25,10 @@ void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text); void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) PRINTF_ATTRIBUTE(2, 3); + +// A hint that the next data-type (i.e. non-binding) uniform is expected to +// change frequently. This refers to the _f, _i, _vecN etc. uniform types. +void gl_sc_uniform_dynamic(struct gl_shader_cache *sc); void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, struct ra_tex *tex); void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 4cc6114c52..dd3ad709cc 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -1500,7 +1500,9 @@ found: static void load_shader(struct gl_video *p, struct bstr body) { gl_sc_hadd_bstr(p->sc, body); + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX); + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded); gl_sc_uniform_vec2(p->sc, "input_size", (float[]){(p->src_rect.x1 - p->src_rect.x0) * @@ -2579,6 +2581,7 @@ static void pass_dither(struct gl_video *p) float matrix[2][2] = {{cos(r), -sin(r) }, {sin(r) * m, cos(r) * m}}; + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]); GLSL(dither_pos = dither_trafo * dither_pos;) @@ -2957,11 +2960,13 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // Blend the frames together if (oversample || linear) { + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_f(p->sc, "inter_coeff", mix); GLSL(color = mix(texture(texture0, texcoord0), texture(texture1, texcoord1), inter_coeff);) } else { + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_f(p->sc, "fcoord", mix); pass_sample_separated_gen(p->sc, tscale, 0, 0); } diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 48a8bc2eae..eb629a13ca 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -768,6 +768,7 @@ static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) // Initialize the PRNG by hashing the position + a random uniform GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);) + gl_sc_uniform_dynamic(sc); gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); } -- cgit v1.2.3