diff options
author | Niklas Haas <git@haasn.xyz> | 2017-06-29 17:00:06 +0200 |
---|---|---|
committer | Niklas Haas <git@haasn.xyz> | 2017-07-01 00:58:27 +0200 |
commit | dd78cc6fe72a3c5fadb00563cd47cc70b68f50fb (patch) | |
tree | 5050af3285b623499d0a2b06fa0ebb8d040b7cc9 /video/out/opengl/utils.c | |
parent | f003d8ea367f247e3ff49b672003817a0c3cdb30 (diff) | |
download | mpv-dd78cc6fe72a3c5fadb00563cd47cc70b68f50fb.tar.bz2 mpv-dd78cc6fe72a3c5fadb00563cd47cc70b68f50fb.tar.xz |
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number
of changes and improvements:
1. mpv users can now introspect the vo_opengl passes, which is something
that has been requested multiple times.
2. performance data is now measured per-pass, which helps both
development and debugging.
3. since adding more passes is cheap, we can now report information for
more passes (e.g. the blit pass and the OSD pass). Note: timings are
now reported in nanoseconds rather than microseconds, so that these
short passes can be measured more accurately.
4. `--user-shaders` authors can now describe their own passes, helping
users both identify which user shaders are active at any given time
as well as helping shader authors identify performance issues.
5. the timing data per pass is now exported as a full list of samples,
so projects like Argon-/mpv-stats can immediately read out all of the
samples and render a graph without having to manually poll
`vo-passes` constantly.
Because gl_timer's design is complicated (reading performance data
directly would block, so the actual read-back is delayed until the next
gl_timer_start call), it's vital not to conflate different passes that
might be doing different things from one frame to another. To accomplish
this, the actual timers are stored as part of the gl_shader_cache's
sc_entry, which makes each timer unique to that exact shader.
Starting and stopping the time measurement is easy to unify with the
gl_sc architecture, because the existing API already relies on a
"generate, render, reset" flow, so we can just put timer_start and
timer_stop in sc_generate and sc_reset, respectively.
The ugliest thing about this code is that, because pass information has
to stay relatively stable between frames, we need to distinguish
between "new" and "redrawn" frames, which bloats the code somewhat and
also feels hacky and vo_opengl-specific. (But then again, this entire
thing is vo_opengl-specific.)
Diffstat (limited to 'video/out/opengl/utils.c')
-rw-r--r-- | video/out/opengl/utils.c | 61 |
1 files changed, 32 insertions, 29 deletions
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 7e8680fff2..3615ff92d1 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -466,6 +466,7 @@ struct sc_entry { int num_uniforms; bstr frag; bstr vert; + struct gl_timer *timer; }; struct gl_shader_cache { @@ -520,6 +521,7 @@ void gl_sc_reset(struct gl_shader_cache *sc) GL *gl = sc->gl; if (sc->needs_reset) { + gl_timer_stop(gl); gl->UseProgram(0); for (int n = 0; n < sc->num_uniforms; n++) { @@ -552,6 +554,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc) talloc_free(e->vert.start); talloc_free(e->frag.start); talloc_free(e->uniforms); + gl_timer_free(e->timer); } sc->num_entries = 0; } @@ -1029,7 +1032,10 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex, // 1. Unbind the program and all textures. // 2. Reset the sc state and prepare for a new shader program. (All uniforms // and fragment operations needed for the next program have to be re-added.) -void gl_sc_generate(struct gl_shader_cache *sc) +// The return value is a mp_pass_perf containing performance metrics for the +// execution of the generated shader. (Note: execution is measured up until +// the corresponding gl_sc_reset call) +struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc) { GL *gl = sc->gl; @@ -1137,6 +1143,7 @@ void gl_sc_generate(struct gl_shader_cache *sc) *entry = (struct sc_entry){ .vert = bstrdup(NULL, *vert), .frag = bstrdup(NULL, *frag), + .timer = gl_timer_create(gl), }; } // build vertex shader from vao and cache the locations of the uniform variables @@ -1161,7 +1168,10 @@ void gl_sc_generate(struct gl_shader_cache *sc) gl->ActiveTexture(GL_TEXTURE0); + gl_timer_start(entry->timer); sc->needs_reset = true; + + return gl_timer_measure(entry->timer); } // Maximum number of simultaneous query objects to keep around. 
Reducing this @@ -1169,16 +1179,13 @@ void gl_sc_generate(struct gl_shader_cache *sc) // available #define QUERY_OBJECT_NUM 8 -// How many samples to keep around, for the sake of average and peak -// calculations. This corresponds to a few seconds (exact time variable) -#define QUERY_SAMPLE_SIZE 256u - struct gl_timer { GL *gl; GLuint query[QUERY_OBJECT_NUM]; int query_idx; - GLuint64 samples[QUERY_SAMPLE_SIZE]; + // these numbers are all in nanoseconds + uint64_t samples[PERF_SAMPLE_COUNT]; int sample_idx; int sample_count; @@ -1186,27 +1193,23 @@ struct gl_timer { uint64_t peak; }; -int gl_timer_sample_count(struct gl_timer *timer) +struct mp_pass_perf gl_timer_measure(struct gl_timer *timer) { - return timer->sample_count; -} + assert(timer); + struct mp_pass_perf res = { + .count = timer->sample_count, + .index = (timer->sample_idx - timer->sample_count) % PERF_SAMPLE_COUNT, + .peak = timer->peak, + .samples = timer->samples, + }; -uint64_t gl_timer_last_us(struct gl_timer *timer) -{ - return timer->samples[(timer->sample_idx - 1) % QUERY_SAMPLE_SIZE] / 1000; -} + res.last = timer->samples[(timer->sample_idx - 1) % PERF_SAMPLE_COUNT]; -uint64_t gl_timer_avg_us(struct gl_timer *timer) -{ - if (timer->sample_count <= 0) - return 0; - - return timer->avg_sum / timer->sample_count / 1000; -} + if (timer->sample_count > 0) { + res.avg = timer->avg_sum / timer->sample_count; + } -uint64_t gl_timer_peak_us(struct gl_timer *timer) -{ - return timer->peak / 1000; + return res; } struct gl_timer *gl_timer_create(GL *gl) @@ -1237,13 +1240,13 @@ void gl_timer_free(struct gl_timer *timer) static void gl_timer_record(struct gl_timer *timer, GLuint64 new) { // Input res into the buffer and grab the previous value - GLuint64 old = timer->samples[timer->sample_idx]; + uint64_t old = timer->samples[timer->sample_idx]; timer->samples[timer->sample_idx++] = new; - timer->sample_idx %= QUERY_SAMPLE_SIZE; + timer->sample_idx %= PERF_SAMPLE_COUNT; // Update average and sum 
timer->avg_sum = timer->avg_sum + new - old; - timer->sample_count = MPMIN(timer->sample_count + 1, QUERY_SAMPLE_SIZE); + timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT); // Update peak if necessary if (new >= timer->peak) { @@ -1252,7 +1255,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new) // It's possible that the last peak was the value we just removed, // if so we need to scan for the new peak uint64_t peak = new; - for (int i = 0; i < QUERY_SAMPLE_SIZE; i++) + for (int i = 0; i < PERF_SAMPLE_COUNT; i++) peak = MPMAX(peak, timer->samples[i]); timer->peak = peak; } @@ -1264,6 +1267,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new) // The caling code *MUST* ensure this void gl_timer_start(struct gl_timer *timer) { + assert(timer); GL *gl = timer->gl; if (!gl->BeginQuery) return; @@ -1283,9 +1287,8 @@ void gl_timer_start(struct gl_timer *timer) gl->BeginQuery(GL_TIME_ELAPSED, id); } -void gl_timer_stop(struct gl_timer *timer) +void gl_timer_stop(GL *gl) { - GL *gl = timer->gl; if (gl->EndQuery) gl->EndQuery(GL_TIME_ELAPSED); } |