summaryrefslogtreecommitdiffstats
path: root/video/out/opengl/utils.c
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-06-29 17:00:06 +0200
committerNiklas Haas <git@haasn.xyz>2017-07-01 00:58:27 +0200
commitdd78cc6fe72a3c5fadb00563cd47cc70b68f50fb (patch)
tree5050af3285b623499d0a2b06fa0ebb8d040b7cc9 /video/out/opengl/utils.c
parentf003d8ea367f247e3ff49b672003817a0c3cdb30 (diff)
downloadmpv-dd78cc6fe72a3c5fadb00563cd47cc70b68f50fb.tar.bz2
mpv-dd78cc6fe72a3c5fadb00563cd47cc70b68f50fb.tar.xz
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number of changes and improvements: 1. mpv users can now introspect the vo_opengl passes, which is something that has been requested multiple times. 2. performance data is now measured per-pass, which helps both development and debugging. 3. since adding more passes is cheap, we can now report information for more passes (e.g. the blit pass, and the osd pass). Note: we also switch to nanosecond scale, to be able to measure these passes better. 4. `--user-shaders` authors can now describe their own passes, helping users both identify which user shaders are active at any given time as well as helping shader authors identify performance issues. 5. the timing data per pass is now exported as a full list of samples, so projects like Argon-/mpv-stats can immediately read out all of the samples and render a graph without having to manually poll this option constantly. Due to gl_timer's design being complicated (directly reading performance data would block, so we delay the actual read-back until the next _start command), it's vital not to conflate different passes that might be doing different things from one frame to another. To accomplish this, the actual timers are stored as part of the gl_shader_cache's sc_entry, which makes them unique for that exact shader. Starting and stopping the time measurement is easy to unify with the gl_sc architecture, because the existing API already relies on a "generate, render, reset" flow, so we can just put timer_start and timer_stop in sc_generate and sc_reset, respectively. The ugliest thing about this code is that due to the need to keep pass information relatively stable in between frames, we need to distinguish between "new" and "redrawn" frames, which bloats the code somewhat and also feels hacky and vo_opengl-specific. (But then again, this entire thing is vo_opengl-specific)
Diffstat (limited to 'video/out/opengl/utils.c')
-rw-r--r--video/out/opengl/utils.c61
1 files changed, 32 insertions, 29 deletions
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 7e8680fff2..3615ff92d1 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -466,6 +466,7 @@ struct sc_entry {
int num_uniforms;
bstr frag;
bstr vert;
+ struct gl_timer *timer;
};
struct gl_shader_cache {
@@ -520,6 +521,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
GL *gl = sc->gl;
if (sc->needs_reset) {
+ gl_timer_stop(gl);
gl->UseProgram(0);
for (int n = 0; n < sc->num_uniforms; n++) {
@@ -552,6 +554,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
talloc_free(e->vert.start);
talloc_free(e->frag.start);
talloc_free(e->uniforms);
+ gl_timer_free(e->timer);
}
sc->num_entries = 0;
}
@@ -1029,7 +1032,10 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
// 1. Unbind the program and all textures.
// 2. Reset the sc state and prepare for a new shader program. (All uniforms
// and fragment operations needed for the next program have to be re-added.)
-void gl_sc_generate(struct gl_shader_cache *sc)
+// The return value is a mp_pass_perf containing performance metrics for the
+// execution of the generated shader. (Note: execution is measured up until
+// the corresponding gl_sc_reset call)
+struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc)
{
GL *gl = sc->gl;
@@ -1137,6 +1143,7 @@ void gl_sc_generate(struct gl_shader_cache *sc)
*entry = (struct sc_entry){
.vert = bstrdup(NULL, *vert),
.frag = bstrdup(NULL, *frag),
+ .timer = gl_timer_create(gl),
};
}
// build vertex shader from vao and cache the locations of the uniform variables
@@ -1161,7 +1168,10 @@ void gl_sc_generate(struct gl_shader_cache *sc)
gl->ActiveTexture(GL_TEXTURE0);
+ gl_timer_start(entry->timer);
sc->needs_reset = true;
+
+ return gl_timer_measure(entry->timer);
}
// Maximum number of simultaneous query objects to keep around. Reducing this
@@ -1169,16 +1179,13 @@ void gl_sc_generate(struct gl_shader_cache *sc)
// available
#define QUERY_OBJECT_NUM 8
-// How many samples to keep around, for the sake of average and peak
-// calculations. This corresponds to a few seconds (exact time variable)
-#define QUERY_SAMPLE_SIZE 256u
-
struct gl_timer {
GL *gl;
GLuint query[QUERY_OBJECT_NUM];
int query_idx;
- GLuint64 samples[QUERY_SAMPLE_SIZE];
+ // these numbers are all in nanoseconds
+ uint64_t samples[PERF_SAMPLE_COUNT];
int sample_idx;
int sample_count;
@@ -1186,27 +1193,23 @@ struct gl_timer {
uint64_t peak;
};
-int gl_timer_sample_count(struct gl_timer *timer)
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer)
{
- return timer->sample_count;
-}
+ assert(timer);
+ struct mp_pass_perf res = {
+ .count = timer->sample_count,
+ .index = (timer->sample_idx - timer->sample_count) % PERF_SAMPLE_COUNT,
+ .peak = timer->peak,
+ .samples = timer->samples,
+ };
-uint64_t gl_timer_last_us(struct gl_timer *timer)
-{
- return timer->samples[(timer->sample_idx - 1) % QUERY_SAMPLE_SIZE] / 1000;
-}
+ res.last = timer->samples[(timer->sample_idx - 1) % PERF_SAMPLE_COUNT];
-uint64_t gl_timer_avg_us(struct gl_timer *timer)
-{
- if (timer->sample_count <= 0)
- return 0;
-
- return timer->avg_sum / timer->sample_count / 1000;
-}
+ if (timer->sample_count > 0) {
+ res.avg = timer->avg_sum / timer->sample_count;
+ }
-uint64_t gl_timer_peak_us(struct gl_timer *timer)
-{
- return timer->peak / 1000;
+ return res;
}
struct gl_timer *gl_timer_create(GL *gl)
@@ -1237,13 +1240,13 @@ void gl_timer_free(struct gl_timer *timer)
static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
{
// Input res into the buffer and grab the previous value
- GLuint64 old = timer->samples[timer->sample_idx];
+ uint64_t old = timer->samples[timer->sample_idx];
timer->samples[timer->sample_idx++] = new;
- timer->sample_idx %= QUERY_SAMPLE_SIZE;
+ timer->sample_idx %= PERF_SAMPLE_COUNT;
// Update average and sum
timer->avg_sum = timer->avg_sum + new - old;
- timer->sample_count = MPMIN(timer->sample_count + 1, QUERY_SAMPLE_SIZE);
+ timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT);
// Update peak if necessary
if (new >= timer->peak) {
@@ -1252,7 +1255,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
// It's possible that the last peak was the value we just removed,
// if so we need to scan for the new peak
uint64_t peak = new;
- for (int i = 0; i < QUERY_SAMPLE_SIZE; i++)
+ for (int i = 0; i < PERF_SAMPLE_COUNT; i++)
peak = MPMAX(peak, timer->samples[i]);
timer->peak = peak;
}
@@ -1264,6 +1267,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
// The calling code *MUST* ensure this
void gl_timer_start(struct gl_timer *timer)
{
+ assert(timer);
GL *gl = timer->gl;
if (!gl->BeginQuery)
return;
@@ -1283,9 +1287,8 @@ void gl_timer_start(struct gl_timer *timer)
gl->BeginQuery(GL_TIME_ELAPSED, id);
}
-void gl_timer_stop(struct gl_timer *timer)
+void gl_timer_stop(GL *gl)
{
- GL *gl = timer->gl;
if (gl->EndQuery)
gl->EndQuery(GL_TIME_ELAPSED);
}