summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
authorNiklas Haas <git@haasn.xyz>2017-06-29 17:00:06 +0200
committerNiklas Haas <git@haasn.xyz>2017-07-01 00:58:27 +0200
commitdd78cc6fe72a3c5fadb00563cd47cc70b68f50fb (patch)
tree5050af3285b623499d0a2b06fa0ebb8d040b7cc9 /video
parentf003d8ea367f247e3ff49b672003817a0c3cdb30 (diff)
downloadmpv-dd78cc6fe72a3c5fadb00563cd47cc70b68f50fb.tar.bz2
mpv-dd78cc6fe72a3c5fadb00563cd47cc70b68f50fb.tar.xz
vo_opengl: refactor vo performance subsystem
This replaces `vo-performance` by `vo-passes`, bringing with it a number of changes and improvements: 1. mpv users can now introspect the vo_opengl passes, which is something that has been requested multiple times. 2. performance data is now measured per-pass, which helps both development and debugging. 3. since adding more passes is cheap, we can now report information for more passes (e.g. the blit pass, and the osd pass). Note: we also switch to nanosecond scale, to be able to measure these passes better. 4. `--user-shaders` authors can now describe their own passes, helping users both identify which user shaders are active at any given time as well as helping shader authors identify performance issues. 5. the timing data per pass is now exported as a full list of samples, so projects like Argon-/mpv-stats can immediately read out all of the samples and render a graph without having to manually poll this option constantly. Due to gl_timer's design being complicated (directly reading performance data would block, so we delay the actual read-back until the next _start command), it's vital not to conflate different passes that might be doing different things from one frame to another. To accomplish this, the actual timers are stored as part of the gl_shader_cache's sc_entry, which makes them unique for that exact shader. Starting and stopping the time measurement is easy to unify with the gl_sc architecture, because the existing API already relies on a "generate, render, reset" flow, so we can just put timer_start and timer_stop in sc_generate and sc_reset, respectively. The ugliest thing about this code is that due to the need to keep pass information relatively stable in between frames, we need to distinguish between "new" and "redrawn" frames, which bloats the code somewhat and also feels hacky and vo_opengl-specific. (But then again, this entire thing is vo_opengl-specific)
Diffstat (limited to 'video')
-rw-r--r--video/out/opengl/user_shaders.c6
-rw-r--r--video/out/opengl/user_shaders.h1
-rw-r--r--video/out/opengl/utils.c61
-rw-r--r--video/out/opengl/utils.h10
-rw-r--r--video/out/opengl/video.c199
-rw-r--r--video/out/opengl/video.h2
-rw-r--r--video/out/opengl/video_shaders.c1
-rw-r--r--video/out/vo.h24
-rw-r--r--video/out/vo_opengl.c2
9 files changed, 206 insertions, 100 deletions
diff --git a/video/out/opengl/user_shaders.c b/video/out/opengl/user_shaders.c
index 7e1e5f4d12..427295b0ad 100644
--- a/video/out/opengl/user_shaders.c
+++ b/video/out/opengl/user_shaders.c
@@ -166,6 +166,7 @@ bool parse_user_shader_pass(struct mp_log *log, struct bstr *body,
return false;
*out = (struct gl_user_shader){
+ .desc = bstr0("(unknown)"),
.offset = identity_trans,
.width = {{ SZEXP_VAR_W, { .varname = bstr0("HOOKED") }}},
.height = {{ SZEXP_VAR_H, { .varname = bstr0("HOOKED") }}},
@@ -220,6 +221,11 @@ bool parse_user_shader_pass(struct mp_log *log, struct bstr *body,
continue;
}
+ if (bstr_eatstart0(&line, "DESC")) {
+ out->desc = bstr_strip(line);
+ continue;
+ }
+
if (bstr_eatstart0(&line, "OFFSET")) {
float ox, oy;
if (bstr_sscanf(line, "%f %f", &ox, &oy) != 2) {
diff --git a/video/out/opengl/user_shaders.h b/video/out/opengl/user_shaders.h
index fd6fc248f3..458e925bc4 100644
--- a/video/out/opengl/user_shaders.h
+++ b/video/out/opengl/user_shaders.h
@@ -60,6 +60,7 @@ struct gl_user_shader {
struct bstr bind_tex[SHADER_MAX_BINDS];
struct bstr save_tex;
struct bstr pass_body;
+ struct bstr desc;
struct gl_transform offset;
struct szexp width[MAX_SZEXP_SIZE];
struct szexp height[MAX_SZEXP_SIZE];
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 7e8680fff2..3615ff92d1 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -466,6 +466,7 @@ struct sc_entry {
int num_uniforms;
bstr frag;
bstr vert;
+ struct gl_timer *timer;
};
struct gl_shader_cache {
@@ -520,6 +521,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
GL *gl = sc->gl;
if (sc->needs_reset) {
+ gl_timer_stop(gl);
gl->UseProgram(0);
for (int n = 0; n < sc->num_uniforms; n++) {
@@ -552,6 +554,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
talloc_free(e->vert.start);
talloc_free(e->frag.start);
talloc_free(e->uniforms);
+ gl_timer_free(e->timer);
}
sc->num_entries = 0;
}
@@ -1029,7 +1032,10 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
// 1. Unbind the program and all textures.
// 2. Reset the sc state and prepare for a new shader program. (All uniforms
// and fragment operations needed for the next program have to be re-added.)
-void gl_sc_generate(struct gl_shader_cache *sc)
+// The return value is a mp_pass_perf containing performance metrics for the
+// execution of the generated shader. (Note: execution is measured up until
+// the corresponding gl_sc_reset call)
+struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc)
{
GL *gl = sc->gl;
@@ -1137,6 +1143,7 @@ void gl_sc_generate(struct gl_shader_cache *sc)
*entry = (struct sc_entry){
.vert = bstrdup(NULL, *vert),
.frag = bstrdup(NULL, *frag),
+ .timer = gl_timer_create(gl),
};
}
// build vertex shader from vao and cache the locations of the uniform variables
@@ -1161,7 +1168,10 @@ void gl_sc_generate(struct gl_shader_cache *sc)
gl->ActiveTexture(GL_TEXTURE0);
+ gl_timer_start(entry->timer);
sc->needs_reset = true;
+
+ return gl_timer_measure(entry->timer);
}
// Maximum number of simultaneous query objects to keep around. Reducing this
@@ -1169,16 +1179,13 @@ void gl_sc_generate(struct gl_shader_cache *sc)
// available
#define QUERY_OBJECT_NUM 8
-// How many samples to keep around, for the sake of average and peak
-// calculations. This corresponds to a few seconds (exact time variable)
-#define QUERY_SAMPLE_SIZE 256u
-
struct gl_timer {
GL *gl;
GLuint query[QUERY_OBJECT_NUM];
int query_idx;
- GLuint64 samples[QUERY_SAMPLE_SIZE];
+ // these numbers are all in nanoseconds
+ uint64_t samples[PERF_SAMPLE_COUNT];
int sample_idx;
int sample_count;
@@ -1186,27 +1193,23 @@ struct gl_timer {
uint64_t peak;
};
-int gl_timer_sample_count(struct gl_timer *timer)
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer)
{
- return timer->sample_count;
-}
+ assert(timer);
+ struct mp_pass_perf res = {
+ .count = timer->sample_count,
+ .index = (timer->sample_idx - timer->sample_count) % PERF_SAMPLE_COUNT,
+ .peak = timer->peak,
+ .samples = timer->samples,
+ };
-uint64_t gl_timer_last_us(struct gl_timer *timer)
-{
- return timer->samples[(timer->sample_idx - 1) % QUERY_SAMPLE_SIZE] / 1000;
-}
+ res.last = timer->samples[(timer->sample_idx - 1) % PERF_SAMPLE_COUNT];
-uint64_t gl_timer_avg_us(struct gl_timer *timer)
-{
- if (timer->sample_count <= 0)
- return 0;
-
- return timer->avg_sum / timer->sample_count / 1000;
-}
+ if (timer->sample_count > 0) {
+ res.avg = timer->avg_sum / timer->sample_count;
+ }
-uint64_t gl_timer_peak_us(struct gl_timer *timer)
-{
- return timer->peak / 1000;
+ return res;
}
struct gl_timer *gl_timer_create(GL *gl)
@@ -1237,13 +1240,13 @@ void gl_timer_free(struct gl_timer *timer)
static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
{
// Input res into the buffer and grab the previous value
- GLuint64 old = timer->samples[timer->sample_idx];
+ uint64_t old = timer->samples[timer->sample_idx];
timer->samples[timer->sample_idx++] = new;
- timer->sample_idx %= QUERY_SAMPLE_SIZE;
+ timer->sample_idx %= PERF_SAMPLE_COUNT;
// Update average and sum
timer->avg_sum = timer->avg_sum + new - old;
- timer->sample_count = MPMIN(timer->sample_count + 1, QUERY_SAMPLE_SIZE);
+ timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT);
// Update peak if necessary
if (new >= timer->peak) {
@@ -1252,7 +1255,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
// It's possible that the last peak was the value we just removed,
// if so we need to scan for the new peak
uint64_t peak = new;
- for (int i = 0; i < QUERY_SAMPLE_SIZE; i++)
+ for (int i = 0; i < PERF_SAMPLE_COUNT; i++)
peak = MPMAX(peak, timer->samples[i]);
timer->peak = peak;
}
@@ -1264,6 +1267,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
// The caling code *MUST* ensure this
void gl_timer_start(struct gl_timer *timer)
{
+ assert(timer);
GL *gl = timer->gl;
if (!gl->BeginQuery)
return;
@@ -1283,9 +1287,8 @@ void gl_timer_start(struct gl_timer *timer)
gl->BeginQuery(GL_TIME_ELAPSED, id);
}
-void gl_timer_stop(struct gl_timer *timer)
+void gl_timer_stop(GL *gl)
{
- GL *gl = timer->gl;
if (gl->EndQuery)
gl->EndQuery(GL_TIME_ELAPSED);
}
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 95eb1c4fea..92b1005c39 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -169,7 +169,7 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
bool transpose, GLfloat *v);
void gl_sc_set_vao(struct gl_shader_cache *sc, struct gl_vao *vao);
void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name);
-void gl_sc_generate(struct gl_shader_cache *sc);
+struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc);
void gl_sc_reset(struct gl_shader_cache *sc);
struct mpv_global;
void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global,
@@ -180,12 +180,8 @@ struct gl_timer;
struct gl_timer *gl_timer_create(GL *gl);
void gl_timer_free(struct gl_timer *timer);
void gl_timer_start(struct gl_timer *timer);
-void gl_timer_stop(struct gl_timer *timer);
-
-int gl_timer_sample_count(struct gl_timer *timer);
-uint64_t gl_timer_last_us(struct gl_timer *timer);
-uint64_t gl_timer_avg_us(struct gl_timer *timer);
-uint64_t gl_timer_peak_us(struct gl_timer *timer);
+void gl_timer_stop(GL *gl);
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer);
#define NUM_PBO_BUFFERS 3
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 59462b8038..9dd78cf335 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -111,6 +111,15 @@ enum plane_type {
PLANE_XYZ,
};
+static const char *plane_names[] = {
+ [PLANE_NONE] = "unknown",
+ [PLANE_RGB] = "rgb",
+ [PLANE_LUMA] = "luma",
+ [PLANE_CHROMA] = "chroma",
+ [PLANE_ALPHA] = "alpha",
+ [PLANE_XYZ] = "xyz",
+};
+
// A self-contained description of a source image which can be bound to a
// texture unit and sampled from. Contains metadata about how it's to be used
struct img_tex {
@@ -158,6 +167,13 @@ struct cached_file {
struct bstr body;
};
+struct pass_info {
+ struct bstr desc;
+ struct mp_pass_perf perf;
+};
+
+#define PASS_INFO_MAX (SHADER_MAX_HOOKS + 32)
+
struct gl_video {
GL *gl;
@@ -186,10 +202,6 @@ struct gl_video {
GLuint dither_texture;
int dither_size;
- struct gl_timer *upload_timer;
- struct gl_timer *render_timer;
- struct gl_timer *present_timer;
-
struct mp_image_params real_image_params; // configured format
struct mp_image_params image_params; // texture format (mind hwdec case)
struct gl_imgfmt_desc gl_format; // texture format
@@ -239,6 +251,14 @@ struct gl_video {
bool use_linear;
float user_gamma;
+ // pass info / metrics
+ struct pass_info pass_fresh[PASS_INFO_MAX];
+ struct pass_info pass_redraw[PASS_INFO_MAX];
+ struct pass_info *pass;
+ int pass_idx;
+ struct gl_timer *upload_timer;
+ struct gl_timer *blit_timer;
+
// hooks and saved textures
struct saved_tex saved_tex[SHADER_MAX_SAVED];
int saved_tex_num;
@@ -931,6 +951,43 @@ static void uninit_video(struct gl_video *p)
p->hwdec_active = false;
}
+static void pass_record(struct gl_video *p, struct mp_pass_perf perf)
+{
+ assert(p->pass_idx < PASS_INFO_MAX);
+ struct pass_info *pass = &p->pass[p->pass_idx];
+ pass->perf = perf;
+
+ if (pass->desc.len == 0)
+ bstr_xappend(p, &pass->desc, bstr0("(unknown)"));
+
+ p->pass_idx++;
+}
+
+static void pass_describe(struct gl_video *p, const char *textf, ...)
+{
+ assert(p->pass_idx < PASS_INFO_MAX);
+ struct pass_info *pass = &p->pass[p->pass_idx];
+
+ if (pass->desc.len > 0)
+ bstr_xappend(p, &pass->desc, bstr0(" + "));
+
+ va_list ap;
+ va_start(ap, textf);
+ bstr_xappend_vasprintf(p, &pass->desc, textf, ap);
+ va_end(ap);
+}
+
+static void pass_info_reset(struct gl_video *p, bool is_redraw)
+{
+ p->pass = is_redraw ? p->pass_redraw : p->pass_fresh;
+ p->pass_idx = 0;
+
+ for (int i = 0; i < PASS_INFO_MAX; i++) {
+ p->pass[i].desc.len = 0;
+ p->pass[i].perf = (struct mp_pass_perf){0};
+ }
+}
+
static void pass_prepare_src_tex(struct gl_video *p)
{
struct gl_shader_cache *sc = p->sc;
@@ -1008,7 +1065,7 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
{
GL *gl = p->gl;
pass_prepare_src_tex(p);
- gl_sc_generate(p->sc);
+ pass_record(p, gl_sc_generate(p->sc));
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
render_pass_quad(p, vp_w, vp_h, dst);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
@@ -1450,7 +1507,7 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src,
// First pass (scale only in the y dir)
src.transform = t_y;
sampler_prelude(p->sc, pass_bind(p, src));
- GLSLF("// pass 1\n");
+ GLSLF("// first pass\n");
pass_sample_separated_gen(p->sc, scaler, 0, 1);
GLSLF("color *= %f;\n", src.multiplier);
finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H);
@@ -1458,8 +1515,8 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src,
// Second pass (scale only in the x dir)
src = img_tex_fbo(&scaler->sep_fbo, src.type, src.components);
src.transform = t_x;
+ pass_describe(p, "%s second pass", scaler->conf.kernel.name);
sampler_prelude(p->sc, pass_bind(p, src));
- GLSLF("// pass 2\n");
pass_sample_separated_gen(p->sc, scaler, 1, 0);
}
@@ -1475,6 +1532,17 @@ static void pass_sample(struct gl_video *p, struct img_tex tex,
{
reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);
+ // Describe scaler
+ const char *scaler_opt[] = {
+ [SCALER_SCALE] = "scale",
+ [SCALER_DSCALE] = "dscale",
+ [SCALER_CSCALE] = "cscale",
+ [SCALER_TSCALE] = "tscale",
+ };
+
+ pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index],
+ scaler->conf.kernel.name, plane_names[tex.type]);
+
bool is_separated = scaler->kernel && !scaler->kernel->polar;
// Set up the transformation+prelude and bind the texture, for everything
@@ -1550,12 +1618,14 @@ static void pass_add_hooks(struct gl_video *p, struct tex_hook hook,
static void deband_hook(struct gl_video *p, struct img_tex tex,
struct gl_transform *trans, void *priv)
{
+ pass_describe(p, "debanding (%s)", plane_names[tex.type]);
pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg);
}
static void unsharp_hook(struct gl_video *p, struct img_tex tex,
struct gl_transform *trans, void *priv)
{
+ pass_describe(p, "unsharp masking");
GLSLF("#define tex HOOKED\n");
GLSLF("#define pos HOOKED_pos\n");
GLSLF("#define pt HOOKED_pt\n");
@@ -1620,8 +1690,10 @@ static void user_hook(struct gl_video *p, struct img_tex tex,
struct gl_user_shader *shader = priv;
assert(shader);
+ pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->desc),
+ plane_names[tex.type]);
+
load_shader(p, shader->pass_body);
- GLSLF("// custom hook\n");
GLSLF("color = hook();\n");
// Make sure we at least create a legal FBO on failure, since it's better
@@ -1734,6 +1806,7 @@ static void pass_read_video(struct gl_video *p)
if (num > 0) {
GLSLF("// merging plane %d ... into %d\n", n, first);
copy_img_tex(p, &num, tex[n]);
+ pass_describe(p, "merging planes");
finish_pass_fbo(p, &p->merge_fbo[n], tex[n].w, tex[n].h, 0);
tex[first] = img_tex_fbo(&p->merge_fbo[n], tex[n].type, num);
tex[n] = (struct img_tex){0};
@@ -1745,8 +1818,8 @@ static void pass_read_video(struct gl_video *p)
for (int n = 0; n < 4; n++) {
if (gl_is_integer_format(tex[n].gl_format)) {
GLSLF("// use_integer fix for plane %d\n", n);
-
copy_img_tex(p, &(int){0}, tex[n]);
+ pass_describe(p, "use_integer fix");
finish_pass_fbo(p, &p->integer_fbo[n], tex[n].w, tex[n].h, 0);
tex[n] = img_tex_fbo(&p->integer_fbo[n], tex[n].type,
tex[n].components);
@@ -1913,7 +1986,7 @@ static void pass_convert_yuv(struct gl_video *p)
mp_csp_copy_equalizer_values(&cparams, &p->video_eq);
p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma);
- GLSLF("// color conversion\n");
+ pass_describe(p, "color conversion");
if (p->color_swizzle[0])
GLSLF("color = color.%s;\n", p->color_swizzle);
@@ -2292,12 +2365,12 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
gl_sc_uniform_sampler(p->sc, "osdtex", GL_TEXTURE_2D, 0);
switch (fmt) {
case SUBBITMAP_RGBA: {
- GLSLF("// OSD (RGBA)\n");
+ pass_describe(p, "drawing osd (rgba)");
GLSL(color = texture(osdtex, texcoord).bgra;)
break;
}
case SUBBITMAP_LIBASS: {
- GLSLF("// OSD (libass)\n");
+ pass_describe(p, "drawing osd (libass)");
GLSL(color =
vec4(ass_color.rgb, ass_color.a * texture(osdtex, texcoord).r);)
break;
@@ -2317,7 +2390,7 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
pass_colormanage(p, csp_srgb, true);
}
gl_sc_set_vao(p->sc, mpgl_osd_get_vao(p->osd));
- gl_sc_generate(p->sc);
+ pass_record(p, gl_sc_generate(p->sc));
mpgl_osd_draw_part(p->osd, vp_w, vp_h, n);
gl_sc_reset(p->sc);
}
@@ -2386,10 +2459,6 @@ static void pass_render_frame(struct gl_video *p)
if (p->dumb_mode)
return;
- // start the render timer here. it will continue to the end of this
- // function, to render the time needed to draw (excluding screen
- // presentation)
- gl_timer_start(p->render_timer);
p->use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling;
pass_read_video(p);
@@ -2414,6 +2483,7 @@ static void pass_render_frame(struct gl_video *p)
rect.w, rect.h, p->blend_subs_fbo.fbo, false);
GLSL(color = texture(texture0, texcoord0);)
pass_read_fbo(p, &p->blend_subs_fbo);
+ pass_describe(p, "blend subs video");
}
pass_opt_hook_point(p, "MAIN", &p->texture_offset);
@@ -2444,17 +2514,14 @@ static void pass_render_frame(struct gl_video *p)
pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect,
p->texture_w, p->texture_h, p->blend_subs_fbo.fbo, false);
pass_read_fbo(p, &p->blend_subs_fbo);
+ pass_describe(p, "blend subs");
}
pass_opt_hook_point(p, "SCALED", NULL);
-
- gl_timer_stop(p->render_timer);
}
static void pass_draw_to_screen(struct gl_video *p, int fbo)
{
- gl_timer_start(p->present_timer);
-
if (p->dumb_mode)
pass_render_frame_dumb(p, fbo);
@@ -2486,9 +2553,8 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo)
pass_opt_hook_point(p, "OUTPUT", NULL);
pass_dither(p);
+ pass_describe(p, "output to screen");
finish_pass_direct(p, fbo, p->vp_w, p->vp_h, &p->dst_rect);
-
- gl_timer_stop(p->present_timer);
}
// Draws an interpolate frame to fbo, based on the frame timing in t
@@ -2498,6 +2564,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
vp_h = p->dst_rect.y1 - p->dst_rect.y0;
+ bool is_new = false;
+
// Reset the queue completely if this is a still image, to avoid any
// interpolation artifacts from surrounding frames when unpausing or
// framestepping
@@ -2507,6 +2575,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
// First of all, figure out if we have a frame available at all, and draw
// it manually + reset the queue if not
if (p->surfaces[p->surface_now].id == 0) {
+ is_new = true;
+ pass_info_reset(p, false);
if (!gl_video_upload_image(p, t->current, t->frame_id))
return;
pass_render_frame(p);
@@ -2569,6 +2639,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
continue;
if (f_id > p->surfaces[p->surface_idx].id) {
+ is_new = true;
+ pass_info_reset(p, false);
if (!gl_video_upload_image(p, f, f_id))
return;
pass_render_frame(p);
@@ -2601,6 +2673,9 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
p->osd_pts = p->surfaces[surface_now].pts;
// Finally, draw the right mix of frames to the screen.
+ if (!is_new)
+ pass_info_reset(p, true);
+ pass_describe(p, "interpolation");
if (!valid || t->still) {
// surface_now is guaranteed to be valid, so we can safely use it.
pass_read_fbo(p, &p->surfaces[surface_now].fbotex);
@@ -2667,16 +2742,6 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
p->frames_drawn += 1;
}
-static void timer_dbg(struct gl_video *p, const char *name, struct gl_timer *t)
-{
- if (gl_timer_sample_count(t) > 0) {
- MP_DBG(p, "%s time: last %dus avg %dus peak %dus\n", name,
- (int)gl_timer_last_us(t),
- (int)gl_timer_avg_us(t),
- (int)gl_timer_peak_us(t));
- }
-}
-
// (fbo==0 makes BindFramebuffer select the screen backbuffer)
void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
{
@@ -2757,6 +2822,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
if (is_new || !p->output_fbo_valid) {
p->output_fbo_valid = false;
+ pass_info_reset(p, false);
if (!gl_video_upload_image(p, frame->current, frame->frame_id))
goto done;
pass_render_frame(p);
@@ -2778,6 +2844,8 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
// "output fbo valid" and "output fbo needed" are equivalent
if (p->output_fbo_valid) {
+ pass_info_reset(p, true);
+ pass_describe(p, "redraw cached frame");
gl->BindFramebuffer(GL_READ_FRAMEBUFFER, p->output_fbo.fbo);
gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
struct mp_rect rc = p->dst_rect;
@@ -2785,11 +2853,14 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
rc.y1 = -p->vp_h - p->dst_rect.y0;
rc.y0 = -p->vp_h - p->dst_rect.y1;
}
+ gl_timer_start(p->blit_timer);
gl->BlitFramebuffer(rc.x0, rc.y0, rc.x1, rc.y1,
rc.x0, rc.y0, rc.x1, rc.y1,
GL_COLOR_BUFFER_BIT, GL_NEAREST);
+ gl_timer_stop(gl);
gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+ pass_record(p, gl_timer_measure(p->blit_timer));
}
}
}
@@ -2830,9 +2901,16 @@ done:
p->frames_rendered++;
// Report performance metrics
- timer_dbg(p, "upload", p->upload_timer);
- timer_dbg(p, "render", p->render_timer);
- timer_dbg(p, "present", p->present_timer);
+ for (int i = 0; i < PASS_INFO_MAX; i++) {
+ struct pass_info *pass = &p->pass[i];
+ if (pass->desc.len) {
+ MP_DBG(p, "pass '%.*s': last %dus avg %dus peak %dus\n",
+ BSTR_P(pass->desc),
+ (int)pass->perf.last/1000,
+ (int)pass->perf.avg/1000,
+ (int)pass->perf.peak/1000);
+ }
+ }
}
// vp_w/vp_h is the implicit size of the target framebuffer.
@@ -2857,22 +2935,22 @@ void gl_video_resize(struct gl_video *p, int vp_w, int vp_h,
p->hwdec->driver->overlay_adjust(p->hwdec, vp_w, abs(vp_h), src, dst);
}
-static struct voctrl_performance_entry gl_video_perfentry(struct gl_timer *t)
+static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out)
{
- return (struct voctrl_performance_entry) {
- .last = gl_timer_last_us(t),
- .avg = gl_timer_avg_us(t),
- .peak = gl_timer_peak_us(t),
- };
+ for (int i = 0; i < PASS_INFO_MAX; i++) {
+ if (!pass[i].desc.len)
+ break;
+ out->perf[out->count] = pass[i].perf;
+ out->desc[out->count] = pass[i].desc.start;
+ out->count++;
+ }
}
-struct voctrl_performance_data gl_video_perfdata(struct gl_video *p)
+void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out)
{
- return (struct voctrl_performance_data) {
- .upload = gl_video_perfentry(p->upload_timer),
- .render = gl_video_perfentry(p->render_timer),
- .present = gl_video_perfentry(p->present_timer),
- };
+ *out = (struct voctrl_performance_data){0};
+ frame_perf_data(p->pass_fresh, &out->fresh);
+ frame_perf_data(p->pass_redraw, &out->redraw);
}
// This assumes nv12, with textures set to GL_NEAREST filtering.
@@ -2942,9 +3020,13 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi,
if (p->hwdec_active) {
// Hardware decoding
struct gl_hwdec_frame gl_frame = {0};
+
+ pass_describe(p, "map frame (hwdec)");
gl_timer_start(p->upload_timer);
bool ok = p->hwdec->driver->map_frame(p->hwdec, vimg->mpi, &gl_frame) >= 0;
- gl_timer_stop(p->upload_timer);
+ gl_timer_stop(gl);
+ pass_record(p, gl_timer_measure(p->upload_timer));
+
vimg->hwdec_mapped = true;
if (ok) {
struct mp_image layout = {0};
@@ -2973,9 +3055,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi,
// Software decoding
assert(mpi->num_planes == p->plane_count);
+ pass_describe(p, "upload frame (swdec)");
gl_timer_start(p->upload_timer);
-
-
for (int n = 0; n < p->plane_count; n++) {
struct texplane *plane = &vimg->planes[n];
@@ -2988,8 +3069,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi,
0, 0, plane->w, plane->h);
gl->BindTexture(plane->gl_target, 0);
}
-
- gl_timer_stop(p->upload_timer);
+ gl_timer_stop(gl);
+ pass_record(p, gl_timer_measure(p->upload_timer));
return true;
@@ -3182,9 +3263,8 @@ static void init_gl(struct gl_video *p)
if (p->texture_16bit_depth > 0)
MP_VERBOSE(p, "16 bit texture depth: %d.\n", p->texture_16bit_depth);
- p->upload_timer = gl_timer_create(p->gl);
- p->render_timer = gl_timer_create(p->gl);
- p->present_timer = gl_timer_create(p->gl);
+ p->upload_timer = gl_timer_create(gl);
+ p->blit_timer = gl_timer_create(gl);
debug_check_gl(p, "after init_gl");
}
@@ -3205,8 +3285,11 @@ void gl_video_uninit(struct gl_video *p)
gl->DeleteTextures(1, &p->lut_3d_texture);
gl_timer_free(p->upload_timer);
- gl_timer_free(p->render_timer);
- gl_timer_free(p->present_timer);
+ gl_timer_free(p->blit_timer);
+ for (int i = 0; i < PASS_INFO_MAX; i++) {
+ talloc_free(p->pass_fresh[i].desc.start);
+ talloc_free(p->pass_redraw[i].desc.start);
+ }
mpgl_osd_destroy(p->osd);
diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h
index 55fd18461a..8526cecb84 100644
--- a/video/out/opengl/video.h
+++ b/video/out/opengl/video.h
@@ -155,7 +155,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo);
void gl_video_resize(struct gl_video *p, int vp_w, int vp_h,
struct mp_rect *src, struct mp_rect *dst,
struct mp_osd_res *osd);
-struct voctrl_performance_data gl_video_perfdata(struct gl_video *p);
+void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out);
struct mp_csp_equalizer;
struct mp_csp_equalizer *gl_video_eq_ptr(struct gl_video *p);
void gl_video_eq_update(struct gl_video *p);
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 982a26cf4b..9e54d33d41 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -701,7 +701,6 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
// Assumes the texture was hooked
void pass_sample_unsharp(struct gl_shader_cache *sc, float param) {
- GLSLF("// unsharp\n");
GLSLF("{\n");
GLSL(float st1 = 1.2;)
GLSL(vec4 p = HOOKED_tex(HOOKED_pos);)
diff --git a/video/out/vo.h b/video/out/vo.h
index c59e1b04c8..6dce8f6c2f 100644
--- a/video/out/vo.h
+++ b/video/out/vo.h
@@ -143,13 +143,31 @@ struct voctrl_playback_state {
};
// VOCTRL_PERFORMANCE_DATA
-struct voctrl_performance_entry {
- // Times are in microseconds
+#define PERF_SAMPLE_COUNT 256u
+
+struct mp_pass_perf {
+ // times are all in nanoseconds
uint64_t last, avg, peak;
+ // this is a ring buffer, indices are relative to index and modulo
+ // PERF_SAMPLE_COUNT
+ uint64_t *samples;
+ int count;
+ int index;
+};
+
+#define VO_PASS_PERF_MAX 128
+
+struct mp_frame_perf {
+ int count;
+ struct mp_pass_perf perf[VO_PASS_PERF_MAX];
+ // The owner of this struct does not have ownership over the names, and
+ // they may change at any time - so this struct should not be stored
+ // anywhere or the results reused
+ char *desc[VO_PASS_PERF_MAX];
};
struct voctrl_performance_data {
- struct voctrl_performance_entry upload, render, present;
+ struct mp_frame_perf fresh, redraw;
};
enum {
diff --git a/video/out/vo_opengl.c b/video/out/vo_opengl.c
index 9b3f944e21..f5b0bd37c4 100644
--- a/video/out/vo_opengl.c
+++ b/video/out/vo_opengl.c
@@ -301,7 +301,7 @@ static int control(struct vo *vo, uint32_t request, void *data)
vo->want_redraw = true;
return true;
case VOCTRL_PERFORMANCE_DATA:
- *(struct voctrl_performance_data *)data = gl_video_perfdata(p->renderer);
+ gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data);
return true;
}