summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
Diffstat (limited to 'video')
-rw-r--r--video/out/opengl/user_shaders.c6
-rw-r--r--video/out/opengl/user_shaders.h1
-rw-r--r--video/out/opengl/utils.c61
-rw-r--r--video/out/opengl/utils.h10
-rw-r--r--video/out/opengl/video.c199
-rw-r--r--video/out/opengl/video.h2
-rw-r--r--video/out/opengl/video_shaders.c1
-rw-r--r--video/out/vo.h24
-rw-r--r--video/out/vo_opengl.c2
9 files changed, 206 insertions, 100 deletions
diff --git a/video/out/opengl/user_shaders.c b/video/out/opengl/user_shaders.c
index 7e1e5f4d12..427295b0ad 100644
--- a/video/out/opengl/user_shaders.c
+++ b/video/out/opengl/user_shaders.c
@@ -166,6 +166,7 @@ bool parse_user_shader_pass(struct mp_log *log, struct bstr *body,
return false;
*out = (struct gl_user_shader){
+ .desc = bstr0("(unknown)"),
.offset = identity_trans,
.width = {{ SZEXP_VAR_W, { .varname = bstr0("HOOKED") }}},
.height = {{ SZEXP_VAR_H, { .varname = bstr0("HOOKED") }}},
@@ -220,6 +221,11 @@ bool parse_user_shader_pass(struct mp_log *log, struct bstr *body,
continue;
}
+ if (bstr_eatstart0(&line, "DESC")) {
+ out->desc = bstr_strip(line);
+ continue;
+ }
+
if (bstr_eatstart0(&line, "OFFSET")) {
float ox, oy;
if (bstr_sscanf(line, "%f %f", &ox, &oy) != 2) {
diff --git a/video/out/opengl/user_shaders.h b/video/out/opengl/user_shaders.h
index fd6fc248f3..458e925bc4 100644
--- a/video/out/opengl/user_shaders.h
+++ b/video/out/opengl/user_shaders.h
@@ -60,6 +60,7 @@ struct gl_user_shader {
struct bstr bind_tex[SHADER_MAX_BINDS];
struct bstr save_tex;
struct bstr pass_body;
+ struct bstr desc;
struct gl_transform offset;
struct szexp width[MAX_SZEXP_SIZE];
struct szexp height[MAX_SZEXP_SIZE];
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 7e8680fff2..3615ff92d1 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -466,6 +466,7 @@ struct sc_entry {
int num_uniforms;
bstr frag;
bstr vert;
+ struct gl_timer *timer;
};
struct gl_shader_cache {
@@ -520,6 +521,7 @@ void gl_sc_reset(struct gl_shader_cache *sc)
GL *gl = sc->gl;
if (sc->needs_reset) {
+ gl_timer_stop(gl);
gl->UseProgram(0);
for (int n = 0; n < sc->num_uniforms; n++) {
@@ -552,6 +554,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc)
talloc_free(e->vert.start);
talloc_free(e->frag.start);
talloc_free(e->uniforms);
+ gl_timer_free(e->timer);
}
sc->num_entries = 0;
}
@@ -1029,7 +1032,10 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex,
// 1. Unbind the program and all textures.
// 2. Reset the sc state and prepare for a new shader program. (All uniforms
// and fragment operations needed for the next program have to be re-added.)
-void gl_sc_generate(struct gl_shader_cache *sc)
+// The return value is a mp_pass_perf containing performance metrics for the
+// execution of the generated shader. (Note: execution is measured up until
+// the corresponding gl_sc_reset call)
+struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc)
{
GL *gl = sc->gl;
@@ -1137,6 +1143,7 @@ void gl_sc_generate(struct gl_shader_cache *sc)
*entry = (struct sc_entry){
.vert = bstrdup(NULL, *vert),
.frag = bstrdup(NULL, *frag),
+ .timer = gl_timer_create(gl),
};
}
// build vertex shader from vao and cache the locations of the uniform variables
@@ -1161,7 +1168,10 @@ void gl_sc_generate(struct gl_shader_cache *sc)
gl->ActiveTexture(GL_TEXTURE0);
+ gl_timer_start(entry->timer);
sc->needs_reset = true;
+
+ return gl_timer_measure(entry->timer);
}
// Maximum number of simultaneous query objects to keep around. Reducing this
@@ -1169,16 +1179,13 @@ void gl_sc_generate(struct gl_shader_cache *sc)
// available
#define QUERY_OBJECT_NUM 8
-// How many samples to keep around, for the sake of average and peak
-// calculations. This corresponds to a few seconds (exact time variable)
-#define QUERY_SAMPLE_SIZE 256u
-
struct gl_timer {
GL *gl;
GLuint query[QUERY_OBJECT_NUM];
int query_idx;
- GLuint64 samples[QUERY_SAMPLE_SIZE];
+ // these numbers are all in nanoseconds
+ uint64_t samples[PERF_SAMPLE_COUNT];
int sample_idx;
int sample_count;
@@ -1186,27 +1193,23 @@ struct gl_timer {
uint64_t peak;
};
-int gl_timer_sample_count(struct gl_timer *timer)
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer)
{
- return timer->sample_count;
-}
+ assert(timer); // returns a snapshot of the timer's statistics; res.samples aliases timer->samples
+ struct mp_pass_perf res = { // index = slot of the oldest sample; the +PERF_SAMPLE_COUNT bias keeps the operand non-negative
+ .count = timer->sample_count,
+ .index = (timer->sample_idx + PERF_SAMPLE_COUNT - timer->sample_count) % PERF_SAMPLE_COUNT,
+ .peak = timer->peak,
+ .samples = timer->samples,
+ };
-uint64_t gl_timer_last_us(struct gl_timer *timer)
-{
- return timer->samples[(timer->sample_idx - 1) % QUERY_SAMPLE_SIZE] / 1000;
-}
+ res.last = timer->samples[(timer->sample_idx + PERF_SAMPLE_COUNT - 1) % PERF_SAMPLE_COUNT]; // slot written by the latest gl_timer_record
-uint64_t gl_timer_avg_us(struct gl_timer *timer)
-{
- if (timer->sample_count <= 0)
- return 0;
-
- return timer->avg_sum / timer->sample_count / 1000;
-}
+ if (timer->sample_count > 0) { // avg stays 0 until at least one sample exists
+ res.avg = timer->avg_sum / timer->sample_count;
+ }
-uint64_t gl_timer_peak_us(struct gl_timer *timer)
-{
- return timer->peak / 1000;
+ return res;
}
struct gl_timer *gl_timer_create(GL *gl)
@@ -1237,13 +1240,13 @@ void gl_timer_free(struct gl_timer *timer)
static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
{
// Input res into the buffer and grab the previous value
- GLuint64 old = timer->samples[timer->sample_idx];
+ uint64_t old = timer->samples[timer->sample_idx];
timer->samples[timer->sample_idx++] = new;
- timer->sample_idx %= QUERY_SAMPLE_SIZE;
+ timer->sample_idx %= PERF_SAMPLE_COUNT;
// Update average and sum
timer->avg_sum = timer->avg_sum + new - old;
- timer->sample_count = MPMIN(timer->sample_count + 1, QUERY_SAMPLE_SIZE);
+ timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT);
// Update peak if necessary
if (new >= timer->peak) {
@@ -1252,7 +1255,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
// It's possible that the last peak was the value we just removed,
// if so we need to scan for the new peak
uint64_t peak = new;
- for (int i = 0; i < QUERY_SAMPLE_SIZE; i++)
+ for (int i = 0; i < PERF_SAMPLE_COUNT; i++)
peak = MPMAX(peak, timer->samples[i]);
timer->peak = peak;
}
@@ -1264,6 +1267,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
// The caling code *MUST* ensure this
void gl_timer_start(struct gl_timer *timer)
{
+ assert(timer);
GL *gl = timer->gl;
if (!gl->BeginQuery)
return;
@@ -1283,9 +1287,8 @@ void gl_timer_start(struct gl_timer *timer)
gl->BeginQuery(GL_TIME_ELAPSED, id);
}
-void gl_timer_stop(struct gl_timer *timer)
+void gl_timer_stop(GL *gl) // ends the GL_TIME_ELAPSED query; takes the GL context directly instead of a timer
{
- GL *gl = timer->gl;
if (gl->EndQuery)
gl->EndQuery(GL_TIME_ELAPSED);
}
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 95eb1c4fea..92b1005c39 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -169,7 +169,7 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
bool transpose, GLfloat *v);
void gl_sc_set_vao(struct gl_shader_cache *sc, struct gl_vao *vao);
void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name);
-void gl_sc_generate(struct gl_shader_cache *sc);
+struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc);
void gl_sc_reset(struct gl_shader_cache *sc);
struct mpv_global;
void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global,
@@ -180,12 +180,8 @@ struct gl_timer;
struct gl_timer *gl_timer_create(GL *gl);
void gl_timer_free(struct gl_timer *timer);
void gl_timer_start(struct gl_timer *timer);
-void gl_timer_stop(struct gl_timer *timer);
-
-int gl_timer_sample_count(struct gl_timer *timer);
-uint64_t gl_timer_last_us(struct gl_timer *timer);
-uint64_t gl_timer_avg_us(struct gl_timer *timer);
-uint64_t gl_timer_peak_us(struct gl_timer *timer);
+void gl_timer_stop(GL *gl);
+struct mp_pass_perf gl_timer_measure(struct gl_timer *timer);
#define NUM_PBO_BUFFERS 3
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 59462b8038..9dd78cf335 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -111,6 +111,15 @@ enum plane_type {
PLANE_XYZ,
};
+static const char *plane_names[] = { // printable names for pass descriptions, indexed by the PLANE_* constants
+ [PLANE_NONE] = "unknown",
+ [PLANE_RGB] = "rgb",
+ [PLANE_LUMA] = "luma",
+ [PLANE_CHROMA] = "chroma",
+ [PLANE_ALPHA] = "alpha",
+ [PLANE_XYZ] = "xyz",
+};
+
// A self-contained description of a source image which can be bound to a
// texture unit and sampled from. Contains metadata about how it's to be used
struct img_tex {
@@ -158,6 +167,13 @@ struct cached_file {
struct bstr body;
};
+struct pass_info { // one entry per recorded pass
+ struct bstr desc; // description text, built up by pass_describe
+ struct mp_pass_perf perf; // metrics stored by pass_record
+};
+
+#define PASS_INFO_MAX (SHADER_MAX_HOOKS + 32) // capacity of the pass_fresh / pass_redraw arrays
+
struct gl_video {
GL *gl;
@@ -186,10 +202,6 @@ struct gl_video {
GLuint dither_texture;
int dither_size;
- struct gl_timer *upload_timer;
- struct gl_timer *render_timer;
- struct gl_timer *present_timer;
-
struct mp_image_params real_image_params; // configured format
struct mp_image_params image_params; // texture format (mind hwdec case)
struct gl_imgfmt_desc gl_format; // texture format
@@ -239,6 +251,14 @@ struct gl_video {
bool use_linear;
float user_gamma;
+ // pass info / metrics
+ struct pass_info pass_fresh[PASS_INFO_MAX];
+ struct pass_info pass_redraw[PASS_INFO_MAX];
+ struct pass_info *pass;
+ int pass_idx;
+ struct gl_timer *upload_timer;
+ struct gl_timer *blit_timer;
+
// hooks and saved textures
struct saved_tex saved_tex[SHADER_MAX_SAVED];
int saved_tex_num;
@@ -931,6 +951,43 @@ static void uninit_video(struct gl_video *p)
p->hwdec_active = false;
}
+static void pass_record(struct gl_video *p, struct mp_pass_perf perf)
+{ // stores perf in the current pass slot, then advances to the next slot
+ assert(p->pass_idx < PASS_INFO_MAX);
+ struct pass_info *pass = &p->pass[p->pass_idx];
+ pass->perf = perf;
+
+ if (pass->desc.len == 0) // description still empty: fall back to a placeholder
+ bstr_xappend(p, &pass->desc, bstr0("(unknown)"));
+
+ p->pass_idx++;
+}
+
+static void pass_describe(struct gl_video *p, const char *textf, ...)
+{ // appends printf-style text to the description of the current (not yet recorded) pass
+ assert(p->pass_idx < PASS_INFO_MAX);
+ struct pass_info *pass = &p->pass[p->pass_idx];
+
+ if (pass->desc.len > 0) // already has text: join the parts with " + "
+ bstr_xappend(p, &pass->desc, bstr0(" + "));
+
+ va_list ap;
+ va_start(ap, textf);
+ bstr_xappend_vasprintf(p, &pass->desc, textf, ap);
+ va_end(ap);
+}
+
+static void pass_info_reset(struct gl_video *p, bool is_redraw)
+{ // selects the redraw or the fresh pass list as the active one and clears all of its entries
+ p->pass = is_redraw ? p->pass_redraw : p->pass_fresh;
+ p->pass_idx = 0;
+
+ for (int i = 0; i < PASS_INFO_MAX; i++) {
+ p->pass[i].desc.len = 0; // only the length is reset; desc.start is left untouched
+ p->pass[i].perf = (struct mp_pass_perf){0};
+ }
+}
+
static void pass_prepare_src_tex(struct gl_video *p)
{
struct gl_shader_cache *sc = p->sc;
@@ -1008,7 +1065,7 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
{
GL *gl = p->gl;
pass_prepare_src_tex(p);
- gl_sc_generate(p->sc);
+ pass_record(p, gl_sc_generate(p->sc));
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
render_pass_quad(p, vp_w, vp_h, dst);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
@@ -1450,7 +1507,7 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src,
// First pass (scale only in the y dir)
src.transform = t_y;
sampler_prelude(p->sc, pass_bind(p, src));
- GLSLF("// pass 1\n");
+ GLSLF("// first pass\n");
pass_sample_separated_gen(p->sc, scaler, 0, 1);
GLSLF("color *= %f;\n", src.multiplier);
finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H);
@@ -1458,8 +1515,8 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src,
// Second pass (scale only in the x dir)
src = img_tex_fbo(&scaler->sep_fbo, src.type, src.components);
src.transform = t_x;
+ pass_describe(p, "%s second pass", scaler->conf.kernel.name);
sampler_prelude(p->sc, pass_bind(p, src));
- GLSLF("// pass 2\n");
pass_sample_separated_gen(p->sc, scaler, 1, 0);
}
@@ -1475,6 +1532,17 @@ static void pass_sample(struct gl_video *p, struct img_tex tex,
{
reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);
+ // Describe scaler
+ const char *scaler_opt[] = {
+ [SCALER_SCALE] = "scale",
+ [SCALER_DSCALE] = "dscale",
+ [SCALER_CSCALE] = "cscale",
+ [SCALER_TSCALE] = "tscale",
+ };
+
+ pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index],
+ scaler->conf.kernel.name, plane_names[tex.type]);
+
bool is_separated = scaler->kernel && !scaler->kernel->polar;
// Set up the transformation+prelude and bind the texture, for everything
@@ -1550,12 +1618,14 @@ static void pass_add_hooks(struct gl_video *p, struct tex_hook hook,
static void deband_hook(struct gl_video *p, struct img_tex tex,
struct gl_transform *trans, void *priv)
{
+ pass_describe(p, "debanding (%s)", plane_names[tex.type]);
pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg);
}
static void unsharp_hook(struct gl_video *p, struct img_tex tex,
struct gl_transform *trans, void *priv)
{
+ pass_describe(p, "unsharp masking");
GLSLF("#define tex HOOKED\n");
GLSLF("#define pos HOOKED_pos\n");
GLSLF("#define pt HOOKED_pt\n");
@@ -1620,8 +1690,10 @@ static void user_hook(struct gl_video *p, struct img_tex tex,
struct gl_user_shader *shader = priv;
assert(shader);
+ pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->desc),
+ plane_names[tex.type]);
+
load_shader(p, shader->pass_body);
- GLSLF("// custom hook\n");
GLSLF("color = hook();\n");
// Make sure we at least create a legal FBO on failure, since it's better
@@ -1734,6 +1806,7 @@ static void pass_read_video(struct gl_video *p)
if (num > 0) {
GLSLF("// merging plane %d ... into %d\n", n, first);
copy_img_tex(p, &num, tex[n]);
+ pass_describe(p, "merging planes");
finish_pass_fbo(p, &p->merge_fbo[n], tex[n].w, tex[n].h, 0);
tex[first] = img_tex_fbo(&p->merge_fbo[n], tex[n].type, num);
tex[n] = (struct img_tex){0};
@@ -1745,8 +1818,8 @@ static void pass_read_video(struct gl_video *p)
for (int n = 0; n < 4; n++) {
if (gl_is_integer_format(tex[n].gl_format)) {
GLSLF("// use_integer fix for plane %d\n", n);
-
copy_img_tex(p, &(int){0}, tex[n]);
+ pass_describe(p, "use_integer fix");
finish_pass_fbo(p, &p->integer_fbo[n], tex[n].w, tex[n].h, 0);
tex[n] = img_tex_fbo(&p->integer_fbo[n], tex[n].type,
tex[n].components);
@@ -1913,7 +1986,7 @@ static void pass_convert_yuv(struct gl_video *p)
mp_csp_copy_equalizer_values(&cparams, &p->video_eq);
p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma);
- GLSLF("// color conversion\n");
+ pass_describe(p, "color conversion");
if (p->color_swizzle[0])
GLSLF("color = color.%s;\n", p->color_swizzle);
@@ -2292,12 +2365,12 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
gl_sc_uniform_sampler(p->sc, "osdtex", GL_TEXTURE_2D, 0);
switch (fmt) {
case SUBBITMAP_RGBA: {
- GLSLF("// OSD (RGBA)\n");
+ pass_describe(p, "drawing osd (rgba)");
GLSL(color = texture(osdtex, texcoord).bgra;)
break;
}
case SUBBITMAP_LIBASS: {
- GLSLF("// OSD (libass)\n");
+ pass_describe(p, "drawing osd (libass)");
GLSL(color =
vec4(ass_color.rgb, ass_color.a * texture(osdtex, texcoord).r);)
break;
@@ -2317,7 +2390,7 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
pass_colormanage(p, csp_srgb, true);
}
gl_sc_set_vao(p->sc, mpgl_osd_get_vao(p->osd));
- gl_sc_generate(p->sc);
+ pass_record(p, gl_sc_generate(p->sc));
mpgl_osd_draw_part(p->osd, vp_w, vp_h, n);
gl_sc_reset(p->sc);
}
@@ -2386,10 +2459,6 @@ static void pass_render_frame(struct gl_video *p)
if (p->dumb_mode)
return;
- // start the render timer here. it will continue to the end of this
- // function, to render the time needed to draw (excluding screen
- // presentation)
- gl_timer_start(p->render_timer);
p->use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling;
pass_read_video(p);
@@ -2414,6 +2483,7 @@ static void pass_render_frame(struct gl_video *p)
rect.w, rect.h, p->blend_subs_fbo.fbo, false);
GLSL(color = texture(texture0, texcoord0);)
pass_read_fbo(p, &p->blend_subs_fbo);
+ pass_describe(p, "blend subs video");
}
pass_opt_hook_point(p, "MAIN", &p->texture_offset);
@@ -2444,17 +2514,14 @@ static void pass_render_frame(struct gl_video *p)
pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect,
p->texture_w, p->texture_h, p->blend_subs_fbo.fbo, false);
pass_read_fbo(p, &p->blend_subs_fbo);
+ pass_describe(p, "blend subs");
}
pass_opt_hook_point(p, "SCALED", NULL);
-
- gl_timer_stop(p->render_timer);
}
static void pass_draw_to_screen(struct gl_video *p, int fbo)
{
- gl_timer_start(p->present_timer);
-
if (p->dumb_mode)
pass_render_frame_dumb(p, fbo);
@@ -2486,9 +2553,8 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo)
pass_opt_hook_point(p, "OUTPUT", NULL);
pass_dither(p);
+ pass_describe(p, "output to screen");
finish_pass_direct(p, fbo, p->vp_w, p->vp_h, &p->dst_rect);
-
- gl_timer_stop(p->present_timer);
}
// Draws an interpolate frame to fbo, based on the frame timing in t
@@ -2498,6 +2564,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
vp_h = p->dst_rect.y1 - p->dst_rect.y0;
+ bool is_new = false;
+
// Reset the queue completely if this is a still image, to avoid any
// interpolation artifacts from surrounding frames when unpausing or
// framestepping
@@ -2507,6 +2575,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
// First of all, figure out if we have a frame available at all, and draw
// it manually + reset the queue if not
if (p->surfaces[p->surface_now].id == 0) {
+ is_new = true;
+ pass_info_reset(p, false);
if (!gl_video_upload_image(p, t->current, t->frame_id))
return;
pass_render_frame(p);
@@ -2569,6 +2639,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
continue;
if (f_id > p->surfaces[p->surface_idx].id) {
+ is_new = true;
+ pass_info_reset(p, false);
if (!gl_video_upload_image(p, f, f_id))
return;
pass_render_frame(p);
@@ -2601,6 +2673,9 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
p->osd_pts = p->surfaces[surface_now].pts;
// Finally, draw the right mix of frames to the screen.
+ if (!is_new)
+ pass_info_reset(p, true);
+ pass_describe(p, "interpolation");
if (!valid || t->still) {
// surface_now is guaranteed to be valid, so we can safely use it.
pass_read_fbo(p, &p->surfaces[surface_now].fbotex);
@@ -2667,16 +2742,6 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
p->frames_drawn += 1;
}
-static void timer_dbg(struct gl_video *p, const char *name, struct gl_timer *t)
-{
- if (gl_timer_sample_count(t) > 0) {
- MP_DBG(p, "%s time: last %dus avg %dus peak %dus\n", name,
- (int)gl_timer_last_us(t),
- (int)gl_timer_avg_us(t),
- (int)gl_timer_peak_us(t));
- }
-}
-
// (fbo==0 makes BindFramebuffer select the screen backbuffer)
void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
{
@@ -2757,6 +2822,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
if (is_new || !p->output_fbo_valid) {
p->output_fbo_valid = false;
+ pass_info_reset(p, false);
if (!gl_video_upload_image(p, frame->current, frame->frame_id))
goto done;
pass_render_frame(p);
@@ -2778,6 +2844,8 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
// "output fbo valid" and "output fbo needed" are equivalent
if (p->output_fbo_valid) {
+ pass_info_reset(p, true);
+ pass_describe(p, "redraw cached frame");
gl->BindFramebuffer(GL_READ_FRAMEBUFFER, p->output_fbo.fbo);
gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
struct mp_rect rc = p->dst_rect;
@@ -2785,11 +2853,14 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
rc.y1 = -p->vp_h - p->dst_rect.y0;
rc.y0 = -p->vp_h - p->dst_rect.y1;
}
+ gl_timer_start(p->blit_timer);
gl->BlitFramebuffer(rc.x0, rc.y0, rc.x1, rc.y1,
rc.x0, rc.y0, rc.x1, rc.y1,
GL_COLOR_BUFFER_BIT, GL_NEAREST);
+ gl_timer_stop(gl);
gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+ pass_record(p, gl_timer_measure(p->blit_timer));
}
}
}
@@ -2830,9 +2901,16 @@ done:
p->frames_rendered++;
// Report performance metrics
- timer_dbg(p, "upload", p->upload_timer);
- timer_dbg(p, "render", p->render_timer);
- timer_dbg(p, "present", p->present_timer);
+ for (int i = 0; i < PASS_INFO_MAX; i++) { // log the metrics of every described pass in the active list
+ struct pass_info *pass = &p->pass[i];
+ if (pass->desc.len) { // slots without a description are skipped
+ MP_DBG(p, "pass '%.*s': last %dus avg %dus peak %dus\n",
+ BSTR_P(pass->desc),
+ (int)(pass->perf.last/1000), // ns -> us in 64 bits first, then narrow for %d
+ (int)(pass->perf.avg/1000),
+ (int)(pass->perf.peak/1000));
+ }
+ }
}
// vp_w/vp_h is the implicit size of the target framebuffer.
@@ -2857,22 +2935,22 @@ void gl_video_resize(struct gl_video *p, int vp_w, int vp_h,
p->hwdec->driver->overlay_adjust(p->hwdec, vp_w, abs(vp_h), src, dst);
}
-static struct voctrl_performance_entry gl_video_perfentry(struct gl_timer *t)
+static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out)
{
- return (struct voctrl_performance_entry) {
- .last = gl_timer_last_us(t),
- .avg = gl_timer_avg_us(t),
- .peak = gl_timer_peak_us(t),
- };
+ for (int i = 0; i < PASS_INFO_MAX; i++) { // copy the leading run of described passes into out
+ if (!pass[i].desc.len || out->count >= VO_PASS_PERF_MAX) // stop at the first empty slot, or when out is full
+ break;
+ out->perf[out->count] = pass[i].perf;
+ out->desc[out->count] = pass[i].desc.start; // pointer is borrowed from the pass list, not copied
+ out->count++;
+ }
}
-struct voctrl_performance_data gl_video_perfdata(struct gl_video *p)
+void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out)
{
- return (struct voctrl_performance_data) {
- .upload = gl_video_perfentry(p->upload_timer),
- .render = gl_video_perfentry(p->render_timer),
- .present = gl_video_perfentry(p->present_timer),
- };
+ *out = (struct voctrl_performance_data){0}; // zeroes both counts before they are filled
+ frame_perf_data(p->pass_fresh, &out->fresh);
+ frame_perf_data(p->pass_redraw, &out->redraw);
}
// This assumes nv12, with textures set to GL_NEAREST filtering.
@@ -2942,9 +3020,13 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi,
if (p->hwdec_active) {
// Hardware decoding
struct gl_hwdec_frame gl_frame = {0};
+
+ pass_describe(p, "map frame (hwdec)");
gl_timer_start(p->upload_timer);
bool ok = p->hwdec->driver->map_frame(p->hwdec, vimg->mpi, &gl_frame) >= 0;
- gl_timer_stop(p->upload_timer);
+ gl_timer_stop(gl);
+ pass_record(p, gl_timer_measure(p->upload_timer));
+
vimg->hwdec_mapped = true;
if (ok) {
struct mp_image layout = {0};
@@ -2973,9 +3055,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi,
// Software decoding
assert(mpi->num_planes == p->plane_count);
+ pass_describe(p, "upload frame (swdec)");
gl_timer_start(p->upload_timer);
-
-
for (int n = 0; n < p->plane_count; n++) {
struct texplane *plane = &vimg->planes[n];
@@ -2988,8 +3069,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi,
0, 0, plane->w, plane->h);
gl->BindTexture(plane->gl_target, 0);
}
-
- gl_timer_stop(p->upload_timer);
+ gl_timer_stop(gl);
+ pass_record(p, gl_timer_measure(p->upload_timer));
return true;
@@ -3182,9 +3263,8 @@ static void init_gl(struct gl_video *p)
if (p->texture_16bit_depth > 0)
MP_VERBOSE(p, "16 bit texture depth: %d.\n", p->texture_16bit_depth);
- p->upload_timer = gl_timer_create(p->gl);
- p->render_timer = gl_timer_create(p->gl);
- p->present_timer = gl_timer_create(p->gl);
+ p->upload_timer = gl_timer_create(gl);
+ p->blit_timer = gl_timer_create(gl);
debug_check_gl(p, "after init_gl");
}
@@ -3205,8 +3285,11 @@ void gl_video_uninit(struct gl_video *p)
gl->DeleteTextures(1, &p->lut_3d_texture);
gl_timer_free(p->upload_timer);
- gl_timer_free(p->render_timer);
- gl_timer_free(p->present_timer);
+ gl_timer_free(p->blit_timer);
+ for (int i = 0; i < PASS_INFO_MAX; i++) {
+ talloc_free(p->pass_fresh[i].desc.start);
+ talloc_free(p->pass_redraw[i].desc.start);
+ }
mpgl_osd_destroy(p->osd);
diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h
index 55fd18461a..8526cecb84 100644
--- a/video/out/opengl/video.h
+++ b/video/out/opengl/video.h
@@ -155,7 +155,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo);
void gl_video_resize(struct gl_video *p, int vp_w, int vp_h,
struct mp_rect *src, struct mp_rect *dst,
struct mp_osd_res *osd);
-struct voctrl_performance_data gl_video_perfdata(struct gl_video *p);
+void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out);
struct mp_csp_equalizer;
struct mp_csp_equalizer *gl_video_eq_ptr(struct gl_video *p);
void gl_video_eq_update(struct gl_video *p);
diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c
index 982a26cf4b..9e54d33d41 100644
--- a/video/out/opengl/video_shaders.c
+++ b/video/out/opengl/video_shaders.c
@@ -701,7 +701,6 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
// Assumes the texture was hooked
void pass_sample_unsharp(struct gl_shader_cache *sc, float param) {
- GLSLF("// unsharp\n");
GLSLF("{\n");
GLSL(float st1 = 1.2;)
GLSL(vec4 p = HOOKED_tex(HOOKED_pos);)
diff --git a/video/out/vo.h b/video/out/vo.h
index c59e1b04c8..6dce8f6c2f 100644
--- a/video/out/vo.h
+++ b/video/out/vo.h
@@ -143,13 +143,31 @@ struct voctrl_playback_state {
};
// VOCTRL_PERFORMANCE_DATA
-struct voctrl_performance_entry {
- // Times are in microseconds
+#define PERF_SAMPLE_COUNT 256u
+
+struct mp_pass_perf {
+ // times are all in nanoseconds
uint64_t last, avg, peak;
+ // this is a ring buffer, indices are relative to index and modulo
+ // PERF_SAMPLE_COUNT
+ uint64_t *samples;
+ int count;
+ int index;
+};
+
+#define VO_PASS_PERF_MAX 128
+
+struct mp_frame_perf {
+ int count;
+ struct mp_pass_perf perf[VO_PASS_PERF_MAX];
+ // The owner of this struct does not have ownership over the names, and
+ // they may change at any time - so this struct should not be stored
+ // anywhere or the results reused
+ char *desc[VO_PASS_PERF_MAX];
};
struct voctrl_performance_data {
- struct voctrl_performance_entry upload, render, present;
+ struct mp_frame_perf fresh, redraw;
};
enum {
diff --git a/video/out/vo_opengl.c b/video/out/vo_opengl.c
index 9b3f944e21..f5b0bd37c4 100644
--- a/video/out/vo_opengl.c
+++ b/video/out/vo_opengl.c
@@ -301,7 +301,7 @@ static int control(struct vo *vo, uint32_t request, void *data)
vo->want_redraw = true;
return true;
case VOCTRL_PERFORMANCE_DATA:
- *(struct voctrl_performance_data *)data = gl_video_perfdata(p->renderer);
+ gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data);
return true;
}