diff options
Diffstat (limited to 'video')
-rw-r--r-- | video/out/opengl/user_shaders.c | 6 | ||||
-rw-r--r-- | video/out/opengl/user_shaders.h | 1 | ||||
-rw-r--r-- | video/out/opengl/utils.c | 61 | ||||
-rw-r--r-- | video/out/opengl/utils.h | 10 | ||||
-rw-r--r-- | video/out/opengl/video.c | 199 | ||||
-rw-r--r-- | video/out/opengl/video.h | 2 | ||||
-rw-r--r-- | video/out/opengl/video_shaders.c | 1 | ||||
-rw-r--r-- | video/out/vo.h | 24 | ||||
-rw-r--r-- | video/out/vo_opengl.c | 2 |
9 files changed, 206 insertions, 100 deletions
diff --git a/video/out/opengl/user_shaders.c b/video/out/opengl/user_shaders.c index 7e1e5f4d12..427295b0ad 100644 --- a/video/out/opengl/user_shaders.c +++ b/video/out/opengl/user_shaders.c @@ -166,6 +166,7 @@ bool parse_user_shader_pass(struct mp_log *log, struct bstr *body, return false; *out = (struct gl_user_shader){ + .desc = bstr0("(unknown)"), .offset = identity_trans, .width = {{ SZEXP_VAR_W, { .varname = bstr0("HOOKED") }}}, .height = {{ SZEXP_VAR_H, { .varname = bstr0("HOOKED") }}}, @@ -220,6 +221,11 @@ bool parse_user_shader_pass(struct mp_log *log, struct bstr *body, continue; } + if (bstr_eatstart0(&line, "DESC")) { + out->desc = bstr_strip(line); + continue; + } + if (bstr_eatstart0(&line, "OFFSET")) { float ox, oy; if (bstr_sscanf(line, "%f %f", &ox, &oy) != 2) { diff --git a/video/out/opengl/user_shaders.h b/video/out/opengl/user_shaders.h index fd6fc248f3..458e925bc4 100644 --- a/video/out/opengl/user_shaders.h +++ b/video/out/opengl/user_shaders.h @@ -60,6 +60,7 @@ struct gl_user_shader { struct bstr bind_tex[SHADER_MAX_BINDS]; struct bstr save_tex; struct bstr pass_body; + struct bstr desc; struct gl_transform offset; struct szexp width[MAX_SZEXP_SIZE]; struct szexp height[MAX_SZEXP_SIZE]; diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 7e8680fff2..3615ff92d1 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -466,6 +466,7 @@ struct sc_entry { int num_uniforms; bstr frag; bstr vert; + struct gl_timer *timer; }; struct gl_shader_cache { @@ -520,6 +521,7 @@ void gl_sc_reset(struct gl_shader_cache *sc) GL *gl = sc->gl; if (sc->needs_reset) { + gl_timer_stop(gl); gl->UseProgram(0); for (int n = 0; n < sc->num_uniforms; n++) { @@ -552,6 +554,7 @@ static void sc_flush_cache(struct gl_shader_cache *sc) talloc_free(e->vert.start); talloc_free(e->frag.start); talloc_free(e->uniforms); + gl_timer_free(e->timer); } sc->num_entries = 0; } @@ -1029,7 +1032,10 @@ static GLuint load_program(struct gl_shader_cache *sc, const char *vertex, // 1. Unbind the program and all textures. // 2. Reset the sc state and prepare for a new shader program. (All uniforms // and fragment operations needed for the next program have to be re-added.) -void gl_sc_generate(struct gl_shader_cache *sc) +// The return value is a mp_pass_perf containing performance metrics for the +// execution of the generated shader. (Note: execution is measured up until +// the corresponding gl_sc_reset call) +struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc) { GL *gl = sc->gl; @@ -1137,6 +1143,7 @@ void gl_sc_generate(struct gl_shader_cache *sc) *entry = (struct sc_entry){ .vert = bstrdup(NULL, *vert), .frag = bstrdup(NULL, *frag), + .timer = gl_timer_create(gl), }; } // build vertex shader from vao and cache the locations of the uniform variables @@ -1161,7 +1168,10 @@ void gl_sc_generate(struct gl_shader_cache *sc) gl->ActiveTexture(GL_TEXTURE0); + gl_timer_start(entry->timer); sc->needs_reset = true; + + return gl_timer_measure(entry->timer); } // Maximum number of simultaneous query objects to keep around. Reducing this @@ -1169,16 +1179,13 @@ void gl_sc_generate(struct gl_shader_cache *sc) // available #define QUERY_OBJECT_NUM 8 -// How many samples to keep around, for the sake of average and peak -// calculations. This corresponds to a few seconds (exact time variable) -#define QUERY_SAMPLE_SIZE 256u - struct gl_timer { GL *gl; GLuint query[QUERY_OBJECT_NUM]; int query_idx; - GLuint64 samples[QUERY_SAMPLE_SIZE]; + // these numbers are all in nanoseconds + uint64_t samples[PERF_SAMPLE_COUNT]; int sample_idx; int sample_count; @@ -1186,27 +1193,23 @@ struct gl_timer { uint64_t peak; }; -int gl_timer_sample_count(struct gl_timer *timer) +struct mp_pass_perf gl_timer_measure(struct gl_timer *timer) { - return timer->sample_count; -} + assert(timer); + struct mp_pass_perf res = { + .count = timer->sample_count, + .index = (timer->sample_idx - timer->sample_count) % PERF_SAMPLE_COUNT, + .peak = timer->peak, + .samples = timer->samples, + }; -uint64_t gl_timer_last_us(struct gl_timer *timer) -{ - return timer->samples[(timer->sample_idx - 1) % QUERY_SAMPLE_SIZE] / 1000; -} + res.last = timer->samples[(timer->sample_idx - 1) % PERF_SAMPLE_COUNT]; -uint64_t gl_timer_avg_us(struct gl_timer *timer) -{ - if (timer->sample_count <= 0) - return 0; - - return timer->avg_sum / timer->sample_count / 1000; -} + if (timer->sample_count > 0) { + res.avg = timer->avg_sum / timer->sample_count; + } -uint64_t gl_timer_peak_us(struct gl_timer *timer) -{ - return timer->peak / 1000; + return res; } struct gl_timer *gl_timer_create(GL *gl) @@ -1237,13 +1240,13 @@ void gl_timer_free(struct gl_timer *timer) static void gl_timer_record(struct gl_timer *timer, GLuint64 new) { // Input res into the buffer and grab the previous value - GLuint64 old = timer->samples[timer->sample_idx]; + uint64_t old = timer->samples[timer->sample_idx]; timer->samples[timer->sample_idx++] = new; - timer->sample_idx %= QUERY_SAMPLE_SIZE; + timer->sample_idx %= PERF_SAMPLE_COUNT; // Update average and sum timer->avg_sum = timer->avg_sum + new - old; - timer->sample_count = MPMIN(timer->sample_count + 1, QUERY_SAMPLE_SIZE); + timer->sample_count = MPMIN(timer->sample_count + 1, PERF_SAMPLE_COUNT); // Update peak if necessary if (new >= timer->peak) { @@ -1252,7 +1255,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new) // It's possible that the last peak was the value we just removed, // if so we need to scan for the new peak uint64_t peak = new; - for (int i = 0; i < QUERY_SAMPLE_SIZE; i++) + for (int i = 0; i < PERF_SAMPLE_COUNT; i++) peak = MPMAX(peak, timer->samples[i]); timer->peak = peak; } @@ -1264,6 +1267,7 @@ static void gl_timer_record(struct gl_timer *timer, GLuint64 new) // The caling code *MUST* ensure this void gl_timer_start(struct gl_timer *timer) { + assert(timer); GL *gl = timer->gl; if (!gl->BeginQuery) return; @@ -1283,9 +1287,8 @@ void gl_timer_start(struct gl_timer *timer) gl->BeginQuery(GL_TIME_ELAPSED, id); } -void gl_timer_stop(struct gl_timer *timer) +void gl_timer_stop(GL *gl) { - GL *gl = timer->gl; if (gl->EndQuery) gl->EndQuery(GL_TIME_ELAPSED); } diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 95eb1c4fea..92b1005c39 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -169,7 +169,7 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, bool transpose, GLfloat *v); void gl_sc_set_vao(struct gl_shader_cache *sc, struct gl_vao *vao); void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); -void gl_sc_generate(struct gl_shader_cache *sc); +struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc); void gl_sc_reset(struct gl_shader_cache *sc); struct mpv_global; void gl_sc_set_cache_dir(struct gl_shader_cache *sc, struct mpv_global *global, @@ -180,12 +180,8 @@ struct gl_timer; struct gl_timer *gl_timer_create(GL *gl); void gl_timer_free(struct gl_timer *timer); void gl_timer_start(struct gl_timer *timer); -void gl_timer_stop(struct gl_timer *timer); - -int gl_timer_sample_count(struct gl_timer *timer); -uint64_t gl_timer_last_us(struct gl_timer *timer); -uint64_t gl_timer_avg_us(struct gl_timer *timer); -uint64_t gl_timer_peak_us(struct gl_timer *timer); +void gl_timer_stop(GL *gl); +struct mp_pass_perf gl_timer_measure(struct gl_timer *timer); #define NUM_PBO_BUFFERS 3 diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index 59462b8038..9dd78cf335 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -111,6 +111,15 @@ enum plane_type { PLANE_XYZ, }; +static const char *plane_names[] = { + [PLANE_NONE] = "unknown", + [PLANE_RGB] = "rgb", + [PLANE_LUMA] = "luma", + [PLANE_CHROMA] = "chroma", + [PLANE_ALPHA] = "alpha", + [PLANE_XYZ] = "xyz", +}; + // A self-contained description of a source image which can be bound to a // texture unit and sampled from. Contains metadata about how it's to be used struct img_tex { @@ -158,6 +167,13 @@ struct cached_file { struct bstr body; }; +struct pass_info { + struct bstr desc; + struct mp_pass_perf perf; +}; + +#define PASS_INFO_MAX (SHADER_MAX_HOOKS + 32) + struct gl_video { GL *gl; @@ -186,10 +202,6 @@ struct gl_video { GLuint dither_texture; int dither_size; - struct gl_timer *upload_timer; - struct gl_timer *render_timer; - struct gl_timer *present_timer; - struct mp_image_params real_image_params; // configured format struct mp_image_params image_params; // texture format (mind hwdec case) struct gl_imgfmt_desc gl_format; // texture format @@ -239,6 +251,14 @@ struct gl_video { bool use_linear; float user_gamma; + // pass info / metrics + struct pass_info pass_fresh[PASS_INFO_MAX]; + struct pass_info pass_redraw[PASS_INFO_MAX]; + struct pass_info *pass; + int pass_idx; + struct gl_timer *upload_timer; + struct gl_timer *blit_timer; + // hooks and saved textures struct saved_tex saved_tex[SHADER_MAX_SAVED]; int saved_tex_num; @@ -931,6 +951,43 @@ static void uninit_video(struct gl_video *p) p->hwdec_active = false; } +static void pass_record(struct gl_video *p, struct mp_pass_perf perf) +{ + assert(p->pass_idx < PASS_INFO_MAX); + struct pass_info *pass = &p->pass[p->pass_idx]; + pass->perf = perf; + + if (pass->desc.len == 0) + bstr_xappend(p, &pass->desc, bstr0("(unknown)")); + + p->pass_idx++; +} + +static void pass_describe(struct gl_video *p, const char *textf, ...) +{ + assert(p->pass_idx < PASS_INFO_MAX); + struct pass_info *pass = &p->pass[p->pass_idx]; + + if (pass->desc.len > 0) + bstr_xappend(p, &pass->desc, bstr0(" + ")); + + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(p, &pass->desc, textf, ap); + va_end(ap); +} + +static void pass_info_reset(struct gl_video *p, bool is_redraw) +{ + p->pass = is_redraw ? p->pass_redraw : p->pass_fresh; + p->pass_idx = 0; + + for (int i = 0; i < PASS_INFO_MAX; i++) { + p->pass[i].desc.len = 0; + p->pass[i].perf = (struct mp_pass_perf){0}; + } +} + static void pass_prepare_src_tex(struct gl_video *p) { struct gl_shader_cache *sc = p->sc; @@ -1008,7 +1065,7 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h { GL *gl = p->gl; pass_prepare_src_tex(p); - gl_sc_generate(p->sc); + pass_record(p, gl_sc_generate(p->sc)); gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); render_pass_quad(p, vp_w, vp_h, dst); gl->BindFramebuffer(GL_FRAMEBUFFER, 0); @@ -1450,7 +1507,7 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src, // First pass (scale only in the y dir) src.transform = t_y; sampler_prelude(p->sc, pass_bind(p, src)); - GLSLF("// pass 1\n"); + GLSLF("// first pass\n"); pass_sample_separated_gen(p->sc, scaler, 0, 1); GLSLF("color *= %f;\n", src.multiplier); finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H); @@ -1458,8 +1515,8 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src, // Second pass (scale only in the x dir) src = img_tex_fbo(&scaler->sep_fbo, src.type, src.components); src.transform = t_x; + pass_describe(p, "%s second pass", scaler->conf.kernel.name); sampler_prelude(p->sc, pass_bind(p, src)); - GLSLF("// pass 2\n"); pass_sample_separated_gen(p->sc, scaler, 1, 0); } @@ -1475,6 +1532,17 @@ static void pass_sample(struct gl_video *p, struct img_tex tex, { reinit_scaler(p, scaler, conf, scale_factor, filter_sizes); + // Describe scaler + const char *scaler_opt[] = { + [SCALER_SCALE] = "scale", + [SCALER_DSCALE] = "dscale", + [SCALER_CSCALE] = "cscale", + [SCALER_TSCALE] = "tscale", + }; + + pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index], + scaler->conf.kernel.name, plane_names[tex.type]); + bool is_separated = scaler->kernel && !scaler->kernel->polar; // Set up the transformation+prelude and bind the texture, for everything @@ -1550,12 +1618,14 @@ static void pass_add_hooks(struct gl_video *p, struct tex_hook hook, static void deband_hook(struct gl_video *p, struct img_tex tex, struct gl_transform *trans, void *priv) { + pass_describe(p, "debanding (%s)", plane_names[tex.type]); pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg); } static void unsharp_hook(struct gl_video *p, struct img_tex tex, struct gl_transform *trans, void *priv) { + pass_describe(p, "unsharp masking"); GLSLF("#define tex HOOKED\n"); GLSLF("#define pos HOOKED_pos\n"); GLSLF("#define pt HOOKED_pt\n"); @@ -1620,8 +1690,10 @@ static void user_hook(struct gl_video *p, struct img_tex tex, struct gl_user_shader *shader = priv; assert(shader); + pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->desc), + plane_names[tex.type]); + load_shader(p, shader->pass_body); - GLSLF("// custom hook\n"); GLSLF("color = hook();\n"); // Make sure we at least create a legal FBO on failure, since it's better @@ -1734,6 +1806,7 @@ static void pass_read_video(struct gl_video *p) if (num > 0) { GLSLF("// merging plane %d ... into %d\n", n, first); copy_img_tex(p, &num, tex[n]); + pass_describe(p, "merging planes"); finish_pass_fbo(p, &p->merge_fbo[n], tex[n].w, tex[n].h, 0); tex[first] = img_tex_fbo(&p->merge_fbo[n], tex[n].type, num); tex[n] = (struct img_tex){0}; @@ -1745,8 +1818,8 @@ static void pass_read_video(struct gl_video *p) for (int n = 0; n < 4; n++) { if (gl_is_integer_format(tex[n].gl_format)) { GLSLF("// use_integer fix for plane %d\n", n); - copy_img_tex(p, &(int){0}, tex[n]); + pass_describe(p, "use_integer fix"); finish_pass_fbo(p, &p->integer_fbo[n], tex[n].w, tex[n].h, 0); tex[n] = img_tex_fbo(&p->integer_fbo[n], tex[n].type, tex[n].components); @@ -1913,7 +1986,7 @@ static void pass_convert_yuv(struct gl_video *p) mp_csp_copy_equalizer_values(&cparams, &p->video_eq); p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma); - GLSLF("// color conversion\n"); + pass_describe(p, "color conversion"); if (p->color_swizzle[0]) GLSLF("color = color.%s;\n", p->color_swizzle); @@ -2292,12 +2365,12 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, gl_sc_uniform_sampler(p->sc, "osdtex", GL_TEXTURE_2D, 0); switch (fmt) { case SUBBITMAP_RGBA: { - GLSLF("// OSD (RGBA)\n"); + pass_describe(p, "drawing osd (rgba)"); GLSL(color = texture(osdtex, texcoord).bgra;) break; } case SUBBITMAP_LIBASS: { - GLSLF("// OSD (libass)\n"); + pass_describe(p, "drawing osd (libass)"); GLSL(color = vec4(ass_color.rgb, ass_color.a * texture(osdtex, texcoord).r);) break; @@ -2317,7 +2390,7 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, pass_colormanage(p, csp_srgb, true); } gl_sc_set_vao(p->sc, mpgl_osd_get_vao(p->osd)); - gl_sc_generate(p->sc); + pass_record(p, gl_sc_generate(p->sc)); mpgl_osd_draw_part(p->osd, vp_w, vp_h, n); gl_sc_reset(p->sc); } @@ -2386,10 +2459,6 @@ static void pass_render_frame(struct gl_video *p) if (p->dumb_mode) return; - // start the render timer here. it will continue to the end of this - // function, to render the time needed to draw (excluding screen - // presentation) - gl_timer_start(p->render_timer); p->use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling; pass_read_video(p); @@ -2414,6 +2483,7 @@ static void pass_render_frame(struct gl_video *p) rect.w, rect.h, p->blend_subs_fbo.fbo, false); GLSL(color = texture(texture0, texcoord0);) pass_read_fbo(p, &p->blend_subs_fbo); + pass_describe(p, "blend subs video"); } pass_opt_hook_point(p, "MAIN", &p->texture_offset); @@ -2444,17 +2514,14 @@ static void pass_render_frame(struct gl_video *p) pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, p->texture_w, p->texture_h, p->blend_subs_fbo.fbo, false); pass_read_fbo(p, &p->blend_subs_fbo); + pass_describe(p, "blend subs"); } pass_opt_hook_point(p, "SCALED", NULL); - - gl_timer_stop(p->render_timer); } static void pass_draw_to_screen(struct gl_video *p, int fbo) { - gl_timer_start(p->present_timer); - if (p->dumb_mode) pass_render_frame_dumb(p, fbo); @@ -2486,9 +2553,8 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo) pass_opt_hook_point(p, "OUTPUT", NULL); pass_dither(p); + pass_describe(p, "output to screen"); finish_pass_direct(p, fbo, p->vp_w, p->vp_h, &p->dst_rect); - - gl_timer_stop(p->present_timer); } // Draws an interpolate frame to fbo, based on the frame timing in t @@ -2498,6 +2564,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, int vp_w = p->dst_rect.x1 - p->dst_rect.x0, vp_h = p->dst_rect.y1 - p->dst_rect.y0; + bool is_new = false; + // Reset the queue completely if this is a still image, to avoid any // interpolation artifacts from surrounding frames when unpausing or // framestepping @@ -2507,6 +2575,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // First of all, figure out if we have a frame available at all, and draw // it manually + reset the queue if not if (p->surfaces[p->surface_now].id == 0) { + is_new = true; + pass_info_reset(p, false); if (!gl_video_upload_image(p, t->current, t->frame_id)) return; pass_render_frame(p); @@ -2569,6 +2639,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, continue; if (f_id > p->surfaces[p->surface_idx].id) { + is_new = true; + pass_info_reset(p, false); if (!gl_video_upload_image(p, f, f_id)) return; pass_render_frame(p); @@ -2601,6 +2673,9 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, p->osd_pts = p->surfaces[surface_now].pts; // Finally, draw the right mix of frames to the screen. + if (!is_new) + pass_info_reset(p, true); + pass_describe(p, "interpolation"); if (!valid || t->still) { // surface_now is guaranteed to be valid, so we can safely use it. pass_read_fbo(p, &p->surfaces[surface_now].fbotex); @@ -2667,16 +2742,6 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, p->frames_drawn += 1; } -static void timer_dbg(struct gl_video *p, const char *name, struct gl_timer *t) -{ - if (gl_timer_sample_count(t) > 0) { - MP_DBG(p, "%s time: last %dus avg %dus peak %dus\n", name, - (int)gl_timer_last_us(t), - (int)gl_timer_avg_us(t), - (int)gl_timer_peak_us(t)); - } -} - // (fbo==0 makes BindFramebuffer select the screen backbuffer) void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) { @@ -2757,6 +2822,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) if (is_new || !p->output_fbo_valid) { p->output_fbo_valid = false; + pass_info_reset(p, false); if (!gl_video_upload_image(p, frame->current, frame->frame_id)) goto done; pass_render_frame(p); @@ -2778,6 +2844,8 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) // "output fbo valid" and "output fbo needed" are equivalent if (p->output_fbo_valid) { + pass_info_reset(p, true); + pass_describe(p, "redraw cached frame"); gl->BindFramebuffer(GL_READ_FRAMEBUFFER, p->output_fbo.fbo); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); struct mp_rect rc = p->dst_rect; @@ -2785,11 +2853,14 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) rc.y1 = -p->vp_h - p->dst_rect.y0; rc.y0 = -p->vp_h - p->dst_rect.y1; } + gl_timer_start(p->blit_timer); gl->BlitFramebuffer(rc.x0, rc.y0, rc.x1, rc.y1, rc.x0, rc.y0, rc.x1, rc.y1, GL_COLOR_BUFFER_BIT, GL_NEAREST); + gl_timer_stop(gl); gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0); gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + pass_record(p, gl_timer_measure(p->blit_timer)); } } } @@ -2830,9 +2901,16 @@ done: p->frames_rendered++; // Report performance metrics - timer_dbg(p, "upload", p->upload_timer); - timer_dbg(p, "render", p->render_timer); - timer_dbg(p, "present", p->present_timer); + for (int i = 0; i < PASS_INFO_MAX; i++) { + struct pass_info *pass = &p->pass[i]; + if (pass->desc.len) { + MP_DBG(p, "pass '%.*s': last %dus avg %dus peak %dus\n", + BSTR_P(pass->desc), + (int)pass->perf.last/1000, + (int)pass->perf.avg/1000, + (int)pass->perf.peak/1000); + } + } } // vp_w/vp_h is the implicit size of the target framebuffer. @@ -2857,22 +2935,22 @@ void gl_video_resize(struct gl_video *p, int vp_w, int vp_h, p->hwdec->driver->overlay_adjust(p->hwdec, vp_w, abs(vp_h), src, dst); } -static struct voctrl_performance_entry gl_video_perfentry(struct gl_timer *t) +static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out) { - return (struct voctrl_performance_entry) { - .last = gl_timer_last_us(t), - .avg = gl_timer_avg_us(t), - .peak = gl_timer_peak_us(t), - }; + for (int i = 0; i < PASS_INFO_MAX; i++) { + if (!pass[i].desc.len) + break; + out->perf[out->count] = pass[i].perf; + out->desc[out->count] = pass[i].desc.start; + out->count++; + } } -struct voctrl_performance_data gl_video_perfdata(struct gl_video *p) +void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out) { - return (struct voctrl_performance_data) { - .upload = gl_video_perfentry(p->upload_timer), - .render = gl_video_perfentry(p->render_timer), - .present = gl_video_perfentry(p->present_timer), - }; + *out = (struct voctrl_performance_data){0}; + frame_perf_data(p->pass_fresh, &out->fresh); + frame_perf_data(p->pass_redraw, &out->redraw); } // This assumes nv12, with textures set to GL_NEAREST filtering. @@ -2942,9 +3020,13 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi, if (p->hwdec_active) { // Hardware decoding struct gl_hwdec_frame gl_frame = {0}; + + pass_describe(p, "map frame (hwdec)"); gl_timer_start(p->upload_timer); bool ok = p->hwdec->driver->map_frame(p->hwdec, vimg->mpi, &gl_frame) >= 0; - gl_timer_stop(p->upload_timer); + gl_timer_stop(gl); + pass_record(p, gl_timer_measure(p->upload_timer)); + vimg->hwdec_mapped = true; if (ok) { struct mp_image layout = {0}; @@ -2973,9 +3055,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi, // Software decoding assert(mpi->num_planes == p->plane_count); + pass_describe(p, "upload frame (swdec)"); gl_timer_start(p->upload_timer); - - for (int n = 0; n < p->plane_count; n++) { struct texplane *plane = &vimg->planes[n]; @@ -2988,8 +3069,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi, 0, 0, plane->w, plane->h); gl->BindTexture(plane->gl_target, 0); } - - gl_timer_stop(p->upload_timer); + gl_timer_stop(gl); + pass_record(p, gl_timer_measure(p->upload_timer)); return true; @@ -3182,9 +3263,8 @@ static void init_gl(struct gl_video *p) if (p->texture_16bit_depth > 0) MP_VERBOSE(p, "16 bit texture depth: %d.\n", p->texture_16bit_depth); - p->upload_timer = gl_timer_create(p->gl); - p->render_timer = gl_timer_create(p->gl); - p->present_timer = gl_timer_create(p->gl); + p->upload_timer = gl_timer_create(gl); + p->blit_timer = gl_timer_create(gl); debug_check_gl(p, "after init_gl"); } @@ -3205,8 +3285,11 @@ void gl_video_uninit(struct gl_video *p) gl->DeleteTextures(1, &p->lut_3d_texture); gl_timer_free(p->upload_timer); - gl_timer_free(p->render_timer); - gl_timer_free(p->present_timer); + gl_timer_free(p->blit_timer); + for (int i = 0; i < PASS_INFO_MAX; i++) { + talloc_free(p->pass_fresh[i].desc.start); + talloc_free(p->pass_redraw[i].desc.start); + } mpgl_osd_destroy(p->osd); diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h index 55fd18461a..8526cecb84 100644 --- a/video/out/opengl/video.h +++ b/video/out/opengl/video.h @@ -155,7 +155,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo); void gl_video_resize(struct gl_video *p, int vp_w, int vp_h, struct mp_rect *src, struct mp_rect *dst, struct mp_osd_res *osd); -struct voctrl_performance_data gl_video_perfdata(struct gl_video *p); +void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out); struct mp_csp_equalizer; struct mp_csp_equalizer *gl_video_eq_ptr(struct gl_video *p); void gl_video_eq_update(struct gl_video *p); diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c index 982a26cf4b..9e54d33d41 100644 --- a/video/out/opengl/video_shaders.c +++ b/video/out/opengl/video_shaders.c @@ -701,7 +701,6 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, // Assumes the texture was hooked void pass_sample_unsharp(struct gl_shader_cache *sc, float param) { - GLSLF("// unsharp\n"); GLSLF("{\n"); GLSL(float st1 = 1.2;) GLSL(vec4 p = HOOKED_tex(HOOKED_pos);) diff --git a/video/out/vo.h b/video/out/vo.h index c59e1b04c8..6dce8f6c2f 100644 --- a/video/out/vo.h +++ b/video/out/vo.h @@ -143,13 +143,31 @@ struct voctrl_playback_state { }; // VOCTRL_PERFORMANCE_DATA -struct voctrl_performance_entry { - // Times are in microseconds +#define PERF_SAMPLE_COUNT 256u + +struct mp_pass_perf { + // times are all in nanoseconds uint64_t last, avg, peak; + // this is a ring buffer, indices are relative to index and modulo + // PERF_SAMPLE_COUNT + uint64_t *samples; + int count; + int index; +}; + +#define VO_PASS_PERF_MAX 128 + +struct mp_frame_perf { + int count; + struct mp_pass_perf perf[VO_PASS_PERF_MAX]; + // The owner of this struct does not have ownership over the names, and + // they may change at any time - so this struct should not be stored + // anywhere or the results reused + char *desc[VO_PASS_PERF_MAX]; }; struct voctrl_performance_data { - struct voctrl_performance_entry upload, render, present; + struct mp_frame_perf fresh, redraw; }; enum { diff --git a/video/out/vo_opengl.c b/video/out/vo_opengl.c index 9b3f944e21..f5b0bd37c4 100644 --- a/video/out/vo_opengl.c +++ b/video/out/vo_opengl.c @@ -301,7 +301,7 @@ static int control(struct vo *vo, uint32_t request, void *data) vo->want_redraw = true; return true; case VOCTRL_PERFORMANCE_DATA: - *(struct voctrl_performance_data *)data = gl_video_perfdata(p->renderer); + gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data); return true; } |