summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
authorNiklas Haas <git@nand.wakku.to>2016-06-05 21:55:30 +0200
committerwm4 <wm4@nowhere>2016-06-07 12:16:15 +0200
commit8ceb935bd8e1062ff83287c00cca0b7428a7dfba (patch)
tree0183f856b6262ae8b65d65bdf0caa466c03985f3 /video
parent88b584656d9752573cc4320c845a6d31b5877140 (diff)
downloadmpv-8ceb935bd8e1062ff83287c00cca0b7428a7dfba.tar.bz2
mpv-8ceb935bd8e1062ff83287c00cca0b7428a7dfba.tar.xz
vo_opengl: add time queries
To avoid blocking the CPU, we use 8 time objects and rotate through them, only blocking until the last possible moment (before we need access to them on the next iteration through the ring buffer). I tested it out on my machine and 4 query objects were enough to guarantee block-free querying, but the extra margin shouldn't hurt. Frame render times are just output at the end of each frame, via MP_DBG. This might be improved in the future. (In particular, I want to expose these numbers as properties so that users get some more visible feedback about render times) Currently, we measure pass_render_frame and pass_draw_to_screen separately because the former might be called multiple times due to interpolation. Doing it this way gives more faithful numbers. Same goes for frame upload times.
Diffstat (limited to 'video')
-rw-r--r--video/out/opengl/common.c17
-rw-r--r--video/out/opengl/common.h11
-rw-r--r--video/out/opengl/utils.c126
-rw-r--r--video/out/opengl/utils.h12
-rw-r--r--video/out/opengl/video.c42
5 files changed, 208 insertions, 0 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index 99d8bbeb51..dd9ecc46fa 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -274,6 +274,23 @@ static const struct gl_functions gl_functions[] = {
},
},
{
+ .ver_core = 330,
+ .extension = "GL_ARB_timer_query",
+ .functions = (const struct gl_function[]) {
+ DEF_FN(GenQueries),
+ DEF_FN(DeleteQueries),
+ DEF_FN(BeginQuery),
+ DEF_FN(EndQuery),
+ DEF_FN(QueryCounter),
+ DEF_FN(IsQuery),
+ DEF_FN(GetQueryObjectiv),
+ DEF_FN(GetQueryObjecti64v),
+ DEF_FN(GetQueryObjectuiv),
+ DEF_FN(GetQueryObjectui64v),
+ {0}
+ },
+ },
+ {
.ver_core = 430,
.ver_es_core = 300,
.functions = (const struct gl_function[]) {
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
index ed12732bfc..ea5442b4ff 100644
--- a/video/out/opengl/common.h
+++ b/video/out/opengl/common.h
@@ -186,6 +186,17 @@ struct GL {
GLenum (GLAPIENTRY *ClientWaitSync)(GLsync, GLbitfield, GLuint64);
void (GLAPIENTRY *DeleteSync)(GLsync sync);
+ void (GLAPIENTRY *GenQueries)(GLsizei, GLuint *);
+ void (GLAPIENTRY *DeleteQueries)(GLsizei, const GLuint *);
+ void (GLAPIENTRY *BeginQuery)(GLenum, GLuint);
+ void (GLAPIENTRY *EndQuery)(GLenum);
+ void (GLAPIENTRY *QueryCounter)(GLuint, GLenum);
+ GLboolean (GLAPIENTRY *IsQuery)(GLuint);
+ void (GLAPIENTRY *GetQueryObjectiv)(GLuint, GLenum, GLint *);
+ void (GLAPIENTRY *GetQueryObjecti64v)(GLuint, GLenum, GLint64 *);
+ void (GLAPIENTRY *GetQueryObjectuiv)(GLuint, GLenum, GLuint *);
+ void (GLAPIENTRY *GetQueryObjectui64v)(GLuint, GLenum, GLuint64 *);
+
void (GLAPIENTRY *VDPAUInitNV)(const GLvoid *, const GLvoid *);
void (GLAPIENTRY *VDPAUFiniNV)(void);
GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterOutputSurfaceNV)
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index cfb6eec679..4702254df5 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -1019,3 +1019,129 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc)
gl_sc_reset(sc);
}
+
+// Maximum number of simultaneous query objects to keep around. Reducing this
+// number might cause rendering to block until the result of a previous query is
+// available
+#define QUERY_OBJECT_NUM 8
+
+// How many samples to keep around, for the sake of average and peak
+// calculations. This corresponds to a few seconds (exact time variable)
+#define QUERY_SAMPLE_SIZE 256
+
+struct gl_timer {
+ GL *gl;
+ GLuint query[QUERY_OBJECT_NUM];
+ int query_idx;
+
+ GLuint64 samples[QUERY_SAMPLE_SIZE];
+ int sample_idx;
+ int sample_count;
+
+ uint64_t avg_sum;
+ uint64_t peak;
+};
+
+int gl_timer_sample_count(struct gl_timer *timer)
+{
+ return timer->sample_count;
+}
+
+uint64_t gl_timer_last_us(struct gl_timer *timer)
+{
+ return timer->samples[(timer->sample_idx - 1) % QUERY_SAMPLE_SIZE] / 1000;
+}
+
+uint64_t gl_timer_avg_us(struct gl_timer *timer)
+{
+ if (timer->sample_count <= 0)
+ return 0;
+
+ return timer->avg_sum / timer->sample_count / 1000;
+}
+
+uint64_t gl_timer_peak_us(struct gl_timer *timer)
+{
+ return timer->peak / 1000;
+}
+
+struct gl_timer *gl_timer_create(GL *gl)
+{
+ struct gl_timer *timer = talloc_ptrtype(NULL, timer);
+ *timer = (struct gl_timer){ .gl = gl };
+
+ if (gl->GenQueries)
+ gl->GenQueries(QUERY_OBJECT_NUM, timer->query);
+
+ return timer;
+}
+
+void gl_timer_free(struct gl_timer *timer)
+{
+ if (!timer)
+ return;
+
+ GL *gl = timer->gl;
+ if (gl && gl->DeleteQueries) {
+ // this is a no-op on already uninitialized queries
+ gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query);
+ }
+
+ talloc_free(timer);
+}
+
+static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
+{
+ // Input res into the buffer and grab the previous value
+ GLuint64 old = timer->samples[timer->sample_idx];
+ timer->samples[timer->sample_idx++] = new;
+ timer->sample_idx %= QUERY_SAMPLE_SIZE;
+
+ // Update average and sum
+ timer->avg_sum = timer->avg_sum + new - old;
+ timer->sample_count = MPMIN(timer->sample_count + 1, QUERY_SAMPLE_SIZE);
+
+ // Update peak if necessary
+ if (new >= timer->peak) {
+ timer->peak = new;
+ } else if (timer->peak == old) {
+ // It's possible that the last peak was the value we just removed,
+ // if so we need to scan for the new peak
+ uint64_t peak = new;
+ for (int i = 0; i < QUERY_SAMPLE_SIZE; i++)
+ peak = MPMAX(peak, timer->samples[i]);
+ timer->peak = peak;
+ }
+}
+
+// If no free query is available, this can block. Shouldn't ever happen in
+// practice, though. (If it does, consider increasing QUERY_OBJECT_NUM)
+// IMPORTANT: only one gl_timer object may ever be active at a single time.
+// The caling code *MUST* ensure this
+void gl_timer_start(struct gl_timer *timer)
+{
+ GL *gl = timer->gl;
+ if (!gl->BeginQuery)
+ return;
+
+ // Get the next query object
+ GLuint id = timer->query[timer->query_idx++];
+ timer->query_idx %= QUERY_OBJECT_NUM;
+
+ // If this query object already holds a result, we need to get and
+ // record it first
+ if (gl->IsQuery(id)) {
+ GLuint64 elapsed;
+ gl->GetQueryObjectui64v(id, GL_QUERY_RESULT, &elapsed);
+ gl_timer_record(timer, elapsed);
+ }
+
+ gl->BeginQuery(GL_TIME_ELAPSED, id);
+}
+
+void gl_timer_stop(struct gl_timer *timer)
+{
+ GL *gl = timer->gl;
+ if (gl->EndQuery)
+ gl->EndQuery(GL_TIME_ELAPSED);
+}
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 85d3413831..33e66cd3de 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -172,4 +172,16 @@ void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name);
void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc);
void gl_sc_reset(struct gl_shader_cache *sc);
+struct gl_timer;
+
+struct gl_timer *gl_timer_create(GL *gl);
+void gl_timer_free(struct gl_timer *timer);
+void gl_timer_start(struct gl_timer *timer);
+void gl_timer_stop(struct gl_timer *timer);
+
+int gl_timer_sample_count(struct gl_timer *timer);
+uint64_t gl_timer_last_us(struct gl_timer *timer);
+uint64_t gl_timer_avg_us(struct gl_timer *timer);
+uint64_t gl_timer_peak_us(struct gl_timer *timer);
+
#endif
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 8a36f489b5..fe7c0abaa9 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -196,6 +196,10 @@ struct gl_video {
GLuint nnedi3_weights_buffer;
+ struct gl_timer *upload_timer;
+ struct gl_timer *render_timer;
+ struct gl_timer *present_timer;
+
struct mp_image_params real_image_params; // configured format
struct mp_image_params image_params; // texture format (mind hwdec case)
struct mp_imgfmt_desc image_desc;
@@ -2497,6 +2501,11 @@ static void pass_render_frame(struct gl_video *p)
if (p->dumb_mode)
return;
+ // start the render timer here. it will continue to the end of this
+ // function, to render the time needed to draw (excluding screen
+ // presentation)
+ gl_timer_start(p->render_timer);
+
p->use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling;
pass_read_video(p);
pass_opt_hook_point(p, "NATIVE", &p->texture_offset);
@@ -2553,10 +2562,14 @@ static void pass_render_frame(struct gl_video *p)
}
pass_opt_hook_point(p, "SCALED", NULL);
+
+ gl_timer_stop(p->render_timer);
}
static void pass_draw_to_screen(struct gl_video *p, int fbo)
{
+ gl_timer_start(p->present_timer);
+
if (p->dumb_mode)
pass_render_frame_dumb(p, fbo);
@@ -2582,6 +2595,8 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo)
pass_dither(p);
finish_pass_direct(p, fbo, p->vp_w, p->vp_h, &p->dst_rect);
+
+ gl_timer_stop(p->present_timer);
}
// Draws an interpolate frame to fbo, based on the frame timing in t
@@ -2754,6 +2769,16 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
p->frames_drawn += 1;
}
+static void timer_dbg(struct gl_video *p, const char *name, struct gl_timer *t)
+{
+ if (gl_timer_sample_count(t) > 0) {
+ MP_DBG(p, "%s time: last %dus avg %dus peak %dus\n", name,
+ (int)gl_timer_last_us(t),
+ (int)gl_timer_avg_us(t),
+ (int)gl_timer_peak_us(t));
+ }
+}
+
// (fbo==0 makes BindFramebuffer select the screen backbuffer)
void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
{
@@ -2857,6 +2882,11 @@ done:
gl->Flush();
p->frames_rendered++;
+
+ // Report performance metrics
+ timer_dbg(p, "upload", p->upload_timer);
+ timer_dbg(p, "render", p->render_timer);
+ timer_dbg(p, "present", p->present_timer);
}
// vp_w/vp_h is the implicit size of the target framebuffer.
@@ -2971,6 +3001,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi)
assert(mpi->num_planes == p->plane_count);
+ gl_timer_start(p->upload_timer);
+
mp_image_t pbo_mpi = *mpi;
bool pbo = map_image(p, &pbo_mpi);
if (pbo) {
@@ -2998,6 +3030,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi)
if (pbo)
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ gl_timer_stop(p->upload_timer);
+
return true;
error:
@@ -3227,6 +3261,10 @@ static void init_gl(struct gl_video *p)
gl->DeleteTextures(1, &tex);
}
+ p->upload_timer = gl_timer_create(p->gl);
+ p->render_timer = gl_timer_create(p->gl);
+ p->present_timer = gl_timer_create(p->gl);
+
debug_check_gl(p, "after init_gl");
}
@@ -3245,6 +3283,10 @@ void gl_video_uninit(struct gl_video *p)
gl->DeleteTextures(1, &p->lut_3d_texture);
+ gl_timer_free(p->upload_timer);
+ gl_timer_free(p->render_timer);
+ gl_timer_free(p->present_timer);
+
mpgl_osd_destroy(p->osd);
gl_set_debug_logger(gl, NULL);