summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNiklas Haas <git@nand.wakku.to>2016-06-05 21:55:30 +0200
committerwm4 <wm4@nowhere>2016-06-07 12:16:15 +0200
commit8ceb935bd8e1062ff83287c00cca0b7428a7dfba (patch)
tree0183f856b6262ae8b65d65bdf0caa466c03985f3
parent88b584656d9752573cc4320c845a6d31b5877140 (diff)
downloadmpv-8ceb935bd8e1062ff83287c00cca0b7428a7dfba.tar.bz2
mpv-8ceb935bd8e1062ff83287c00cca0b7428a7dfba.tar.xz
vo_opengl: add time queries
To avoid blocking the CPU, we use 8 time objects and rotate through them, only blocking until the last possible moment (before we need access to them on the next iteration through the ring buffer). I tested it out on my machine and 4 query objects were enough to guarantee block-free querying, but the extra margin shouldn't hurt. Frame render times are just output at the end of each frame, via MP_DBG. This might be improved in the future. (In particular, I want to expose these numbers as properties so that users get some more visible feedback about render times) Currently, we measure pass_render_frame and pass_draw_to_screen separately because the former might be called multiple times due to interpolation. Doing it this way gives more faithful numbers. Same goes for frame upload times.
-rw-r--r--video/out/opengl/common.c17
-rw-r--r--video/out/opengl/common.h11
-rw-r--r--video/out/opengl/utils.c126
-rw-r--r--video/out/opengl/utils.h12
-rw-r--r--video/out/opengl/video.c42
5 files changed, 208 insertions, 0 deletions
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
index 99d8bbeb51..dd9ecc46fa 100644
--- a/video/out/opengl/common.c
+++ b/video/out/opengl/common.c
@@ -274,6 +274,23 @@ static const struct gl_functions gl_functions[] = {
},
},
{
+ .ver_core = 330,
+ .extension = "GL_ARB_timer_query",
+ .functions = (const struct gl_function[]) {
+ DEF_FN(GenQueries),
+ DEF_FN(DeleteQueries),
+ DEF_FN(BeginQuery),
+ DEF_FN(EndQuery),
+ DEF_FN(QueryCounter),
+ DEF_FN(IsQuery),
+ DEF_FN(GetQueryObjectiv),
+ DEF_FN(GetQueryObjecti64v),
+ DEF_FN(GetQueryObjectuiv),
+ DEF_FN(GetQueryObjectui64v),
+ {0}
+ },
+ },
+ {
.ver_core = 430,
.ver_es_core = 300,
.functions = (const struct gl_function[]) {
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
index ed12732bfc..ea5442b4ff 100644
--- a/video/out/opengl/common.h
+++ b/video/out/opengl/common.h
@@ -186,6 +186,17 @@ struct GL {
GLenum (GLAPIENTRY *ClientWaitSync)(GLsync, GLbitfield, GLuint64);
void (GLAPIENTRY *DeleteSync)(GLsync sync);
+ void (GLAPIENTRY *GenQueries)(GLsizei, GLuint *);
+ void (GLAPIENTRY *DeleteQueries)(GLsizei, const GLuint *);
+ void (GLAPIENTRY *BeginQuery)(GLenum, GLuint);
+ void (GLAPIENTRY *EndQuery)(GLenum);
+ void (GLAPIENTRY *QueryCounter)(GLuint, GLenum);
+ GLboolean (GLAPIENTRY *IsQuery)(GLuint);
+ void (GLAPIENTRY *GetQueryObjectiv)(GLuint, GLenum, GLint *);
+ void (GLAPIENTRY *GetQueryObjecti64v)(GLuint, GLenum, GLint64 *);
+ void (GLAPIENTRY *GetQueryObjectuiv)(GLuint, GLenum, GLuint *);
+ void (GLAPIENTRY *GetQueryObjectui64v)(GLuint, GLenum, GLuint64 *);
+
void (GLAPIENTRY *VDPAUInitNV)(const GLvoid *, const GLvoid *);
void (GLAPIENTRY *VDPAUFiniNV)(void);
GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterOutputSurfaceNV)
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index cfb6eec679..4702254df5 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -1019,3 +1019,129 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc)
gl_sc_reset(sc);
}
+
+// Number of query objects each timer rotates through. Reducing this
+// number might cause rendering to block until the result of a previous query
+// is available.
+#define QUERY_OBJECT_NUM 8
+
+// How many samples to keep around, for the sake of average and peak
+// calculations. This corresponds to a few seconds (exact time variable).
+#define QUERY_SAMPLE_SIZE 256
+
+struct gl_timer {
+ GL *gl; // GL function table used for every query call
+ GLuint query[QUERY_OBJECT_NUM]; // query names, filled by GenQueries if loaded
+ int query_idx; // slot in query[] the next gl_timer_start() will use
+
+ GLuint64 samples[QUERY_SAMPLE_SIZE]; // ring buffer of recorded query results
+ int sample_idx; // slot in samples[] the next result is written to
+ int sample_count; // number of recorded samples, capped at QUERY_SAMPLE_SIZE
+
+ uint64_t avg_sum; // running sum of the values currently in samples[]
+ uint64_t peak; // largest value currently in samples[]
+};
+
+// Number of samples the timer currently holds (the sample_count field that
+// gl_timer_record() maintains).
+int gl_timer_sample_count(struct gl_timer *timer)
+{
+    const int held = timer->sample_count;
+    return held;
+}
+
+// Return the most recently recorded sample divided by 1000 (microseconds,
+// given that GL_TIME_ELAPSED results are reported in nanoseconds).
+// sample_idx names the slot the *next* sample will be written to, so the
+// latest sample sits one slot behind it in the ring. Before any sample has
+// been recorded this returns 0, because gl_timer_create() zero-initializes
+// the buffer.
+uint64_t gl_timer_last_us(struct gl_timer *timer)
+{
+    // Add QUERY_SAMPLE_SIZE before subtracting: sample_idx is a signed int,
+    // so (0 - 1) % QUERY_SAMPLE_SIZE evaluates to -1 in C and would read
+    // before the start of samples[] whenever sample_idx is 0 (initially and
+    // right after the ring wraps).
+    int last = (timer->sample_idx + QUERY_SAMPLE_SIZE - 1) % QUERY_SAMPLE_SIZE;
+    return timer->samples[last] / 1000;
+}
+
+// Mean of the stored samples, divided by 1000. Returns 0 while no sample has
+// been recorded yet.
+uint64_t gl_timer_avg_us(struct gl_timer *timer)
+{
+    const int count = timer->sample_count;
+    if (count > 0) {
+        uint64_t mean = timer->avg_sum / count;
+        return mean / 1000;
+    }
+
+    return 0;
+}
+
+// Largest sample currently tracked by the timer, divided by 1000.
+uint64_t gl_timer_peak_us(struct gl_timer *timer)
+{
+    const uint64_t peak = timer->peak;
+    return peak / 1000;
+}
+
+// Allocate a zero-initialized timer bound to gl. Query object names are only
+// generated when the GenQueries entry point is loaded; otherwise query[]
+// stays all zero.
+struct gl_timer *gl_timer_create(GL *gl)
+{
+    struct gl_timer *t = talloc_ptrtype(NULL, t);
+    *t = (struct gl_timer){ .gl = gl };
+
+    if (!gl->GenQueries)
+        return t;
+
+    gl->GenQueries(QUERY_OBJECT_NUM, t->query);
+    return t;
+}
+
+// Delete the timer's query objects (when DeleteQueries is loaded) and free
+// the timer itself. Passing NULL does nothing.
+void gl_timer_free(struct gl_timer *timer)
+{
+    if (timer) {
+        GL *gl = timer->gl;
+
+        // this is a no-op on already uninitialized queries
+        if (gl && gl->DeleteQueries)
+            gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query);
+
+        talloc_free(timer);
+    }
+}
+
+// Push one query result into the sample ring, overwriting the entry in the
+// current slot, and keep the running sum, sample count and peak in sync with
+// the ring contents.
+static void gl_timer_record(struct gl_timer *timer, GLuint64 new)
+{
+    const int slot = timer->sample_idx;
+    const GLuint64 evicted = timer->samples[slot];
+
+    timer->samples[slot] = new;
+    timer->sample_idx = (slot + 1) % QUERY_SAMPLE_SIZE;
+
+    // Running sum: the new value enters, the overwritten one leaves
+    timer->avg_sum = timer->avg_sum + new - evicted;
+    timer->sample_count = MPMIN(timer->sample_count + 1, QUERY_SAMPLE_SIZE);
+
+    if (new >= timer->peak) {
+        timer->peak = new;
+        return;
+    }
+
+    if (timer->peak != evicted)
+        return;
+
+    // The value that just dropped out of the ring was the peak, so the
+    // maximum has to be recomputed over everything still stored
+    uint64_t highest = new;
+    for (int n = 0; n < QUERY_SAMPLE_SIZE; n++)
+        highest = MPMAX(highest, timer->samples[n]);
+    timer->peak = highest;
+}
+
+// Begin a GL_TIME_ELAPSED query on the next query object in the ring. If that
+// object already holds a result, it is read and recorded first.
+// If no free query is available, this can block. Shouldn't ever happen in
+// practice, though. (If it does, consider increasing QUERY_OBJECT_NUM)
+// IMPORTANT: only one gl_timer object may ever be active at a single time.
+// The calling code *MUST* ensure this
+void gl_timer_start(struct gl_timer *timer)
+{
+    GL *gl = timer->gl;
+    if (!gl->BeginQuery)
+        return;
+
+    // Take the current ring slot and advance to the next one
+    const int slot = timer->query_idx;
+    timer->query_idx = (slot + 1) % QUERY_OBJECT_NUM;
+    const GLuint id = timer->query[slot];
+
+    // If this query object already holds a result, we need to get and
+    // record it before the object is reused
+    if (gl->IsQuery(id)) {
+        GLuint64 elapsed;
+        gl->GetQueryObjectui64v(id, GL_QUERY_RESULT, &elapsed);
+        gl_timer_record(timer, elapsed);
+    }
+
+    gl->BeginQuery(GL_TIME_ELAPSED, id);
+}
+
+// End the GL_TIME_ELAPSED query. Does nothing when the EndQuery entry point
+// is not loaded.
+void gl_timer_stop(struct gl_timer *timer)
+{
+    GL *gl = timer->gl;
+    if (!gl->EndQuery)
+        return;
+
+    gl->EndQuery(GL_TIME_ELAPSED);
+}
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
index 85d3413831..33e66cd3de 100644
--- a/video/out/opengl/utils.h
+++ b/video/out/opengl/utils.h
@@ -172,4 +172,16 @@ void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name);
void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc);
void gl_sc_reset(struct gl_shader_cache *sc);
+struct gl_timer;
+
+struct gl_timer *gl_timer_create(GL *gl); // allocate a timer bound to gl
+void gl_timer_free(struct gl_timer *timer); // NULL is accepted and ignored
+void gl_timer_start(struct gl_timer *timer); // begin a GL_TIME_ELAPSED query
+void gl_timer_stop(struct gl_timer *timer); // end the GL_TIME_ELAPSED query
+
+int gl_timer_sample_count(struct gl_timer *timer); // samples currently held
+uint64_t gl_timer_last_us(struct gl_timer *timer); // newest sample / 1000
+uint64_t gl_timer_avg_us(struct gl_timer *timer); // mean sample / 1000
+uint64_t gl_timer_peak_us(struct gl_timer *timer); // largest sample / 1000
+
#endif
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 8a36f489b5..fe7c0abaa9 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -196,6 +196,10 @@ struct gl_video {
GLuint nnedi3_weights_buffer;
+ struct gl_timer *upload_timer;
+ struct gl_timer *render_timer;
+ struct gl_timer *present_timer;
+
struct mp_image_params real_image_params; // configured format
struct mp_image_params image_params; // texture format (mind hwdec case)
struct mp_imgfmt_desc image_desc;
@@ -2497,6 +2501,11 @@ static void pass_render_frame(struct gl_video *p)
if (p->dumb_mode)
return;
+ // start the render timer here. it will continue to the end of this
+ // function, to measure the time needed to draw (excluding screen
+ // presentation)
+ gl_timer_start(p->render_timer);
+
p->use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling;
pass_read_video(p);
pass_opt_hook_point(p, "NATIVE", &p->texture_offset);
@@ -2553,10 +2562,14 @@ static void pass_render_frame(struct gl_video *p)
}
pass_opt_hook_point(p, "SCALED", NULL);
+
+ gl_timer_stop(p->render_timer);
}
static void pass_draw_to_screen(struct gl_video *p, int fbo)
{
+ gl_timer_start(p->present_timer);
+
if (p->dumb_mode)
pass_render_frame_dumb(p, fbo);
@@ -2582,6 +2595,8 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo)
pass_dither(p);
finish_pass_direct(p, fbo, p->vp_w, p->vp_h, &p->dst_rect);
+
+ gl_timer_stop(p->present_timer);
}
// Draws an interpolate frame to fbo, based on the frame timing in t
@@ -2754,6 +2769,16 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
p->frames_drawn += 1;
}
+// Log the last, average and peak time of t under the given name via MP_DBG.
+// Nothing is printed until the timer holds at least one sample.
+static void timer_dbg(struct gl_video *p, const char *name, struct gl_timer *t)
+{
+    if (gl_timer_sample_count(t) <= 0)
+        return;
+
+    int last_us = (int)gl_timer_last_us(t);
+    int avg_us = (int)gl_timer_avg_us(t);
+    int peak_us = (int)gl_timer_peak_us(t);
+
+    MP_DBG(p, "%s time: last %dus avg %dus peak %dus\n", name,
+           last_us, avg_us, peak_us);
+}
+
// (fbo==0 makes BindFramebuffer select the screen backbuffer)
void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo)
{
@@ -2857,6 +2882,11 @@ done:
gl->Flush();
p->frames_rendered++;
+
+ // Report performance metrics
+ timer_dbg(p, "upload", p->upload_timer);
+ timer_dbg(p, "render", p->render_timer);
+ timer_dbg(p, "present", p->present_timer);
}
// vp_w/vp_h is the implicit size of the target framebuffer.
@@ -2971,6 +3001,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi)
assert(mpi->num_planes == p->plane_count);
+ gl_timer_start(p->upload_timer);
+
mp_image_t pbo_mpi = *mpi;
bool pbo = map_image(p, &pbo_mpi);
if (pbo) {
@@ -2998,6 +3030,8 @@ static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi)
if (pbo)
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ gl_timer_stop(p->upload_timer);
+
return true;
error:
@@ -3227,6 +3261,10 @@ static void init_gl(struct gl_video *p)
gl->DeleteTextures(1, &tex);
}
+ p->upload_timer = gl_timer_create(p->gl);
+ p->render_timer = gl_timer_create(p->gl);
+ p->present_timer = gl_timer_create(p->gl);
+
debug_check_gl(p, "after init_gl");
}
@@ -3245,6 +3283,10 @@ void gl_video_uninit(struct gl_video *p)
gl->DeleteTextures(1, &p->lut_3d_texture);
+ gl_timer_free(p->upload_timer);
+ gl_timer_free(p->render_timer);
+ gl_timer_free(p->present_timer);
+
mpgl_osd_destroy(p->osd);
gl_set_debug_logger(gl, NULL);