vo_gpu: glx: use GLX_OML_sync_control for better vsync reporting

Use the extension to compute the (hopefully correct) video delay and vsync phase. This is very fuzzy, because the latency will suddenly be applied after some frames have already been shown. This means there _will_ be "jumps" in the time accounting, which can lead to strange effects at the start of playback (such as making initial "dropped" etc. frames worse). The only reasonable way to fix this would be running a few dummy frame swaps at the start of playback until the latency is known. The same happens when unpausing. This only affects display-sync mode. Correct function was not confirmed. It only "looks right". I don't have the equipment to make scientifically correct measurements. A potentially bad thing is that we trust the timestamps we're receiving. Out-of-bounds timestamps could wreak havoc. On the other hand, this will probably cause the higher-level code to panic and just disable DS. As a further caveat, this makes a bunch of assumptions about UST timestamps. If there are delayed frames (i.e. we skipped one or more vsyncs), the latency logic is mostly reset. There is no attempt to make the vo.c skipped vsync logic use this. Also, the latency computation determines a vsync duration, and there's no effort to reconcile or share the vo.c logic for determining vsync duration.
author: wm4 <wm4@nowhere> 2018-08-31 16:33:15 +0200
committer: Anton Kindestam <antonki@kth.se> 2018-12-06 10:30:14 +0100
commit: 83884fdf03fc991679bea53d3d5bddf97ed16a9b (patch)
tree: 84248a21d9a50185dcc51bfb79077f84bdd0e96c
parent: 8b83c8996686072bc743b112ae5cb3bf93aa33ed (diff)
download: mpv-83884fdf03fc991679bea53d3d5bddf97ed16a9b.tar.bz2
mpv-83884fdf03fc991679bea53d3d5bddf97ed16a9b.tar.xz
7 files changed, 142 insertions, 0 deletions
diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h
index a2fcb3711a..b6b6ffcf43 100644
--- a/video/out/gpu/context.h
+++ b/video/out/gpu/context.h
@@ -83,6 +83,15 @@ struct ra_swapchain_fns {
     // Performs a buffer swap. This blocks for as long as necessary to meet
     // params.swapchain_depth, or until the next vblank (for vsynced contexts)
     void (*swap_buffers)(struct ra_swapchain *sw);
+
+    // Return the latency at which swap_buffers() is performed. This is in
+    // seconds and always >= 0. Essentially, it's the predicted time the last
+    // shown frame will take until it is actually displayed on the physical
+    // screen. (A reasonable implementation is returning the duration the
+    // last actually displayed frame took after its swap_buffers() was called.)
+    // Should return -1 on error (e.g. discontinuities).
+    // Can be NULL or always return -1 if unsupported.
+    double (*get_latency)(struct ra_swapchain *sw);
 };
 
 // Create and destroy a ra_ctx. This also takes care of creating and destroying
diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c
index 43b57aa4ed..44e64b7d9a 100644
--- a/video/out/opengl/context.c
+++ b/video/out/opengl/context.c
@@ -313,9 +313,16 @@ void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw)
     }
 }
 
+static double ra_gl_ctx_get_latency(struct ra_swapchain *sw)
+{
+    struct priv *p = sw->priv;
+    return p->params.get_latency ? p->params.get_latency(sw->ctx) : -1;
+}
+
 static const struct ra_swapchain_fns ra_gl_swapchain_fns = {
     .color_depth   = ra_gl_ctx_color_depth,
     .start_frame   = ra_gl_ctx_start_frame,
     .submit_frame  = ra_gl_ctx_submit_frame,
     .swap_buffers  = ra_gl_ctx_swap_buffers,
+    .get_latency   = ra_gl_ctx_get_latency,
 };
diff --git a/video/out/opengl/context.h b/video/out/opengl/context.h
index 5fccc70033..feaf8e1ab6 100644
--- a/video/out/opengl/context.h
+++ b/video/out/opengl/context.h
@@ -23,6 +23,9 @@ struct ra_gl_ctx_params {
     // function or if you override it yourself.
     void (*swap_buffers)(struct ra_ctx *ctx);
 
+    // See ra_swapchain_fns.get_latency.
+    double (*get_latency)(struct ra_ctx *ctx);
+
     // Set to false if the implementation follows normal GL semantics, which is
     // upside down. Set to true if it does *not*, i.e. if rendering is right
     // side up
diff --git a/video/out/opengl/context_glx.c b/video/out/opengl/context_glx.c
index 462f2cf592..fe210c5f7d 100644
--- a/video/out/opengl/context_glx.c
+++ b/video/out/opengl/context_glx.c
@@ -41,11 +41,20 @@
 #include "context.h"
 #include "utils.h"
 
+// Must be >= max. assumed and supported display latency in frames.
+#define SYNC_SAMPLES 16
+
 struct priv {
     GL gl;
     XVisualInfo *vinfo;
     GLXContext context;
     GLXFBConfig fbc;
+
+    Bool (*XGetSyncValues)(Display*, GLXDrawable, int64_t*, int64_t*, int64_t*);
+    uint64_t ust[SYNC_SAMPLES];
+    uint64_t last_sbc;
+    uint64_t last_msc;
+    double latency;
 };
 
 static void glx_uninit(struct ra_ctx *ctx)
@@ -161,6 +170,13 @@ static bool create_context_x11_gl3(struct ra_ctx *ctx, GL *gl, int gl_version,
 
     mpgl_load_functions(gl, (void *)glXGetProcAddressARB, glxstr, vo->log);
 
+    if (gl_check_extension(glxstr, "GLX_OML_sync_control")) {
+        p->XGetSyncValues =
+            (void *)glXGetProcAddressARB((const GLubyte *)"glXGetSyncValuesOML");
+    }
+    if (p->XGetSyncValues)
+        MP_VERBOSE(vo, "Using GLX_OML_sync_control.\n");
+
     return true;
 }
 
@@ -208,9 +224,91 @@ static void set_glx_attrib(int *attribs, int name, int value)
     }
 }
 
+static double update_latency_oml(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+
+    assert(p->XGetSyncValues);
+
+    p->last_sbc += 1;
+
+    memmove(&p->ust[1], &p->ust[0], (SYNC_SAMPLES - 1) * sizeof(p->ust[0]));
+    p->ust[0] = 0;
+
+    int64_t ust, msc, sbc;
+    if (!p->XGetSyncValues(ctx->vo->x11->display, ctx->vo->x11->window,
+                           &ust, &msc, &sbc))
+        return -1;
+
+    p->ust[0] = ust;
+
+    uint64_t last_msc = p->last_msc;
+    p->last_msc = msc;
+
+    // There was a driver-level discontinuity.
+    if (msc != last_msc + 1)
+        return -1;
+
+    // No frame displayed yet.
+    if (!ust || !sbc || !msc)
+        return -1;
+
+    // We really need to know the time since the vsync happened. There is no way
+    // to get the UST at the time which the frame was queued. So we have to make
+    // assumptions about the UST. The extension spec doesn't define what the UST
+    // is (not even its unit).
+    // Simply assume UST is a simple CLOCK_MONOTONIC usec value. The swap buffer
+    // call happened "some" but very small time ago, so we can get away with
+    // querying the current time. There is also the implicit assumption that
+    // mpv's timer and the UST use the same clock (which it does on POSIX).
+    struct timespec ts;
+    if (clock_gettime(CLOCK_MONOTONIC, &ts))
+        return -1;
+    uint64_t now_monotonic = ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
+
+    // Actually we need two consecutive displays before we can accurately
+    // measure the latency (because we need to compute vsync_duration).
+    if (!p->ust[1])
+        return -1;
+
+    // Display frame duration.
+    int64_t vsync_duration = p->ust[0] - p->ust[1];
+
+    // Display latency in frames.
+    int64_t n_frames = p->last_sbc - sbc;
+
+    // Too high latency, or other nonsense.
+    if (n_frames < 0 || n_frames >= SYNC_SAMPLES)
+        return -1;
+
+    // Values were not recorded? (Temporary failures etc.)
+    if (!p->ust[n_frames])
+        return -1;
+
+    // Time since last frame display event.
+    int64_t latency_us = now_monotonic - p->ust[n_frames];
+
+    // The frame display event probably happened very recently (about within one
+    // vsync), but the corresponding video frame can be much older.
+    latency_us = (n_frames + 1) * vsync_duration - latency_us;
+
+    return latency_us / (1000.0 * 1000.0);
+}
+
 static void glx_swap_buffers(struct ra_ctx *ctx)
 {
+    struct priv *p = ctx->priv;
+
     glXSwapBuffers(ctx->vo->x11->display, ctx->vo->x11->window);
+
+    if (p->XGetSyncValues)
+        p->latency = update_latency_oml(ctx);
+}
+
+static double glx_get_latency(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    return p->latency;
 }
 
 static bool glx_init(struct ra_ctx *ctx)
@@ -296,11 +394,14 @@ static bool glx_init(struct ra_ctx *ctx)
 
     struct ra_gl_ctx_params params = {
         .swap_buffers = glx_swap_buffers,
+        .get_latency  = glx_get_latency,
     };
 
     if (!ra_gl_ctx_init(ctx, gl, params))
         goto uninit;
 
+    p->latency = -1;
+
     return true;
 
 uninit:
diff --git a/video/out/vo.c b/video/out/vo.c
index a33d9fd15f..466759f595 100644
--- a/video/out/vo.c
+++ b/video/out/vo.c
@@ -141,6 +141,7 @@ struct vo_internal {
     double estimated_vsync_jitter;
     bool expecting_vsync;
     int64_t num_successive_vsyncs;
+    double last_vo_latency;
 
     int64_t flip_queue_offset; // queue flip events at most this much in advance
     int64_t timing_offset;     // same (but from options; not VO configured)
@@ -481,6 +482,10 @@ static void update_vsync_timing_after_swap(struct vo *vo)
     int64_t now = mp_time_us();
     int64_t prev_vsync = in->prev_vsync;
 
+    // If we can, use a "made up" expected display time.
+    if (in->last_vo_latency >= 0)
+        now += in->last_vo_latency * (1000.0 * 1000.0);
+
     in->prev_vsync = now;
 
     if (!in->expecting_vsync) {
@@ -910,11 +915,15 @@ bool vo_render_frame_external(struct vo *vo)
 
         vo->driver->flip_page(vo);
 
+        double latency =
+            vo->driver->get_latency ? vo->driver->get_latency(vo) : -1;
+
         MP_STATS(vo, "end video-flip");
 
         pthread_mutex_lock(&in->lock);
         in->dropped_frame = prev_drop_count < vo->in->drop_count;
         in->rendering = false;
+        in->last_vo_latency = latency;
 
         update_vsync_timing_after_swap(vo);
     }
diff --git a/video/out/vo.h b/video/out/vo.h
index 3c00bb988e..848f71e472 100644
--- a/video/out/vo.h
+++ b/video/out/vo.h
@@ -374,6 +374,11 @@ struct vo_driver {
      */
     void (*flip_page)(struct vo *vo);
 
+    /*
+     * See ra_swapchain_fns.get_latency. Optional.
+     */
+    double (*get_latency)(struct vo *vo);
+
     /* These optional callbacks can be provided if the GUI framework used by
      * the VO requires entering a message loop for receiving events and does
      * not call vo_wakeup() from a separate thread when there are new events.
diff --git a/video/out/vo_gpu.c b/video/out/vo_gpu.c
index a80ba233c2..3535ae3e7e 100644
--- a/video/out/vo_gpu.c
+++ b/video/out/vo_gpu.c
@@ -98,6 +98,13 @@ static void flip_page(struct vo *vo)
     sw->fns->swap_buffers(sw);
 }
 
+static double get_latency(struct vo *vo)
+{
+    struct gpu_priv *p = vo->priv;
+    struct ra_swapchain *sw = p->ctx->swapchain;
+    return sw->fns->get_latency ? sw->fns->get_latency(sw) : -1;
+}
+
 static int query_format(struct vo *vo, int format)
 {
     struct gpu_priv *p = vo->priv;
@@ -326,6 +333,7 @@ const struct vo_driver video_out_gpu = {
     .get_image = get_image,
     .draw_frame = draw_frame,
     .flip_page = flip_page,
+    .get_latency = get_latency,
     .wait_events = wait_events,
     .wakeup = wakeup,
     .uninit = uninit,
author	wm4 <wm4@nowhere>	2018-08-31 16:33:15 +0200
committer	Anton Kindestam <antonki@kth.se>	2018-12-06 10:30:14 +0100
commit	83884fdf03fc991679bea53d3d5bddf97ed16a9b (patch)
tree	84248a21d9a50185dcc51bfb79077f84bdd0e96c
parent	8b83c8996686072bc743b112ae5cb3bf93aa33ed (diff)
download	mpv-83884fdf03fc991679bea53d3d5bddf97ed16a9b.tar.bz2 mpv-83884fdf03fc991679bea53d3d5bddf97ed16a9b.tar.xz