vo, vo_gpu, glx: correct GLX_OML_sync_control usage

I misunderstood how this extension works. If I understand it correctly now, it's worse than I thought. They key thing is that the (ust, msc, sbc) tripple is not for a single swap event. Instead, (ust, msc) run independently from sbc. Assuming a CFR display/compositor, this means you can at best know the vsync phase and frequency, but not the exact time a sbc changed value. There is GLX_INTEL_swap_event, which might work as expected, but it has no EGL equivalent (while GLX_OML_sync_control does, in theory). Redo the context_glx sync code. Now it's either more correct or less correct. I wanted to add proper skip detection (if a vsync gets skipped due to rendering taking too long and other problems), but it turned out to be too complex, so only some unused fields in vo.h are left of it. The "generic" skip detection has to do. The vsync_duration field is also unused by vo.c. Actually this seems to be an improvement. In cases where the flip call timing is off, but the real driver-level timing apparently still works, this will not report vsync skips or higher vsync jitter anymore. I could observe this with screenshots and fullscreen switching. On the other hand, maybe it just introduces an A/V offset or so. Why the fuck can't there be a proper API for retrieving these statistics? I'm not even asking for much.
author: wm4 <wm4@nowhere> 2018-09-21 15:42:37 +0200
committer: Anton Kindestam <antonki@kth.se> 2018-12-06 10:32:27 +0100
commit: f4ce3b8bb9ac715e3390f047697d6905eab55ef1 (patch)
tree: 34c0055f9b513f95691782e04e555c0dba8c6ad8 /video
parent: ace16fcfff5ab48f31ad1906532e412584b5e8ea (diff)
download: mpv-f4ce3b8bb9ac715e3390f047697d6905eab55ef1.tar.bz2
mpv-f4ce3b8bb9ac715e3390f047697d6905eab55ef1.tar.xz
3 files changed, 128 insertions, 84 deletions
diff --git a/video/out/opengl/context_glx.c b/video/out/opengl/context_glx.c
index 2a6a2a4cf1..1f3225a9ac 100644
--- a/video/out/opengl/context_glx.c
+++ b/video/out/opengl/context_glx.c
@@ -37,13 +37,11 @@
 #define GLX_CONTEXT_ES2_PROFILE_BIT_EXT         0x00000004
 #endif
 
+#include "osdep/timer.h"
 #include "video/out/x11_common.h"
 #include "context.h"
 #include "utils.h"
 
-// Must be >= max. assumed and supported display latency in frames.
-#define SYNC_SAMPLES 16
-
 struct priv {
     GL gl;
     XVisualInfo *vinfo;
@@ -51,10 +49,14 @@ struct priv {
     GLXFBConfig fbc;
 
     Bool (*XGetSyncValues)(Display*, GLXDrawable, int64_t*, int64_t*, int64_t*);
-    uint64_t ust[SYNC_SAMPLES];
-    uint64_t last_sbc;
-    uint64_t last_msc;
-    double latency;
+    int64_t last_ust;
+    int64_t last_msc;
+    int64_t last_sbc;
+    int64_t last_sbc_mp_time;
+    int64_t user_sbc;
+    int64_t vsync_duration;
+    int64_t last_skipped_vsyncs;
+    int64_t last_queue_display_time;
 };
 
 static void glx_uninit(struct ra_ctx *ctx)
@@ -224,75 +226,97 @@ static void set_glx_attrib(int *attribs, int name, int value)
     }
 }
 
-static double update_latency_oml(struct ra_ctx *ctx)
+static void update_vsync_oml(struct ra_ctx *ctx)
 {
     struct priv *p = ctx->priv;
 
     assert(p->XGetSyncValues);
 
-    p->last_sbc += 1;
-
-    memmove(&p->ust[1], &p->ust[0], (SYNC_SAMPLES - 1) * sizeof(p->ust[0]));
-    p->ust[0] = 0;
-
+    p->last_skipped_vsyncs = 0;
+    p->user_sbc += 1;
+
+    // This extension returns two unrelated values:
+    //  (ust, msc): clock time and incrementing counter of last vsync (this is
+    //              reported continuously, even if we don't swap)
+    //  sbc:        swap counter of frame that was last displayed (every swap
+    //              increments the user_sbc, and the reported sbc is the sbc
+    //              of the frame that was just displayed)
+    // Invariants:
+    //  - ust and msc change in lockstep (no value can change with the other)
+    //  - msc is incremented; if you query it in a loop, and your thread isn't
+    //    frozen or starved by the scheduler, it will usually not change or
+    //    be incremented by 1 (while the ust will be incremented by vsync
+    //    duration)
+    //  - sbc is never higher than the user_sbc
+    //  - (ust, msc) are equal to or higher by vsync increments than the display
+    //    time of the frame referenced by the sbc
+    // Note that (ust, msc) and sbc are not locked to each other. The following
+    // can easily happen if vsync skips occur:
+    //  - you draw a frame, in the meantime hardware swaps sbc_1
+    //  - another display vsync happens during drawing
+    //  - you call swap()
+    //  - query (ust, msc) and sbc
+    //  - sbc contains sbc_1, but (ust, msc) contains the vsync after it
+    // As a consequence, it's hard to detect the latency or vsync skips.
     int64_t ust, msc, sbc;
     if (!p->XGetSyncValues(ctx->vo->x11->display, ctx->vo->x11->window,
                            &ust, &msc, &sbc))
-        return -1;
+    {
+        // Assume the extension is effectively unsupported.
+        p->vsync_duration = -1;
+        p->last_skipped_vsyncs = -1;
+        p->last_queue_display_time = -1;
+        return;
+    }
 
-    p->ust[0] = ust;
+    int64_t ust_passed = p->last_ust ? ust - p->last_ust : 0;
+    p->last_ust = ust;
 
-    uint64_t last_msc = p->last_msc;
+    int64_t msc_passed = p->last_msc ? msc - p->last_msc : 0;
     p->last_msc = msc;
 
-    // There was a driver-level discontinuity.
-    if (msc != last_msc + 1)
-        return -1;
-
-    // No frame displayed yet.
-    if (!ust || !sbc || !msc)
-        return -1;
-
-    // We really need to know the time since the vsync happened. There is no way
-    // to get the UST at the time which the frame was queued. So we have to make
-    // assumptions about the UST. The extension spec doesn't define what the UST
-    // is (not even its unit).
-    // Simply assume UST is a simple CLOCK_MONOTONIC usec value. The swap buffer
-    // call happened "some" but very small time ago, so we can get away with
-    // querying the current time. There is also the implicit assumption that
-    // mpv's timer and the UST use the same clock (which it does on POSIX).
-    struct timespec ts;
-    if (clock_gettime(CLOCK_MONOTONIC, &ts))
-        return -1;
-    uint64_t now_monotonic = ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
-
-    // Actually we need two consecutive displays before we can accurately
-    // measure the latency (because we need to compute vsync_duration).
-    if (!p->ust[1])
-        return -1;
-
-    // Display frame duration.
-    int64_t vsync_duration = p->ust[0] - p->ust[1];
-
-    // Display latency in frames.
-    int64_t n_frames = p->last_sbc - sbc;
-
-    // Too high latency, or other nonsense.
-    if (n_frames < 0 || n_frames >= SYNC_SAMPLES)
-        return -1;
-
-    // Values were not recorded? (Temporary failures etc.)
-    if (!p->ust[n_frames])
-        return -1;
-
-    // Time since last frame display event.
-    int64_t latency_us = now_monotonic - p->ust[n_frames];
-
-    // The frame display event probably happened very recently (about within one
-    // vsync), but the corresponding video frame can be much older.
-    latency_us = (n_frames + 1) * vsync_duration - latency_us;
-
-    return latency_us / (1000.0 * 1000.0);
+    int64_t sbc_passed = sbc - p->last_sbc;
+    p->last_sbc = sbc;
+
+    // Display frame duration. This makes assumptions about UST (see below).
+    if (msc_passed && ust_passed)
+        p->vsync_duration = ust_passed / msc_passed;
+
+    // Only if a new frame was displayed (sbc increased) we have sort-of a
+    // chance that the current (ust, msc) is for the sbc. But this is racy,
+    // because skipped frames drops could have increased the msc right after the
+    // display event and before we queried the values. This code hopes for the
+    // best and ignores this.
+    if (sbc_passed) {
+        // The extension spec doesn't define what the UST is (not even its unit).
+        // Simply assume UST is a simple CLOCK_MONOTONIC usec value. The swap
+        // buffer call happened "some" but very small time ago, so we can get
+        // away with querying the current time. There is also the implicit
+        // assumption that mpv's timer and the UST use the same clock (which it
+        // does on POSIX).
+        struct timespec ts;
+        if (clock_gettime(CLOCK_MONOTONIC, &ts))
+            return;
+        uint64_t now_monotonic = ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
+        uint64_t ust_mp_time = mp_time_us() - (now_monotonic - ust);
+
+        // Assume this is exactly when the actual display event for this sbc
+        // happened. This is subject to the race mentioned above.
+        p->last_sbc_mp_time = ust_mp_time;
+    }
+
+    // At least one frame needs to be actually displayed before
+    // p->last_sbc_mp_time is set.
+    if (!sbc)
+        return;
+
+    // Extrapolate from the last sbc time (when a frame was actually displayed),
+    // and by adding the number of frames that were queued since to it.
+    // For every unit the sbc is smaller than user_sbc, the actual display
+    // is one frame ahead (assumes glx_swap_buffers() is called for every
+    // vsync).
+    p->last_queue_display_time =
+        p->last_sbc_mp_time + (p->user_sbc - sbc) * p->vsync_duration;
 }
 
 static void glx_swap_buffers(struct ra_ctx *ctx)
@@ -302,13 +326,15 @@ static void glx_swap_buffers(struct ra_ctx *ctx)
     glXSwapBuffers(ctx->vo->x11->display, ctx->vo->x11->window);
 
     if (p->XGetSyncValues)
-        p->latency = update_latency_oml(ctx);
+        update_vsync_oml(ctx);
 }
 
 static void glx_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info)
 {
     struct priv *p = ctx->priv;
-    info->latency = p->latency;
+    info->vsync_duration = p->vsync_duration;
+    info->skipped_vsyncs = p->last_skipped_vsyncs;
+    info->last_queue_display_time = p->last_queue_display_time;
 }
 
 static bool glx_init(struct ra_ctx *ctx)
@@ -400,7 +426,9 @@ static bool glx_init(struct ra_ctx *ctx)
     if (!ra_gl_ctx_init(ctx, gl, params))
         goto uninit;
 
-    p->latency = -1;
+    p->vsync_duration = -1;
+    p->last_skipped_vsyncs = -1;
+    p->last_queue_display_time = -1;
 
     return true;
 
diff --git a/video/out/vo.c b/video/out/vo.c
index 4110e1f353..0d4ed140ae 100644
--- a/video/out/vo.c
+++ b/video/out/vo.c
@@ -479,7 +479,7 @@ static void update_vsync_timing_after_swap(struct vo *vo,
 {
     struct vo_internal *in = vo->in;
 
-    int64_t vsync_time = vsync->last_queue_time;
+    int64_t vsync_time = vsync->last_queue_display_time;
     int64_t prev_vsync = in->prev_vsync;
     in->prev_vsync = vsync_time;
 
@@ -911,15 +911,15 @@ bool vo_render_frame_external(struct vo *vo)
         vo->driver->flip_page(vo);
 
         struct vo_vsync_info vsync = {
-            .last_queue_time = mp_time_us(),
-            .latency = -1,
+            .last_queue_display_time = -1,
+            .skipped_vsyncs = -1,
         };
         if (vo->driver->get_vsync)
             vo->driver->get_vsync(vo, &vsync);
 
-        // If we can, use a "made up" expected display time.
-        if (vsync.latency >= 0)
-            vsync.last_queue_time += vsync.latency * (1000.0 * 1000.0);
+        // Make up some crap if presentation feedback is missing.
+        if (vsync.last_queue_display_time < 0)
+            vsync.last_queue_display_time = mp_time_us();
 
         MP_STATS(vo, "end video-flip");
 
diff --git a/video/out/vo.h b/video/out/vo.h
index f64d94a5ba..3514b6df5c 100644
--- a/video/out/vo.h
+++ b/video/out/vo.h
@@ -263,19 +263,33 @@ struct vo_frame {
     uint64_t frame_id;
 };
 
-// Presentation feedback.
+// Presentation feedback. See get_vsync() for how backends should fill this
+// struct.
 struct vo_vsync_info {
-    // Last mp_time_us() timestamp at which a frame was queued.
-    int64_t last_queue_time;
-
-    // The latency at which swap_buffers() is performed. This is in seconds, and
-    // valid values are always >= 0. Essentially, it's the predicted time the
-    // last shown frame will take until it is actually displayed on the physical
-    // screen. (A reasonable implementation is returning the actual measured
-    // value for the last frame which was actually displayed. The assumption is
-    // that the latency usually doesn't change.)
+    // mp_time_us() timestamp at which the last queued frame will likely be
+    // displayed (this is in the future, unless the frame is instantly output).
     // -1 if unset or unsupported.
-    double latency;
+    // This implies the latency of the output.
+    int64_t last_queue_display_time;
+
+    // Time between 2 vsync events in microseconds. The difference should be the
+    // from 2 times sampled from the same reference point (it should not be the
+    // difference between e.g. the end of scanout and the start of the next one;
+    // it must be continuous).
+    // -1 if unsupported.
+    //  0 if supported, but no value available yet. It is assumed that the value
+    //    becomes available after enough swap_buffers() calls were done.
+    // >0 values are taken for granted. Very bad things will happen if it's
+    //    inaccurate.
+    int64_t vsync_duration;
+
+    // Number of skipped physical vsyncs at some point in time. Typically, this
+    // value is some time in the past by an offset that equals to the latency.
+    // This value is reset and newly sampled at every swap_buffers() call.
+    // This can be used to detect delayed frames iff you try to call
+    // swap_buffers() for every physical vsync.
+    // -1 if unset or unsupported.
+    int64_t skipped_vsyncs;
 };
 
 struct vo_driver {
@@ -393,6 +407,8 @@ struct vo_driver {
      * Return presentation feedback. The implementation should not touch fields
      * it doesn't support; the info fields are preinitialized to neutral values.
      * Usually called once after flip_page(), but can be called any time.
+     * The values returned by this are always relative to the last flip_page()
+     * call.
      */
     void (*get_vsync)(struct vo *vo, struct vo_vsync_info *info);
author	wm4 <wm4@nowhere>	2018-09-21 15:42:37 +0200
committer	Anton Kindestam <antonki@kth.se>	2018-12-06 10:32:27 +0100
commit	f4ce3b8bb9ac715e3390f047697d6905eab55ef1 (patch)
tree	34c0055f9b513f95691782e04e555c0dba8c6ad8 /video
parent	ace16fcfff5ab48f31ad1906532e412584b5e8ea (diff)
download	mpv-f4ce3b8bb9ac715e3390f047697d6905eab55ef1.tar.bz2 mpv-f4ce3b8bb9ac715e3390f047697d6905eab55ef1.tar.xz