demux_timeline: add heuristic to fix shifted seeks with separate audio

If you have a EDL stream with separate sources for audio and video stream (like ytdl_hook now creates), you can get the problem that the video stream seeks to a different position than audio due to different key frame granularity. In particular, if you seek backward, the video might undershoot the seek target by a lot. Then video will resume from an earlier position than audio, and the player plays silence. This is annoying. Fix this by explicitly implementing a heuristic to detect separate audio/video streams, determining where a video seek ends up, and then seeking the audio stream to the video destination. This also makes sure to not seek audio with SEEK_FORWARD, so it will always seek before the video position. Non-precise seeks still skip audio to the video target, so this helps with ensuring that audio is present at the final seek target. The implementation is very annoying, because the only way to determine the seek target is to actually read a packet. Thus a 1-packet queue needs to be added. In theory, we could get the seek target from the index of the video file (especially if it's mp4), but libavformat does not have public API that exports this index, so we're stuck with this roundabout generic method. Note that this is only for non-precise seeks. If precise seeks are done, the problem is handled by the frontend by skipping unwanted video frames. But non-precise seeking should still work. (Personally I prefer non-precise seek mode by default because they're still significantly faster.) It also needs to be said that this is the 4th implementation of this seek adjustment thing in mpv. The 1st implementation is in the frontend (look for MPContext.seek_slave). This works only if the external audio stream is known as such on the frontend level. The 2nd implementation is in the demuxer level packet cache (top of execute_cache_seek()). This is similar to code that any demuxer needs to handle non-precise seeks sufficiently nicely. The 3rd is in demux_mkv.c. Since mkv is an interleaved format, this implementation mostly consists on trying to pick index entries for video packets if a video stream is selected. Maybe these "redundant" implementations could be avoided by exposing separate streams through the demuxer API (and making them individually seekable) or something like this, but this is messy and not without problems for multiple reasons. So for now this commit is the best way to fix the observed behavior.
author: wm4 <wm4@nowhere> 2019-01-11 15:04:43 +0100
committer: wm4 <wm4@nowhere> 2019-09-19 20:37:04 +0200
commit: 7d0e0b3a5c118663390be72079ab47cd674a5ba9 (patch)
tree: 7ec10a69f86e82427a446be60ea78d278bb63ed5
parent: 87db2f24e8eca23664dc23128b8fca2b5d07125d (diff)
download: mpv-7d0e0b3a5c118663390be72079ab47cd674a5ba9.tar.bz2
mpv-7d0e0b3a5c118663390be72079ab47cd674a5ba9.tar.xz
1 files changed, 80 insertions, 11 deletions
diff --git a/demux/demux_timeline.c b/demux/demux_timeline.c
index 9a1fb8aed4..82760582b5 100644
--- a/demux/demux_timeline.c
+++ b/demux/demux_timeline.c
@@ -72,6 +72,8 @@ struct virtual_source {
     bool eof_reached;
     double dts;                 // highest read DTS (or PTS if no DTS available)
     bool any_selected;          // at least one stream is actually selected
+
+    struct demux_packet *next;
 };
 
 struct priv {
@@ -88,6 +90,8 @@ struct priv {
 };
 
 static bool add_tl(struct demuxer *demuxer, struct timeline_par *par);
+static bool do_read_next_packet(struct demuxer *demuxer,
+                                struct virtual_source *src);
 
 static void update_slave_stats(struct demuxer *demuxer, struct demuxer *slave)
 {
@@ -178,6 +182,7 @@ static void reselect_streams(struct demuxer *demuxer)
         if (!was_selected && src->any_selected) {
             src->eof_reached = false;
             src->dts = MP_NOPTS_VALUE;
+            TA_FREEP(&src->next);
         }
     }
 }
@@ -189,6 +194,7 @@ static void close_lazy_segments(struct demuxer *demuxer,
     for (int n = 0; n < src->num_segments; n++) {
         struct segment *seg = src->segments[n];
         if (seg != src->current && seg->d && seg->lazy) {
+            TA_FREEP(&src->next); // might depend on one of the sub-demuxers
             demux_free(seg->d);
             seg->d = NULL;
         }
@@ -249,28 +255,76 @@ static void switch_segment(struct demuxer *demuxer, struct virtual_source *src,
     src->eos_packets = 0;
 }
 
+static void seek_source(struct demuxer *demuxer, struct virtual_source *src,
+                        double pts, int flags)
+{
+    struct segment *new = src->segments[src->num_segments - 1];
+    for (int n = 0; n < src->num_segments; n++) {
+        if (pts < src->segments[n]->end) {
+            new = src->segments[n];
+            break;
+        }
+    }
+
+    switch_segment(demuxer, src, new, pts, flags, false);
+
+    src->dts = MP_NOPTS_VALUE;
+    TA_FREEP(&src->next);
+}
+
 static void d_seek(struct demuxer *demuxer, double seek_pts, int flags)
 {
     struct priv *p = demuxer->priv;
 
-    double pts = seek_pts * ((flags & SEEK_FACTOR) ? p->duration : 1);
-
+    seek_pts = seek_pts * ((flags & SEEK_FACTOR) ? p->duration : 1);
     flags &= SEEK_FORWARD | SEEK_HR;
 
+    // The intention is to seek audio streams to the same target as video
+    // streams if they are separate streams. Video streams usually have more
+    // coarse keyframe snapping, which could leave video without audio.
+    struct virtual_source *master = NULL;
+    bool has_slaves = false;
     for (int x = 0; x < p->num_sources; x++) {
         struct virtual_source *src = p->sources[x];
 
-        struct segment *new = src->segments[src->num_segments - 1];
-        for (int n = 0; n < src->num_segments; n++) {
-            if (pts < src->segments[n]->end) {
-                new = src->segments[n];
-                break;
+        bool any_audio = false, any_video = false;
+        for (int i = 0; i < src->num_streams; i++) {
+            struct virtual_stream *str = src->streams[i];
+            if (str->selected) {
+                if (str->sh->type == STREAM_VIDEO)
+                    any_video = true;
+                if (str->sh->type == STREAM_AUDIO)
+                    any_audio = true;
             }
         }
 
-        switch_segment(demuxer, src, new, pts, flags, false);
+        if (any_video)
+            master = src;
+        // A true slave stream is audio-only; this also prevents that the master
+        // stream is considered a slave stream.
+        if (any_audio && !any_video)
+            has_slaves = true;
+    }
 
-        src->dts = MP_NOPTS_VALUE;
+    if (!has_slaves)
+        master = NULL;
+
+    if (master) {
+        seek_source(demuxer, master, seek_pts, flags);
+        do_read_next_packet(demuxer, master);
+        if (master->next && master->next->pts != MP_NOPTS_VALUE) {
+            // Assume we got a seek target. Actually apply the heuristic.
+            MP_VERBOSE(demuxer, "adjust seek target from %f to %f\n", seek_pts,
+                       master->next->pts);
+            seek_pts = master->next->pts;
+            flags &= ~(unsigned)SEEK_FORWARD;
+        }
+    }
+
+    for (int x = 0; x < p->num_sources; x++) {
+        struct virtual_source *src = p->sources[x];
+        if (src != master)
+            seek_source(demuxer, src, seek_pts, flags);
     }
 }
 
@@ -300,8 +354,22 @@ static bool d_read_packet(struct demuxer *demuxer, struct demux_packet **out_pkt
     if (!src)
         return false;
 
+    if (!do_read_next_packet(demuxer, src))
+        return false;
+    *out_pkt = src->next;
+    src->next = NULL;
+    return true;
+}
+
+static bool do_read_next_packet(struct demuxer *demuxer,
+                                struct virtual_source *src)
+{
+    if (src->next)
+        return 1;
+
     struct segment *seg = src->current;
-    assert(seg && seg->d);
+    if (!seg || !seg->d)
+        return 0;
 
     struct demux_packet *pkt = demux_read_any_packet(seg->d);
     if (!pkt || (!src->no_clip && pkt->pts >= seg->end))
@@ -387,7 +455,7 @@ static bool d_read_packet(struct demuxer *demuxer, struct demux_packet **out_pkt
         src->dts = dts;
 
     pkt->stream = vs->sh->index;
-    *out_pkt = pkt;
+    src->next = pkt;
     return true;
 
 drop:
@@ -558,6 +626,7 @@ static void d_close(struct demuxer *demuxer)
         struct virtual_source *src = p->sources[x];
 
         src->current = NULL;
+        TA_FREEP(&src->next);
         close_lazy_segments(demuxer, src);
     }
author	wm4 <wm4@nowhere>	2019-01-11 15:04:43 +0100
committer	wm4 <wm4@nowhere>	2019-09-19 20:37:04 +0200
commit	7d0e0b3a5c118663390be72079ab47cd674a5ba9 (patch)
tree	7ec10a69f86e82427a446be60ea78d278bb63ed5
parent	87db2f24e8eca23664dc23128b8fca2b5d07125d (diff)
download	mpv-7d0e0b3a5c118663390be72079ab47cd674a5ba9.tar.bz2 mpv-7d0e0b3a5c118663390be72079ab47cd674a5ba9.tar.xz