path: root/demux
diff options
authorwm4 <wm4@nowhere>2017-12-17 21:34:50 +0100
committerStefano Pigozzi <>2017-12-23 00:32:59 +0100
commitcc8759a855dcd5da168874e055089065a7ac44d0 (patch)
tree2374bf999ffd3b98ae959cdff1401c6fc036eb23 /demux
parent08dbc8f43e82984aaf2ff489b35a5b93396628ff (diff)
demux: always discard cached packets on track switches
This fixes weird behavior in the following case: - open a file - make sure the max. demuxer forward cache is smaller than the file's video track - make sure the max. readahead duration is larger than the file's duration - disable the audio track - seek to the beginning of the file - once the cache has filled enable the audio track - a queue overflow warning should appear (- looking at the seek ranges is also interesting) The queue overflow warning happens because the packed queue for the video track will use up the full quota set by --demuxer-max-bytes. When the audio track is enabled, reading an audio packet would technically overflow the packet cache by the size of whatever packet is read next. This means the demuxer signals EOF to the decoder, and once playback has consumed enough video packets so that audio packets can be read again, the decoder resumes from EOF. This interacts badly with A/V synchronization and the whole thing can randomly crap itself until audio has fully recovered. We didn't care about this so far, but we want to raise the readahead duration to something very high, so that the demuxer cache is fully used. This means this case can be hit quite quickly by switching audio or subtitle tracks, and is not really an obscure corner case anymore. Fix this by always losing all cache. Since the cache can't be used anyway until the newly selected track has been read, this is not much of a disadvantage. The only thing that could be brought up is that unselecting the track again could resume operation normally. (Maybe this would be useful if network died completely without chance of recovery. Then you could watch the already buffered video anyway by deselecting the audio track again.) But given the headaches, this seems like the better solution. Unfortunately this requires adding new new strange fields and strangely fragmenting state management functions again. I'm sure whoever works on this in the future will hate me. Currently it seems like the lesser evil, and much simpler and robust than the other potential solutions. In case this needs to be revisited, here is a reminder for readers from the future what alternative solutions were considered, without those disadvantages: A first attempted solution allowed the demuxer to buffer some additional packets on track switching. This would allow it to read enough data to feed the decoder at least. But it was still awkward, as it didn't allow the demuxer to continue prefetching the newly selected track. It also barely worked, because you could make the forward buffer "over full" by seeking back with seekable cache enabled, and then it couldn't read packets anyway. As alternative solution, we could always demux and cache all tracks, even if they're deselected. This would also not require a network-level seek for the "refresh" logic (it's the thing that lets the video decoder continue as if nothing happened, while actually seeking back in the stream to get the missing audio packets, in the case of enabling a previously disabled audio track). But it would also possibly waste network and memory resources, depending on what the user actually wants. A second solution would just account the queue sizes for each stream separately. We could freely fill up the audio packet queue, even if the video queue is full. Since the demuxer API returns interleaved packets and doesn't let you predict which packet type comes next, this is not as simple as it sounds, but it'd probably tie in nicely with the "refresh" logic. A third solution would be removing buffered video packets from the end of the packet queue. Since the "refresh" logic gets these anyway, there is no reason to keep them if they prevent the audio packet queue from catching up with the video one. But this would require additional logic, would interact badly with a bunch of other corner cases. And as far as the code goes, it's rather complex, because all the logic is written with FIFO behavior in mind (including the fact that the packet queue is a singly linked list with no backwards links, making removal from the end harder).
Diffstat (limited to 'demux')
1 files changed, 43 insertions, 8 deletions
diff --git a/demux/demux.c b/demux/demux.c
index 5a052fc787..94966f1fc4 100644
--- a/demux/demux.c
+++ b/demux/demux.c
@@ -258,7 +258,8 @@ struct demux_stream {
bool eager; // try to keep at least 1 packet queued
// if false, this stream is disabled, or passively
// read (like subtitles)
- bool refreshing;
+ bool refreshing; // finding old position after track switches
+ bool eof; // end of demuxed stream? (true if no more packets)
bool global_correct_dts;// all observed so far
bool global_correct_pos;
@@ -274,11 +275,14 @@ struct demux_stream {
double bitrate;
size_t fw_packs; // number of packets in buffer (forward)
size_t fw_bytes; // total bytes of packets in buffer (forward)
- bool eof; // end of demuxed stream? (true if no more packets)
struct demux_packet *reader_head; // points at current decoder position
bool skip_to_keyframe;
bool attached_picture_added;
+ // for refresh seeks: pos/dts of last packet returned to reader
+ int64_t last_ret_pos;
+ double last_ret_dts;
// for closed captions (demuxer_feed_caption)
struct sh_stream *cc;
bool ignore_eof; // ignore stream in underrun detection
@@ -537,19 +541,26 @@ static void free_empty_cached_ranges(struct demux_internal *in)
-static void ds_clear_reader_state(struct demux_stream *ds)
+static void ds_clear_reader_queue_state(struct demux_stream *ds)
ds->in->fw_bytes -= ds->fw_bytes;
ds->reader_head = NULL;
+ ds->fw_bytes = 0;
+ ds->fw_packs = 0;
ds->eof = false;
+static void ds_clear_reader_state(struct demux_stream *ds)
+ ds_clear_reader_queue_state(ds);
ds->base_ts = ds->last_br_ts = MP_NOPTS_VALUE;
ds->last_br_bytes = 0;
ds->bitrate = -1;
ds->skip_to_keyframe = false;
ds->attached_picture_added = false;
- ds->fw_bytes = 0;
- ds->fw_packs = 0;
+ ds->last_ret_pos = -1;
+ ds->last_ret_dts = MP_NOPTS_VALUE;
static void update_stream_selection_state(struct demux_internal *in,
@@ -1474,6 +1485,9 @@ static struct demux_packet *dequeue_packet(struct demux_stream *ds)
ds->fw_bytes -= bytes;
ds->in->fw_bytes -= bytes;
+ ds->last_ret_pos = pkt->pos;
+ ds->last_ret_dts = pkt->dts;
// The returned packet is mutated etc. and will be owned by the user.
pkt = demux_copy_packet(pkt);
if (!pkt)
@@ -2449,10 +2463,31 @@ static void initiate_refresh_seek(struct demux_internal *in,
for (int n = 0; n < in->num_streams; n++) {
struct demux_stream *ds = in->streams[n]->ds;
+ bool correct_pos = ds->queue->correct_pos;
+ bool correct_dts = ds->queue->correct_dts;
+ // We need to re-read all packets anyway, so discard the buffered
+ // data. (In theory, we could keep the packets, and be able to use
+ // it for seeking if partially read streams are deselected again,
+ // but this causes other problems like queue overflows when
+ // selecting a new stream.)
+ ds_clear_reader_queue_state(ds);
+ clear_queue(ds->queue);
// Streams which didn't have any packets yet will return all packets,
// other streams return packets only starting from the last position.
- if (ds->queue->last_pos != -1 || ds->queue->last_dts != MP_NOPTS_VALUE)
- ds->refreshing |= ds->selected;
+ if (ds->selected && (ds->last_ret_pos != -1 ||
+ ds->last_ret_dts != MP_NOPTS_VALUE))
+ {
+ ds->refreshing = true;
+ ds->queue->correct_dts = correct_dts;
+ ds->queue->correct_pos = correct_pos;
+ ds->queue->last_pos = ds->last_ret_pos;
+ ds->queue->last_dts = ds->last_ret_dts;
+ }
+ update_seek_ranges(in->current_range);
start_ts -= 1.0; // small offset to get correct overlap