summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2019-05-31 16:07:13 +0200
committerwm4 <wm4@nowhere>2019-09-19 20:37:05 +0200
commitda6e862c4fff9b33996607ce7ab18d3714704620 (patch)
treeb608913be80a3e9b97d51ef87fe53df4011fdaeb
parentab19888ba49b52e80c1b9ba4aa305b6ee8548059 (diff)
downloadmpv-da6e862c4fff9b33996607ce7ab18d3714704620.tar.bz2
mpv-da6e862c4fff9b33996607ce7ab18d3714704620.tar.xz
f_decoder_wrapper: hack for discarding preroll in backward playback mode
Some audio codecs will discard or cut the first frames when starting decoding. While some of that works through well-defined mechanisms (like initial padding), it's in general very codec/decoder specific, and not really predictable. In addition, libavcodec isn't very good at reporting "dropped" frames (and our internal interface reflects this). It seems our only chance to handle this is through timestamps. In theory, it would be best to discard frames that have timestamps before the "resume" position. But since video has reordered timestamps, we'd need to put some effort into finding this position. The first video packet doesn't necessarily contain this timestamp. (In theory, we could just do this in the demuxer with some trivial additional work, and set it on the packet's kf_seek_pts field. Although this field is supposed to contain just this value, the field is considered demuxer-internal, and I didn't want to make matters worse by reusing it for the interface to the decoder. With some more effort and buffering, we could calculate this value within the decoder, but fuck that.) The approach chosen in this commit is setting the timestamp to NOPTS. This will break in some obscure situations, but backward playback is a pretty obscure feature to begin with, so I considered this a reasonable implementation choice. Before passing a preroll packet to the decoder, its timestamps are set to NOPTS. Frames that are returned from the decoder and have the NOPTS timestamp are considered preroll and are discarded. This happens only during "preroll" mode (preroll_discard==true), so it doesn't affect normal forward playback. It's disabled on the first decoded frame with a timestamp, so it can tolerate some crap even in backward playback mode. We don't check the dts fields out of laziness (decoded audio frames don't even have this field).
I considered an approach based on the EDL clipping infrastructure (as mentioned in the last two paragraphs of the commit message of commit "demux_lavf: implement bad hack for backward playback of wav"). This didn't work, and I blamed timestamp rounding within mpv for it. But the problem was actually due to Matroska-rounded timestamps. Since the audio frame size isn't exactly aligned to 1ms, there will be an overlap (or gap) in the timestamps. This overlap is much smaller than 1ms, since it's just the sub-millisecond remainder part of the audio frame size. This makes the timestamps discontinuous and unreliable for the purpose we wanted to use them for. We can't just smooth the timestamps in the demuxer either.
-rw-r--r--filters/f_decoder_wrapper.c26
1 files changed, 15 insertions, 11 deletions
diff --git a/filters/f_decoder_wrapper.c b/filters/f_decoder_wrapper.c
index 80eb408014..a494d589c4 100644
--- a/filters/f_decoder_wrapper.c
+++ b/filters/f_decoder_wrapper.c
@@ -87,8 +87,7 @@ struct priv {
double start, end;
struct demux_packet *new_segment;
struct mp_frame packet;
- bool packet_fed;
- int preroll_discard;
+ bool packet_fed, preroll_discard;
size_t reverse_queue_byte_size;
struct mp_frame *reverse_queue;
@@ -116,7 +115,7 @@ static void reset_decoder(struct priv *p)
p->packets_without_output = 0;
mp_frame_unref(&p->packet);
p->packet_fed = false;
- p->preroll_discard = 0;
+ p->preroll_discard = false;
talloc_free(p->new_segment);
p->new_segment = NULL;
p->start = p->end = MP_NOPTS_VALUE;
@@ -543,12 +542,14 @@ static void feed_packet(struct priv *p)
if (p->first_packet_pdts == MP_NOPTS_VALUE)
p->first_packet_pdts = pkt_pdts;
- if (packet && packet->back_preroll)
- p->preroll_discard += 1;
+ if (packet && packet->back_preroll) {
+ p->preroll_discard = true;
+ packet->pts = packet->dts = MP_NOPTS_VALUE;
+ }
mp_pin_in_write(p->decoder->f->pins[0], p->packet);
- p->packet = MP_NO_FRAME;
p->packet_fed = true;
+ p->packet = MP_NO_FRAME;
p->packets_without_output += 1;
}
@@ -671,11 +672,14 @@ static void read_frame(struct priv *p)
}
p->packets_without_output = 0;
- if (p->preroll_discard > 0 && frame.type != MP_FRAME_EOF) {
- p->preroll_discard -= 1;
- mp_frame_unref(&frame);
- mp_filter_internal_mark_progress(p->f);
- return;
+ if (p->preroll_discard && frame.type != MP_FRAME_EOF) {
+ double ts = mp_frame_get_pts(frame);
+ if (ts == MP_NOPTS_VALUE) {
+ mp_frame_unref(&frame);
+ mp_filter_internal_mark_progress(p->f);
+ return;
+ }
+ p->preroll_discard = false;
}
bool segment_ended = process_decoded_frame(p, &frame);