From a735a2abd734748aace06987632353f161e8faf5 Mon Sep 17 00:00:00 2001 From: wm4 Date: Tue, 14 Jan 2014 17:38:21 +0100 Subject: demux_mkv: improve robustness by explicitly checking for level 1 elements Matroska makes it pretty hard to resync correctly on broken files: random data returns "valid" EBML IDs with a high probability, and when trying to skip them it's likely that you skip a random amount of data (instead of considering the element length invalid). Improve upon this by skipping known level 1 elements only. Consider everything else invalid and call the resync code. This might result in annoying behavior when Matroska adds new level 1 elements, although it won't be particularly harmful. Matroska doesn't really allow us to do better (even mkvtoolnix explicitly checks for known level 1 elements). Since we now don't always want to combine EBML element skipping and resyncing, remove ebml_read_skip_or_resync_cluster(), and make ebml_read_skip() more tolerant against skipping broken elements. Also, don't resync when reading sub-elements, and instead do resyncing when reading them results in an error. --- demux/demux_mkv.c | 18 +++++++++----- demux/ebml.c | 73 +++++++++++++++++++++++++++---------------------------- demux/ebml.h | 5 ++-- 3 files changed, 50 insertions(+), 46 deletions(-) diff --git a/demux/demux_mkv.c b/demux/demux_mkv.c index 90a62ee90f..d7fb04bec1 100644 --- a/demux/demux_mkv.c +++ b/demux/demux_mkv.c @@ -723,7 +723,7 @@ static int demux_mkv_read_cues(demuxer_t *demuxer) stream_t *s = demuxer->stream; if (opts->index_mode == 0 || opts->index_mode == 2) { - ebml_read_skip(s, NULL); + ebml_read_skip(demuxer->log, -1, s); return 0; } @@ -1128,7 +1128,7 @@ static int read_header_element(struct demuxer *demuxer, uint32_t id, res = 2; } if (!at_filepos && id != EBML_ID_INVALID) - ebml_read_skip(s, NULL); + ebml_read_skip(demuxer->log, -1, s); return res; } @@ -2454,11 +2454,12 @@ static int read_block_group(demuxer_t *demuxer, int64_t end, block->keyframe = false; break; + case MATROSKA_ID_CLUSTER: case EBML_ID_INVALID: goto error; default: - if (ebml_read_skip_or_resync_cluster(demuxer->log, end, s) != 0) + if (ebml_read_skip(demuxer->log, end, s) != 0) goto error; break; } @@ -2519,8 +2520,7 @@ static int read_next_block(demuxer_t *demuxer, struct block_info *block) goto find_next_cluster; default: ; - if (ebml_read_skip_or_resync_cluster - (demuxer->log, mkv_d->cluster_end, s) != 0) + if (ebml_read_skip(demuxer->log, mkv_d->cluster_end, s) != 0) goto find_next_cluster; break; } @@ -2535,7 +2535,13 @@ static int read_next_block(demuxer_t *demuxer, struct block_info *block) break; if (s->eof) return -1; - ebml_read_skip_or_resync_cluster(demuxer->log, -1, s); + // For the sake of robustness, consider even unknown level 1 + // elements the same as unknown/broken IDs. + if (!ebml_is_mkv_level1_id(id) || + ebml_read_skip(demuxer->log, -1, s) != 0) + { + ebml_resync_cluster(demuxer->log, s); + } } next_cluster: mkv_d->cluster_end = ebml_read_length(s, NULL); diff --git a/demux/ebml.c b/demux/ebml.c index c875ef564c..2c540ad093 100644 --- a/demux/ebml.c +++ b/demux/ebml.c @@ -41,6 +41,26 @@ #define SIZE_MAX ((size_t)-1) #endif +// Whether the id is a known Matroska level 1 element (allowed as element on +// global file level, after the level 0 MATROSKA_ID_SEGMENT). +// This (intentionally) doesn't include "global" elements. +bool ebml_is_mkv_level1_id(uint32_t id) +{ + switch (id) { + case MATROSKA_ID_SEEKHEAD: + case MATROSKA_ID_INFO: + case MATROSKA_ID_CLUSTER: + case MATROSKA_ID_TRACKS: + case MATROSKA_ID_CUES: + case MATROSKA_ID_ATTACHMENTS: + case MATROSKA_ID_CHAPTERS: + case MATROSKA_ID_TAGS: + return true; + default: + return false; + } +} + /* * Read: the element content data ID. * Return: the ID. @@ -252,21 +272,32 @@ char *ebml_read_utf8(stream_t *s, uint64_t *length) /* * Skip the current element. + * end: the end of the parent element or -1 (for robust error handling) */ -int ebml_read_skip(stream_t *s, uint64_t *length) +int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s) { uint64_t len; int l; + int64_t pos = stream_tell(s); + len = ebml_read_length(s, &l); if (len == EBML_UINT_INVALID) - return 1; - if (length) - *length = len + l; + goto invalid; + + int64_t pos2 = stream_tell(s); + if (len >= INT64_MAX - pos2 || (end > 0 && pos2 + len > end)) + goto invalid; - stream_skip(s, len); + if (!stream_skip(s, len)) + goto invalid; return 0; + +invalid: + mp_err(log, "Invalid EBML length at position %"PRId64"\n", pos); + stream_seek(s, pos); + return 1; } /* @@ -291,38 +322,6 @@ int ebml_resync_cluster(struct mp_log *log, stream_t *s) return -1; } -/* - * Skip the current element, or on error, call ebml_resync_cluster(). - * end gives the maximum possible file pos (due to EBML parent element size). - */ -int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end, - stream_t *s) -{ - uint64_t len; - int l; - - len = ebml_read_length(s, &l); - if (len == EBML_UINT_INVALID) - goto resync; - - int64_t pos = stream_tell(s); - - if (end >= 0 && pos + len > end) - goto resync; - - // When reading corrupted elements, len will often be a random high number, - // and stream_skip() will fail when skipping past EOF. - if (!stream_skip(s, len)) { - stream_seek(s, pos); - goto resync; - } - - return 0; - -resync: - return ebml_resync_cluster(log, s) < 0 ? -1 : 1; -} - /* * Read the next element, but only the header. The contents * are supposed to be sub-elements which can be read separately. diff --git a/demux/ebml.h b/demux/ebml.h index 2259563340..b934ee5c34 100644 --- a/demux/ebml.h +++ b/demux/ebml.h @@ -92,6 +92,7 @@ struct ebml_parse_ctx { #define EBML_FLOAT_INVALID -1000000000.0 +bool ebml_is_mkv_level1_id(uint32_t id); uint32_t ebml_read_id (stream_t *s, int *length); uint64_t ebml_read_vlen_uint (bstr *buffer); int64_t ebml_read_vlen_int (bstr *buffer); @@ -101,9 +102,7 @@ int64_t ebml_read_int (stream_t *s, uint64_t *length); double ebml_read_float (stream_t *s, uint64_t *length); char *ebml_read_ascii (stream_t *s, uint64_t *length); char *ebml_read_utf8 (stream_t *s, uint64_t *length); -int ebml_read_skip (stream_t *s, uint64_t *length); -int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end, - stream_t *s); +int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s); int ebml_resync_cluster(struct mp_log *log, stream_t *s); uint32_t ebml_read_master (stream_t *s, uint64_t *length); -- cgit v1.2.3