summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2014-01-14 17:38:21 +0100
committerwm4 <wm4@nowhere>2014-01-15 20:56:16 +0100
commita735a2abd734748aace06987632353f161e8faf5 (patch)
tree41452c9ca04e24111996256c58d1673a1de59834
parent112c1abe31674fd5f43119109f94b09d885ed357 (diff)
downloadmpv-a735a2abd734748aace06987632353f161e8faf5.tar.bz2
mpv-a735a2abd734748aace06987632353f161e8faf5.tar.xz
demux_mkv: improve robustness by explicitly checking for level 1 elements
Matroska makes it pretty hard to resync correctly on broken files: random data returns "valid" EBML IDs with a high probability, and when trying to skip them it's likely that you skip a random amount of data (instead of considering the element length invalid). Improve upon this by skipping known level 1 elements only. Consider everything else invalid and call the resync code. This might result in annoying behavior when Matroska adds new level 1 elements, although it won't be particularly harmful. Matroska doesn't really allow us to do better (even mkvtoolnix explicitly checks for known level 1 elements). Since we now don't always want to combine EBML element skipping and resyncing, remove ebml_read_skip_or_resync_cluster(), and make ebml_read_skip() more tolerant against skipping broken elements. Also, don't resync when reading sub-elements, and instead do resyncing when reading them results in an error.
-rw-r--r--demux/demux_mkv.c18
-rw-r--r--demux/ebml.c73
-rw-r--r--demux/ebml.h5
3 files changed, 50 insertions, 46 deletions
diff --git a/demux/demux_mkv.c b/demux/demux_mkv.c
index 90a62ee90f..d7fb04bec1 100644
--- a/demux/demux_mkv.c
+++ b/demux/demux_mkv.c
@@ -723,7 +723,7 @@ static int demux_mkv_read_cues(demuxer_t *demuxer)
stream_t *s = demuxer->stream;
if (opts->index_mode == 0 || opts->index_mode == 2) {
- ebml_read_skip(s, NULL);
+ ebml_read_skip(demuxer->log, -1, s);
return 0;
}
@@ -1128,7 +1128,7 @@ static int read_header_element(struct demuxer *demuxer, uint32_t id,
res = 2;
}
if (!at_filepos && id != EBML_ID_INVALID)
- ebml_read_skip(s, NULL);
+ ebml_read_skip(demuxer->log, -1, s);
return res;
}
@@ -2454,11 +2454,12 @@ static int read_block_group(demuxer_t *demuxer, int64_t end,
block->keyframe = false;
break;
+ case MATROSKA_ID_CLUSTER:
case EBML_ID_INVALID:
goto error;
default:
- if (ebml_read_skip_or_resync_cluster(demuxer->log, end, s) != 0)
+ if (ebml_read_skip(demuxer->log, end, s) != 0)
goto error;
break;
}
@@ -2519,8 +2520,7 @@ static int read_next_block(demuxer_t *demuxer, struct block_info *block)
goto find_next_cluster;
default: ;
- if (ebml_read_skip_or_resync_cluster
- (demuxer->log, mkv_d->cluster_end, s) != 0)
+ if (ebml_read_skip(demuxer->log, mkv_d->cluster_end, s) != 0)
goto find_next_cluster;
break;
}
@@ -2535,7 +2535,13 @@ static int read_next_block(demuxer_t *demuxer, struct block_info *block)
break;
if (s->eof)
return -1;
- ebml_read_skip_or_resync_cluster(demuxer->log, -1, s);
+ // For the sake of robustness, consider even unknown level 1
+ // elements the same as unknown/broken IDs.
+ if (!ebml_is_mkv_level1_id(id) ||
+ ebml_read_skip(demuxer->log, -1, s) != 0)
+ {
+ ebml_resync_cluster(demuxer->log, s);
+ }
}
next_cluster:
mkv_d->cluster_end = ebml_read_length(s, NULL);
diff --git a/demux/ebml.c b/demux/ebml.c
index c875ef564c..2c540ad093 100644
--- a/demux/ebml.c
+++ b/demux/ebml.c
@@ -41,6 +41,26 @@
#define SIZE_MAX ((size_t)-1)
#endif
+// Whether the id is a known Matroska level 1 element (allowed as element on
+// global file level, after the level 0 MATROSKA_ID_SEGMENT).
+// This (intentionally) doesn't include "global" elements.
+bool ebml_is_mkv_level1_id(uint32_t id)
+{
+ switch (id) {
+ case MATROSKA_ID_SEEKHEAD:
+ case MATROSKA_ID_INFO:
+ case MATROSKA_ID_CLUSTER:
+ case MATROSKA_ID_TRACKS:
+ case MATROSKA_ID_CUES:
+ case MATROSKA_ID_ATTACHMENTS:
+ case MATROSKA_ID_CHAPTERS:
+ case MATROSKA_ID_TAGS:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* Read: the element content data ID.
* Return: the ID.
@@ -252,21 +272,32 @@ char *ebml_read_utf8(stream_t *s, uint64_t *length)
/*
* Skip the current element.
+ * end: the end of the parent element or -1 (for robust error handling)
*/
-int ebml_read_skip(stream_t *s, uint64_t *length)
+int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s)
{
uint64_t len;
int l;
+ int64_t pos = stream_tell(s);
+
len = ebml_read_length(s, &l);
if (len == EBML_UINT_INVALID)
- return 1;
- if (length)
- *length = len + l;
+ goto invalid;
+
+ int64_t pos2 = stream_tell(s);
+ if (len >= INT64_MAX - pos2 || (end > 0 && pos2 + len > end))
+ goto invalid;
- stream_skip(s, len);
+ if (!stream_skip(s, len))
+ goto invalid;
return 0;
+
+invalid:
+ mp_err(log, "Invalid EBML length at position %"PRId64"\n", pos);
+ stream_seek(s, pos);
+ return 1;
}
/*
@@ -292,38 +323,6 @@ int ebml_resync_cluster(struct mp_log *log, stream_t *s)
}
/*
- * Skip the current element, or on error, call ebml_resync_cluster().
- * end gives the maximum possible file pos (due to EBML parent element size).
- */
-int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end,
- stream_t *s)
-{
- uint64_t len;
- int l;
-
- len = ebml_read_length(s, &l);
- if (len == EBML_UINT_INVALID)
- goto resync;
-
- int64_t pos = stream_tell(s);
-
- if (end >= 0 && pos + len > end)
- goto resync;
-
- // When reading corrupted elements, len will often be a random high number,
- // and stream_skip() will fail when skipping past EOF.
- if (!stream_skip(s, len)) {
- stream_seek(s, pos);
- goto resync;
- }
-
- return 0;
-
-resync:
- return ebml_resync_cluster(log, s) < 0 ? -1 : 1;
-}
-
-/*
* Read the next element, but only the header. The contents
* are supposed to be sub-elements which can be read separately.
*/
diff --git a/demux/ebml.h b/demux/ebml.h
index 2259563340..b934ee5c34 100644
--- a/demux/ebml.h
+++ b/demux/ebml.h
@@ -92,6 +92,7 @@ struct ebml_parse_ctx {
#define EBML_FLOAT_INVALID -1000000000.0
+bool ebml_is_mkv_level1_id(uint32_t id);
uint32_t ebml_read_id (stream_t *s, int *length);
uint64_t ebml_read_vlen_uint (bstr *buffer);
int64_t ebml_read_vlen_int (bstr *buffer);
@@ -101,9 +102,7 @@ int64_t ebml_read_int (stream_t *s, uint64_t *length);
double ebml_read_float (stream_t *s, uint64_t *length);
char *ebml_read_ascii (stream_t *s, uint64_t *length);
char *ebml_read_utf8 (stream_t *s, uint64_t *length);
-int ebml_read_skip (stream_t *s, uint64_t *length);
-int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end,
- stream_t *s);
+int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s);
int ebml_resync_cluster(struct mp_log *log, stream_t *s);
uint32_t ebml_read_master (stream_t *s, uint64_t *length);