summaryrefslogtreecommitdiffstats
path: root/demux/ebml.c
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2014-01-14 17:38:21 +0100
committerwm4 <wm4@nowhere>2014-01-14 17:38:21 +0100
commit3c2f93aec850b279d47ccbb02f30f129a458d6c6 (patch)
tree753622983962b50dbd9815e88b8d3dd9ce1bb7d1 /demux/ebml.c
parentae27e13a0a0d0d69ca3e91ade710ded5208f4fc6 (diff)
downloadmpv-3c2f93aec850b279d47ccbb02f30f129a458d6c6.tar.bz2
mpv-3c2f93aec850b279d47ccbb02f30f129a458d6c6.tar.xz
demux_mkv: improve robustness by explicitly checking for level 1 elements
Matroska makes it pretty hard to resync correctly on broken files: random data returns "valid" EBML IDs with a high probability, and when trying to skip them it's likely that you skip a random amount of data (instead of considering the element length invalid). Improve upon this by skipping known level 1 elements only. Consider everything else invalid and call the resync code. This might result in annoying behavior when Matroska adds new level 1 elements, although it won't be particularly harmful. Matroska doesn't really allow us to do better (even mkvtoolnix explicitly checks for known level 1 elements). Since we now don't always want to combine EBML element skipping and resyncing, remove ebml_read_skip_or_resync_cluster(), and make ebml_read_skip() more tolerant against skipping broken elements. Also, don't resync when reading sub-elements, and instead do resyncing when reading them results in an error.
Diffstat (limited to 'demux/ebml.c')
-rw-r--r--demux/ebml.c73
1 files changed, 36 insertions, 37 deletions
diff --git a/demux/ebml.c b/demux/ebml.c
index c875ef564c..2c540ad093 100644
--- a/demux/ebml.c
+++ b/demux/ebml.c
@@ -41,6 +41,26 @@
#define SIZE_MAX ((size_t)-1)
#endif
+// Whether the id is a known Matroska level 1 element (allowed as element on
+// global file level, after the level 0 MATROSKA_ID_SEGMENT).
+// This (intentionally) doesn't include "global" elements.
+bool ebml_is_mkv_level1_id(uint32_t id)
+{
+ switch (id) {
+ case MATROSKA_ID_SEEKHEAD:
+ case MATROSKA_ID_INFO:
+ case MATROSKA_ID_CLUSTER:
+ case MATROSKA_ID_TRACKS:
+ case MATROSKA_ID_CUES:
+ case MATROSKA_ID_ATTACHMENTS:
+ case MATROSKA_ID_CHAPTERS:
+ case MATROSKA_ID_TAGS:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* Read: the element content data ID.
* Return: the ID.
@@ -252,21 +272,32 @@ char *ebml_read_utf8(stream_t *s, uint64_t *length)
/*
* Skip the current element.
+ * end: the end of the parent element or -1 (for robust error handling)
*/
-int ebml_read_skip(stream_t *s, uint64_t *length)
+int ebml_read_skip(struct mp_log *log, int64_t end, stream_t *s)
{
uint64_t len;
int l;
+ int64_t pos = stream_tell(s);
+
len = ebml_read_length(s, &l);
if (len == EBML_UINT_INVALID)
- return 1;
- if (length)
- *length = len + l;
+ goto invalid;
+
+ int64_t pos2 = stream_tell(s);
+ if (len >= INT64_MAX - pos2 || (end > 0 && pos2 + len > end))
+ goto invalid;
- stream_skip(s, len);
+ if (!stream_skip(s, len))
+ goto invalid;
return 0;
+
+invalid:
+ mp_err(log, "Invalid EBML length at position %"PRId64"\n", pos);
+ stream_seek(s, pos);
+ return 1;
}
/*
@@ -292,38 +323,6 @@ int ebml_resync_cluster(struct mp_log *log, stream_t *s)
}
/*
- * Skip the current element, or on error, call ebml_resync_cluster().
- * end gives the maximum possible file pos (due to EBML parent element size).
- */
-int ebml_read_skip_or_resync_cluster(struct mp_log *log, int64_t end,
- stream_t *s)
-{
- uint64_t len;
- int l;
-
- len = ebml_read_length(s, &l);
- if (len == EBML_UINT_INVALID)
- goto resync;
-
- int64_t pos = stream_tell(s);
-
- if (end >= 0 && pos + len > end)
- goto resync;
-
- // When reading corrupted elements, len will often be a random high number,
- // and stream_skip() will fail when skipping past EOF.
- if (!stream_skip(s, len)) {
- stream_seek(s, pos);
- goto resync;
- }
-
- return 0;
-
-resync:
- return ebml_resync_cluster(log, s) < 0 ? -1 : 1;
-}
-
-/*
* Read the next element, but only the header. The contents
* are supposed to be sub-elements which can be read separately.
*/