summaryrefslogtreecommitdiffstats
path: root/demux/ebml.c
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2013-03-28 00:01:17 +0100
committerwm4 <wm4@nowhere>2013-03-28 21:45:16 +0100
commitac1c5e6e18afc5043fa078803ef465bb4017c07a (patch)
tree5b82df5c32fd99f04b2ad6496474c5e4d2d84b41 /demux/ebml.c
parent3533ee3ae4b4dc2727dd499081d9edda55925749 (diff)
downloadmpv-ac1c5e6e18afc5043fa078803ef465bb4017c07a.tar.bz2
mpv-ac1c5e6e18afc5043fa078803ef465bb4017c07a.tar.xz
demux_mkv: improve robustness against broken files
Fixes test7.mkv from the Matroska test file collection, as well as some real broken files I've found in the wild. (Unfortunately, true recovery requires resetting the decoders and playback state with a manual seek, but it's still better than just exiting.) If there are broken EBML elements, try harder to skip them correctly. Do this by searching for the next cluster element. The cluster element intentionally has a long ID, so it's a suitable element for resynchronizing (mkvmerge does something similar). We know that data is corrupt if the ID or length fields of an element are malformed. Additionally, if skipping an unknown element goes past the end of the file, we assume it's corrupt and undo the seek. Do this because it often happens that corrupt data is interpreted as correct EBML elements. Since these elements will have a ridiculous values in their length fields due to the large value range that is possible (0-2^56-2), they will go past the end of the file. So instead of skipping them (which would result in playback termination), try to find the next cluster instead. (We still skip unknown elements that are within the file, as this is needed for correct operation. Also, we first execute the seek, because we don't really know where the file ends. Doing it this way is better for unseekable streams too, because it will still work in the non-error case.) This is done as special case in the packet reading function only. On the other hand, that's the only part of the file that's read after initialization is done.
Diffstat (limited to 'demux/ebml.c')
-rw-r--r--demux/ebml.c56
1 files changed, 55 insertions, 1 deletions
diff --git a/demux/ebml.c b/demux/ebml.c
index 3d1da44e58..9dbc2c22f7 100644
--- a/demux/ebml.c
+++ b/demux/ebml.c
@@ -246,7 +246,7 @@ char *ebml_read_utf8(stream_t *s, uint64_t *length)
}
/*
- * Skip the next element.
+ * Skip the current element.
*/
int ebml_read_skip(stream_t *s, uint64_t *length)
{
@@ -265,6 +265,60 @@ int ebml_read_skip(stream_t *s, uint64_t *length)
}
/*
+ * Skip to (probable) next cluster (MATROSKA_ID_CLUSTER) element start position.
+ */
+int ebml_resync_cluster(stream_t *s)
+{
+ int64_t pos = stream_tell(s);
+ uint32_t last_4_bytes = 0;
+ mp_msg(MSGT_DEMUX, MSGL_ERR, "[mkv] Corrupt file detected. "
+ "Trying to resync starting from position %"PRId64"...\n", pos);
+ while (!s->eof) {
+ // Assumes MATROSKA_ID_CLUSTER is 4 bytes, with no 0 bytes.
+ if (last_4_bytes == MATROSKA_ID_CLUSTER) {
+ mp_msg(MSGT_DEMUX, MSGL_ERR,
+ "[mkv] Cluster found at %"PRId64".\n", pos - 4);
+ stream_seek(s, pos - 4);
+ return 0;
+ }
+ last_4_bytes = (last_4_bytes << 8) | stream_read_char(s);
+ pos++;
+ }
+ return -1;
+}
+
+/*
+ * Skip the current element, or on error, call ebml_resync_cluster().
+ */
+int ebml_read_skip_or_resync_cluster(stream_t *s, uint64_t *length)
+{
+ uint64_t len;
+ int l;
+
+ len = ebml_read_length(s, &l);
+ if (len == EBML_UINT_INVALID)
+ goto resync;
+
+ if (length)
+ *length = len + l;
+
+ int64_t pos = stream_tell(s);
+ stream_skip(s, len);
+
+ // When reading corrupted elements, len will often be a random high number,
+ // and stream_skip() will set EOF.
+ if (s->eof) {
+ stream_seek(s, pos);
+ goto resync;
+ }
+
+ return 0;
+
+resync:
+ return ebml_resync_cluster(s) < 0 ? -1 : 1;
+}
+
+/*
* Read the next element, but only the header. The contents
* are supposed to be sub-elements which can be read separately.
*/