summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile4
-rwxr-xr-xTOOLS/matroska.py397
-rw-r--r--libmpdemux/demux_mkv.c40
-rw-r--r--libmpdemux/ebml.c400
-rw-r--r--libmpdemux/ebml.h191
-rw-r--r--libmpdemux/ebml_defs.c382
-rw-r--r--libmpdemux/ebml_types.h433
7 files changed, 1675 insertions, 172 deletions
diff --git a/Makefile b/Makefile
index 3e2174d07f..aefb8553fb 100644
--- a/Makefile
+++ b/Makefile
@@ -894,7 +894,9 @@ TAGS:
tags:
rm -f $@; find . -name '*.[chS]' -o -name '*.asm' | xargs ctags -a
-
+generated_ebml:
+ TOOLS/matroska.py --generate-header >libmpdemux/ebml_types.h
+ TOOLS/matroska.py --generate-definitions >libmpdemux/ebml_defs.c
###### tests / tools #######
diff --git a/TOOLS/matroska.py b/TOOLS/matroska.py
new file mode 100755
index 0000000000..8368f35185
--- /dev/null
+++ b/TOOLS/matroska.py
@@ -0,0 +1,397 @@
+#!/usr/bin/python
+"""
+Generate C definitions for parsing Matroska files.
+Can also be used to directly parse Matroska files and display their contents.
+"""
+
+#
+# This file is part of MPlayer.
+#
+# MPlayer is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# MPlayer is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with MPlayer; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+
+elements_ebml = (
+ 'EBML, 1a45dfa3, sub', (
+ 'EBMLVersion, 4286, uint',
+ 'EBMLReadVersion, 42f7, uint',
+ 'EBMLMaxIDLength, 42f2, uint',
+ 'EBMLMaxSizeLength, 42f3, uint',
+ 'DocType, 4282, str',
+ 'DocTypeVersion, 4287, uint',
+ 'DocTypeReadVersion, 4285, uint',
+ ),
+
+ 'CRC32, bf, binary',
+ 'Void, ec, binary',
+)
+
+elements_matroska = (
+ 'Segment, 18538067, sub', (
+
+ 'SeekHead*, 114d9b74, sub', (
+ 'Seek*, 4dbb, sub', (
+ 'SeekID, 53ab, ebml_id',
+ 'SeekPosition, 53ac, uint',
+ ),
+ ),
+
+ 'Info*, 1549a966, sub', (
+ 'SegmentUID, 73a4, binary',
+ 'PrevUID, 3cb923, binary',
+ 'NextUID, 3eb923, binary',
+ 'TimecodeScale, 2ad7b1, uint',
+ 'DateUTC, 4461, sint',
+ 'Title, 7ba9, str',
+ 'MuxingApp, 4d80, str',
+ 'WritingApp, 5741, str',
+ 'Duration, 4489, float',
+ ),
+
+ 'Cluster*, 1f43b675, sub', (
+ 'Timecode, e7, uint',
+ 'BlockGroup*, a0, sub', (
+ 'Block, a1, binary',
+ 'BlockDuration, 9b, uint',
+ 'ReferenceBlock*, fb, sint',
+ ),
+ 'SimpleBlock*, a3, binary',
+ ),
+
+ 'Tracks*, 1654ae6b, sub', (
+ 'TrackEntry*, ae, sub', (
+ 'TrackNumber, d7, uint',
+ 'TrackUID, 73c5, uint',
+ 'TrackType, 83, uint',
+ 'FlagEnabled, b9, uint',
+ 'FlagDefault, 88, uint',
+ 'FlagForced, 55aa, uint',
+ 'FlagLacing, 9c, uint',
+ 'MinCache, 6de7, uint',
+ 'DefaultDuration, 23e383, uint',
+ 'TrackTimecodeScale, 23314f, float',
+ 'MaxBlockAdditionID, 55ee, uint',
+ 'Name, 536e, str',
+ 'Language, 22b59c, str',
+ 'CodecID, 86, str',
+ 'CodecPrivate, 63a2, binary',
+ 'CodecDecodeAll, aa, uint',
+ 'Video, e0, sub', (
+ 'FlagInterlaced, 9a, uint',
+ 'PixelWidth, b0, uint',
+ 'PixelHeight, ba, uint',
+ 'DisplayWidth, 54b0, uint',
+ 'DisplayHeight, 54ba, uint',
+ 'FrameRate, 2383e3, float',
+ ),
+ 'Audio, e1, sub', (
+ 'SamplingFrequency, b5, float',
+ 'Channels, 9f, uint',
+ 'BitDepth, 6264, uint',
+ ),
+ 'ContentEncodings, 6d80, sub', (
+ 'ContentEncoding*, 6240, sub', (
+ 'ContentEncodingOrder, 5031, uint',
+ 'ContentEncodingScope, 5032, uint',
+ 'ContentEncodingType, 5033, uint',
+ 'ContentCompression, 5034, sub', (
+ 'ContentCompAlgo, 4254, uint',
+ 'ContentCompSettings, 4255, binary',
+ ),
+ ),
+ ),
+ ),
+ ),
+
+ 'Cues, 1c53bb6b, sub', (
+ 'CuePoint*, bb, sub', (
+ 'CueTime, b3, uint',
+ 'CueTrackPositions*, b7, sub', (
+ 'CueTrack, f7, uint',
+ 'CueClusterPosition, f1, uint',
+ ),
+ ),
+ ),
+
+ 'Attachments, 1941a469, sub', (
+ 'AttachedFile*, 61a7, sub', (
+ 'FileName, 466e, str',
+ 'FileMimeType, 4660, str',
+ 'FileData, 465c, binary',
+ 'FileUID, 46ae, uint',
+ ),
+ ),
+
+ 'Chapters, 1043a770, sub', (
+ 'EditionEntry*, 45b9, sub', (
+ 'EditionUID, 45bc, uint',
+ 'EditionFlagHidden, 45bd, uint',
+ 'EditionFlagDefault, 45db, uint',
+ 'EditionFlagOrdered, 45dd, uint',
+ 'ChapterAtom*, b6, sub', (
+ 'ChapterUID, 73c4, uint',
+ 'ChapterTimeStart, 91, uint',
+ 'ChapterTimeEnd, 92, uint',
+ 'ChapterFlagHidden, 98, uint',
+ 'ChapterFlagEnabled, 4598, uint',
+ 'ChapterSegmentUID, 6e67, binary',
+ 'ChapterSegmentEditionUID, 6ebc, uint',
+ 'ChapterDisplay*, 80, sub', (
+ 'ChapString, 85, str',
+ 'ChapLanguage*, 437c, str',
+ ),
+ ),
+ ),
+ ),
+ 'Tags*, 1254c367, sub', (
+ 'Tag*, 7373, sub', (
+ 'Targets, 63c0, sub', (
+ 'TargetTypeValue, 68ca, uint',
+ ),
+ ),
+ ),
+ ),
+)
+
+
+import sys
+from math import ldexp
+
+def byte2num(s):
+ return int(s.encode('hex'), 16)
+
+def camelcase_to_words(name):
+ parts = []
+ start = 0
+ for i in range(1, len(name)):
+ if name[i].isupper() and (name[i-1].islower() or
+ name[i+1:i+2].islower()):
+ parts.append(name[start:i])
+ start = i
+ parts.append(name[start:])
+ return '_'.join(parts).lower()
+
+class MatroskaElement(object):
+
+ def __init__(self, name, elid, valtype, namespace):
+ self.name = name
+ self.definename = '%s_ID_%s' % (namespace, name.upper())
+ self.fieldname = camelcase_to_words(name)
+ self.structname = 'ebml_' + self.fieldname
+ self.elid = elid
+ self.valtype = valtype
+ if valtype == 'sub':
+ self.ebmltype = 'EBML_TYPE_SUBELEMENTS'
+ self.valname = 'struct %s' % self.structname
+ else:
+ self.ebmltype = 'EBML_TYPE_' + valtype.upper()
+ try:
+ self.valname = {'uint': 'uint64_t', 'str': 'struct bstr',
+ 'binary': 'struct bstr', 'ebml_id': 'uint32_t',
+ 'float': 'double', 'sint': 'int64_t',
+ }[valtype]
+ except KeyError:
+ raise SyntaxError('Unrecognized value type ' + valtype)
+ self.subelements = ()
+
+ def add_subelements(self, subelements):
+ self.subelements = subelements
+ self.subids = set(x[0].elid for x in subelements)
+
+elementd = {}
+elementlist = []
+def parse_elems(l, namespace):
+ subelements = []
+ for el in l:
+ if isinstance(el, str):
+ name, hexid, eltype = [x.strip() for x in el.split(',')]
+ multiple = name.endswith('*')
+ name = name.strip('*')
+ new = MatroskaElement(name, hexid, eltype, namespace)
+ elementd[hexid] = new
+ elementlist.append(new)
+ subelements.append((new, multiple))
+ else:
+ new.add_subelements(parse_elems(el, namespace))
+ return subelements
+
+parse_elems(elements_ebml, 'EBML')
+parse_elems(elements_matroska, 'MATROSKA')
+
+def generate_C_header():
+ print('// Generated by TOOLS/matroska.py, do not edit manually')
+ print
+
+ for el in elementlist:
+ print('#define %-40s 0x%s' % (el.definename, el.elid))
+
+ print
+
+ for el in reversed(elementlist):
+ if not el.subelements:
+ continue
+ print
+ print('struct %s {' % el.structname)
+ l = max(len(subel.valname) for subel, multiple in el.subelements)+1
+ for subel, multiple in el.subelements:
+ print(' %-*s %s%s;' % (l, subel.valname, (' ', '*')[multiple],
+ subel.fieldname))
+ print
+ for subel, multiple in el.subelements:
+ print(' int n_%s;' % (subel.fieldname))
+ print('};')
+
+ for el in elementlist:
+ if not el.subelements:
+ continue
+ print('extern const struct ebml_elem_desc %s_desc;' % el.structname)
+
+ print
+ print('#define MAX_EBML_SUBELEMENTS %d' % max(len(el.subelements)
+ for el in elementlist))
+
+
+
+def generate_C_definitions():
+ print('// Generated by TOOLS/matroska.py, do not edit manually')
+ print
+ for el in reversed(elementlist):
+ print
+ if el.subelements:
+ print('#define N %s' % el.fieldname)
+ print('E_S("%s", %d)' % (el.name, len(el.subelements)))
+ for subel, multiple in el.subelements:
+ print('F(%s, %s, %d)' % (subel.definename, subel.fieldname,
+ multiple))
+ print('}};')
+ print('#undef N')
+ else:
+ print('E("%s", %s, %s)' % (el.name, el.fieldname, el.ebmltype))
+
+def read(s, length):
+ t = s.read(length)
+ if len(t) != length:
+ raise IOError
+ return t
+
+def read_id(s):
+ t = read(s, 1)
+ i = 0
+ mask = 128
+ if ord(t) == 0:
+ raise SyntaxError
+ while not ord(t) & mask:
+ i += 1
+ mask >>= 1
+ t += read(s, i)
+ return t
+
+def read_vint(s):
+ t = read(s, 1)
+ i = 0
+ mask = 128
+ if ord(t) == 0:
+ raise SyntaxError
+ while not ord(t) & mask:
+ i += 1
+ mask >>= 1
+ t = chr(ord(t) & (mask - 1))
+ t += read(s, i)
+ return i+1, byte2num(t)
+
+def read_str(s, length):
+ return read(s, length)
+
+def read_uint(s, length):
+ t = read(s, length)
+ return byte2num(t)
+
+def read_sint(s, length):
+ i = read_uint(s, length)
+ mask = 1 << (length * 8 - 1)
+ if i & mask:
+ i -= 2 * mask
+ return i
+
+def read_float(s, length):
+ t = read(s, length)
+ i = byte2num(t)
+ if length == 4:
+ f = ldexp((i & 0x7fffff) + (1 << 23), (i >> 23 & 0xff) - 150)
+ if i & (1 << 31):
+ f = -f
+ return f
+ raise SyntaxError
+
+def parse_one(s, depth, parent, maxlen):
+ elid = read_id(s).encode('hex')
+ elem = elementd.get(elid)
+ if parent is not None and elid not in parent.subids and elid not in ('ec', 'bf'):
+ print('Unexpected:', elid)
+ if 1:
+ raise NotImplementedError
+ size, length = read_vint(s)
+ this_length = len(elid) / 2 + size + length
+ if elem is not None:
+ if elem.valtype != 'skip':
+ print depth, elid, elem.name, 'size:', length, 'value:',
+ if elem.valtype == 'sub':
+ print('subelements:')
+ while length > 0:
+ length -= parse_one(s, depth + 1, elem, length)
+ if length < 0:
+ raise SyntaxError
+ elif elem.valtype == 'str':
+ print 'string', repr(read_str(s, length))
+ elif elem.valtype in ('binary', 'ebml_id'):
+ t = read_str(s, length)
+ dec = ''
+ if elem.valtype == 'ebml_id':
+ idelem = elementd.get(t.encode('hex'))
+ if idelem is None:
+ dec = '(UNKNOWN)'
+ else:
+ dec = '(%s)' % idelem.name
+ if len(t) < 20:
+ t = t.encode('hex')
+ else:
+ t = '<skipped %d bytes>' % len(t)
+ print 'binary', t, dec
+ elif elem.valtype == 'uint':
+ print 'uint', read_uint(s, length)
+ elif elem.valtype == 'sint':
+ print 'sint', read_sint(s, length)
+ elif elem.valtype == 'float':
+ print 'float', read_float(s, length)
+ elif elem.valtype == 'skip':
+ read(s, length)
+ else:
+ raise NotImplementedError
+ else:
+ print(depth, 'Unknown element:', elid, 'size:', length)
+ read(s, length)
+ return this_length
+
+def parse_toplevel(s):
+ parse_one(s, 0, None, 1 << 63)
+
+if sys.argv[1] == '--generate-header':
+ generate_C_header()
+elif sys.argv[1] == '--generate-definitions':
+ generate_C_definitions()
+else:
+ s = open(sys.argv[1])
+ while 1:
+ parse_toplevel(s)
diff --git a/libmpdemux/demux_mkv.c b/libmpdemux/demux_mkv.c
index b4a17d036f..99e3302c1d 100644
--- a/libmpdemux/demux_mkv.c
+++ b/libmpdemux/demux_mkv.c
@@ -607,7 +607,7 @@ static int demux_mkv_read_trackaudio(demuxer_t *demuxer, mkv_track_t *track)
len += il;
while (length > 0) {
switch (ebml_read_id(s, &il)) {
- case MATROSKA_ID_AUDIOSAMPLINGFREQ:
+ case MATROSKA_ID_SAMPLINGFREQUENCY:
fnum = ebml_read_float(s, &l);
if (fnum == EBML_FLOAT_INVALID)
return 0;
@@ -616,7 +616,7 @@ static int demux_mkv_read_trackaudio(demuxer_t *demuxer, mkv_track_t *track)
"[mkv] | + Sampling frequency: %f\n", track->a_sfreq);
break;
- case MATROSKA_ID_AUDIOBITDEPTH:
+ case MATROSKA_ID_BITDEPTH:
num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
return 0;
@@ -625,7 +625,7 @@ static int demux_mkv_read_trackaudio(demuxer_t *demuxer, mkv_track_t *track)
track->a_bps);
break;
- case MATROSKA_ID_AUDIOCHANNELS:
+ case MATROSKA_ID_CHANNELS:
num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
return 0;
@@ -655,7 +655,7 @@ static int demux_mkv_read_trackvideo(demuxer_t *demuxer, mkv_track_t *track)
len += il;
while (length > 0) {
switch (ebml_read_id(s, &il)) {
- case MATROSKA_ID_VIDEOFRAMERATE:
+ case MATROSKA_ID_FRAMERATE:
fnum = ebml_read_float(s, &l);
if (fnum == EBML_FLOAT_INVALID)
return 0;
@@ -666,7 +666,7 @@ static int demux_mkv_read_trackvideo(demuxer_t *demuxer, mkv_track_t *track)
track->default_duration = 1 / track->v_frate;
break;
- case MATROSKA_ID_VIDEODISPLAYWIDTH:
+ case MATROSKA_ID_DISPLAYWIDTH:
num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
return 0;
@@ -675,7 +675,7 @@ static int demux_mkv_read_trackvideo(demuxer_t *demuxer, mkv_track_t *track)
track->v_dwidth);
break;
- case MATROSKA_ID_VIDEODISPLAYHEIGHT:
+ case MATROSKA_ID_DISPLAYHEIGHT:
num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
return 0;
@@ -684,7 +684,7 @@ static int demux_mkv_read_trackvideo(demuxer_t *demuxer, mkv_track_t *track)
track->v_dheight);
break;
- case MATROSKA_ID_VIDEOPIXELWIDTH:
+ case MATROSKA_ID_PIXELWIDTH:
num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
return 0;
@@ -693,7 +693,7 @@ static int demux_mkv_read_trackvideo(demuxer_t *demuxer, mkv_track_t *track)
track->v_width);
break;
- case MATROSKA_ID_VIDEOPIXELHEIGHT:
+ case MATROSKA_ID_PIXELHEIGHT:
num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
return 0;
@@ -755,7 +755,7 @@ static int demux_mkv_read_trackentry(demuxer_t *demuxer)
track->tnum);
break;
- case MATROSKA_ID_TRACKNAME:
+ case MATROSKA_ID_NAME:
track->name = ebml_read_utf8(s, &l);
if (track->name == NULL)
goto err_out;
@@ -785,14 +785,14 @@ static int demux_mkv_read_trackentry(demuxer_t *demuxer)
}
break;
- case MATROSKA_ID_TRACKAUDIO:
+ case MATROSKA_ID_AUDIO:
mp_msg(MSGT_DEMUX, MSGL_V, "[mkv] | + Audio track\n");
l = demux_mkv_read_trackaudio(demuxer, track);
if (l == 0)
goto err_out;
break;
- case MATROSKA_ID_TRACKVIDEO:
+ case MATROSKA_ID_VIDEO:
mp_msg(MSGT_DEMUX, MSGL_V, "[mkv] | + Video track\n");
l = demux_mkv_read_trackvideo(demuxer, track);
if (l == 0)
@@ -838,7 +838,7 @@ static int demux_mkv_read_trackentry(demuxer_t *demuxer)
track->private_size);
break;
- case MATROSKA_ID_TRACKLANGUAGE:
+ case MATROSKA_ID_LANGUAGE:
free(track->language);
track->language = ebml_read_utf8(s, &l);
if (track->language == NULL)
@@ -847,7 +847,7 @@ static int demux_mkv_read_trackentry(demuxer_t *demuxer)
track->language);
break;
- case MATROSKA_ID_TRACKFLAGDEFAULT:
+ case MATROSKA_ID_FLAGDEFAULT:
num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
goto err_out;
@@ -856,7 +856,7 @@ static int demux_mkv_read_trackentry(demuxer_t *demuxer)
track->default_track);
break;
- case MATROSKA_ID_TRACKDEFAULTDURATION:
+ case MATROSKA_ID_DEFAULTDURATION:
num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
goto err_out;
@@ -873,7 +873,7 @@ static int demux_mkv_read_trackentry(demuxer_t *demuxer)
}
break;
- case MATROSKA_ID_TRACKENCODINGS:
+ case MATROSKA_ID_CONTENTENCODINGS:
l = demux_mkv_read_trackencodings(demuxer, track);
if (l == 0)
goto err_out;
@@ -955,7 +955,7 @@ static int demux_mkv_read_cues(demuxer_t *demuxer)
time = track = pos = EBML_UINT_INVALID;
switch (ebml_read_id(s, &il)) {
- case MATROSKA_ID_POINTENTRY:;
+ case MATROSKA_ID_CUEPOINT:;
uint64_t len;
len = ebml_read_length(s, &i);
@@ -970,7 +970,7 @@ static int demux_mkv_read_cues(demuxer_t *demuxer)
time = ebml_read_uint(s, &l);
break;
- case MATROSKA_ID_CUETRACKPOSITION:;
+ case MATROSKA_ID_CUETRACKPOSITIONS:;
uint64_t le = ebml_read_length(s, &i);
l = le + i;
@@ -1386,7 +1386,7 @@ static int demux_mkv_read_seekhead(demuxer_t *demuxer)
seek_pos = EBML_UINT_INVALID;
switch (ebml_read_id(s, &il)) {
- case MATROSKA_ID_SEEKENTRY:;
+ case MATROSKA_ID_SEEK:;
uint64_t len = ebml_read_length(s, &i);
l = len + i;
@@ -2747,7 +2747,7 @@ static int demux_mkv_fill_buffer(demuxer_t *demuxer, demux_stream_t *ds)
if (mkv_d->cluster_size > 0) {
switch (ebml_read_id(s, &il)) {
- case MATROSKA_ID_CLUSTERTIMECODE:;
+ case MATROSKA_ID_TIMECODE:;
uint64_t num = ebml_read_uint(s, &l);
if (num == EBML_UINT_INVALID)
return 0;
@@ -2858,7 +2858,7 @@ static void demux_mkv_seek(demuxer_t *demuxer, float rel_seek_secs,
if (type == MATROSKA_ID_CLUSTER) {
while (!s->eof && stream_tell(s) < end) {
if (ebml_read_id(s, NULL)
- == MATROSKA_ID_CLUSTERTIMECODE) {
+ == MATROSKA_ID_TIMECODE) {
uint64_t tc = ebml_read_uint(s, NULL);
tc *= mkv_d->tc_scale;
add_cluster_position(mkv_d, start, tc);
diff --git a/libmpdemux/ebml.c b/libmpdemux/ebml.c
index 30528e15cb..37e2fe0ffb 100644
--- a/libmpdemux/ebml.c
+++ b/libmpdemux/ebml.c
@@ -1,5 +1,6 @@
/*
* native ebml reader for the Matroska demuxer
+ * new parser copyright (c) 2010 Uoti Urpala
* copyright (c) 2004 Aurelien Jacobs <aurel@gnuage.org>
* based on the one written by Ronald Bultje for gstreamer
*
@@ -23,13 +24,18 @@
#include "config.h"
#include <stdlib.h>
-
-#include "stream/stream.h"
+#include <stdbool.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include <assert.h>
+
+#include <libavutil/intfloat_readwrite.h>
+#include <libavutil/common.h>
+#include "talloc.h"
#include "ebml.h"
-#include "libavutil/common.h"
+#include "stream/stream.h"
#include "mpbswap.h"
-#include "libavutil/intfloat_readwrite.h"
-
+#include "mp_msg.h"
#ifndef SIZE_MAX
#define SIZE_MAX ((size_t)-1)
@@ -288,7 +294,7 @@ char *ebml_read_header(stream_t *s, int *version)
uint32_t id;
char *str = NULL;
- if (ebml_read_master(s, &length) != EBML_ID_HEADER)
+ if (ebml_read_master(s, &length) != EBML_ID_EBML)
return 0;
if (version)
@@ -350,3 +356,385 @@ char *ebml_read_header(stream_t *s, int *version)
return str;
}
+
+
+
+#define EVALARGS(F, ...) F(__VA_ARGS__)
+#define E(str, N, type) const struct ebml_elem_desc ebml_ ## N ## _desc = { str, type };
+#define E_SN(str, count, N) const struct ebml_elem_desc ebml_ ## N ## _desc = { str, EBML_TYPE_SUBELEMENTS, sizeof(struct ebml_ ## N), count, (const struct ebml_field_desc[]){
+#define E_S(str, count) EVALARGS(E_SN, str, count, N)
+#define FN(id, name, multiple, N) { id, multiple, offsetof(struct ebml_ ## N, name), offsetof(struct ebml_ ## N, n_ ## name), &ebml_##name##_desc},
+#define F(id, name, multiple) EVALARGS(FN, id, name, multiple, N)
+#include "ebml_defs.c"
+#undef EVALARGS
+#undef SN
+#undef S
+#undef FN
+#undef F
+
+// Used to read/write pointers to different struct types
+struct generic;
+#define generic_struct struct generic
+
+static uint32_t ebml_parse_id(uint8_t *data, int *length)
+{
+ int len = 1;
+ uint32_t id = *data++;
+ for (int len_mask = 0x80; !(id & len_mask); len_mask >>= 1) {
+ len++;
+ if (len > 4) {
+ *length = -1;
+ return EBML_ID_INVALID;
+ }
+ }
+ *length = len;
+ while (--len)
+ id = (id << 8) | *data++;
+ return id;
+}
+
+static uint64_t parse_vlen(uint8_t *data, int *length, bool is_length)
+{
+ uint64_t r = *data++;
+ int len = 1;
+ int len_mask;
+ for (len_mask = 0x80; !(r & len_mask); len_mask >>= 1) {
+ len++;
+ if (len > 8) {
+ *length = -1;
+ return -1;
+ }
+ }
+ r &= len_mask - 1;
+
+ int num_allones = 0;
+ if (r == len_mask - 1)
+ num_allones++;
+ for (int i = 1; i < len; i++) {
+ if (*data == 255)
+ num_allones++;
+ r = (r << 8) | *data++;
+ }
+ if (is_length && num_allones == len) {
+ // According to Matroska specs this means "unknown length"
+ // Could be supported if there are any actual files using it
+ *length = -1;
+ return -1;
+ }
+ *length = len;
+ return r;
+}
+
+static uint64_t ebml_parse_length(uint8_t *data, int *length)
+{
+ return parse_vlen(data, length, true);
+}
+
+static uint64_t ebml_parse_uint(uint8_t *data, int length)
+{
+ assert(length >= 1 && length <= 8);
+ uint64_t r = 0;
+ while (length--)
+ r = (r << 8) + *data++;
+ return r;
+}
+
+static int64_t ebml_parse_sint(uint8_t *data, int length)
+{
+ assert(length >=1 && length <= 8);
+ int64_t r = 0;
+ if (*data & 0x80)
+ r = -1;
+ while (length--)
+ r = (r << 8) | *data++;
+ return r;
+}
+
+static double ebml_parse_float(uint8_t *data, int length)
+{
+ assert(length == 4 || length == 8);
+ uint64_t i = ebml_parse_uint(data, length);
+ if (length == 4)
+ return av_int2flt(i);
+ else
+ return av_int2dbl(i);
+}
+
+
+// target must be initialized to zero
+static void ebml_parse_element(struct ebml_parse_ctx *ctx, void *target,
+ uint8_t *data, int size,
+ const struct ebml_elem_desc *type, int level)
+{
+ assert(type->type == EBML_TYPE_SUBELEMENTS);
+ assert(level < 8);
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Parsing element %s\n",
+ level, " ", type->name);
+
+ char *s = target;
+ int len;
+ uint8_t *end = data + size;
+ uint8_t *p = data;
+ int num_elems[MAX_EBML_SUBELEMENTS] = {};
+ while (p < end) {
+ uint8_t *startp = p;
+ uint32_t id = ebml_parse_id(p, &len);
+ if (len > end - p)
+ goto past_end_error;
+ if (len < 0) {
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Error parsing subelement "
+ "id\n");
+ goto other_error;
+ }
+ p += len;
+ uint64_t length = ebml_parse_length(p, &len);
+ if (len > end - p)
+ goto past_end_error;
+ if (len < 0) {
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Error parsing subelement "
+ "length\n");
+ goto other_error;
+ }
+ p += len;
+
+ int field_idx = -1;
+ for (int i = 0; i < type->field_count; i++)
+ if (type->fields[i].id == id) {
+ field_idx = i;
+ num_elems[i]++;
+ break;
+ }
+
+ if (length > end - p) {
+ if (field_idx >= 0 && type->fields[field_idx].desc->type
+ != EBML_TYPE_SUBELEMENTS) {
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Subelement content goes "
+ "past end of containing element\n");
+ goto other_error;
+ }
+ // Try to parse what is possible from inside this partial element
+ ctx->has_errors = true;
+ length = end - p;
+ }
+ p += length;
+
+ continue;
+
+ past_end_error:
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Subelement headers go "
+ "past end of containing element\n");
+ other_error:
+ ctx->has_errors = true;
+ end = startp;
+ break;
+ }
+
+ for (int i = 0; i < type->field_count; i++)
+ if (num_elems[i] && type->fields[i].multiple) {
+ char *ptr = s + type->fields[i].offset;
+ switch (type->fields[i].desc->type) {
+ case EBML_TYPE_SUBELEMENTS:
+ num_elems[i] = FFMIN(num_elems[i],
+ 1000000000 / type->fields[i].desc->size);
+ int size = num_elems[i] * type->fields[i].desc->size;
+ *(generic_struct **) ptr = talloc_zero_size(ctx->talloc_ctx,
+ size);
+ break;
+ case EBML_TYPE_UINT:
+ *(uint64_t **) ptr = talloc_zero_array(ctx->talloc_ctx,
+ uint64_t, num_elems[i]);
+ break;
+ case EBML_TYPE_SINT:
+ *(int64_t **) ptr = talloc_zero_array(ctx->talloc_ctx,
+ int64_t, num_elems[i]);
+ break;
+ case EBML_TYPE_FLOAT:
+ *(double **) ptr = talloc_zero_array(ctx->talloc_ctx,
+ double, num_elems[i]);
+ break;
+ case EBML_TYPE_STR:
+ case EBML_TYPE_BINARY:
+ *(struct bstr **) ptr = talloc_zero_array(ctx->talloc_ctx,
+ struct bstr,
+ num_elems[i]);
+ break;
+ case EBML_TYPE_EBML_ID:
+ *(int32_t **) ptr = talloc_zero_array(ctx->talloc_ctx,
+ uint32_t, num_elems[i]);
+ break;
+ default:
+ abort();
+ }
+ }
+
+ while (data < end) {
+ int len;
+ uint32_t id = ebml_parse_id(data, &len);
+ assert(len >= 0 && len <= end - data);
+ data += len;
+ uint64_t length = ebml_parse_length(data, &len);
+ assert(len >= 0 && len <= end - data);
+ data += len;
+ if (length > end - data) {
+ // Try to parse what is possible from inside this partial element
+ length = end - data;
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Next subelement content goes "
+ "past end of containing element, will be truncated\n");
+ }
+ int field_idx = -1;
+ for (int i = 0; i < type->field_count; i++)
+ if (type->fields[i].id == id) {
+ field_idx = i;
+ break;
+ }
+ if (field_idx < 0) {
+ if (id == 0xec)
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Ignoring Void element "
+ "size: %"PRIu64"\n", level+1, " ", length);
+ else if (id == 0xbf)
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Ignoring CRC-32 "
+ "element size: %"PRIu64"\n", level+1, " ",
+ length);
+ else
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Ignoring unrecognized "
+ "subelement. ID: %x size: %"PRIu64"\n", id, length);
+ data += length;
+ continue;
+ }
+ const struct ebml_field_desc *fd = &type->fields[field_idx];
+ const struct ebml_elem_desc *ed = fd->desc;
+ bool multiple = fd->multiple;
+ int *countptr = (int *) (s + fd->count_offset);
+ if (*countptr >= num_elems[field_idx]) {
+ // Shouldn't happen with on any sane file without bugs
+ mp_msg(MSGT_DEMUX, MSGL_ERR, "[mkv] Too many subelems?\n");
+ ctx->has_errors = true;
+ data += length;
+ continue;
+ }
+ if (*countptr > 0 && !multiple) {
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Another subelement of type "
+ "%x %s (size: %"PRIu64"). Only one allowed. Ignoring.\n",
+ id, ed->name, length);
+ ctx->has_errors = true;
+ data += length;
+ continue;
+ }
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Parsing %x %s size: %"PRIu64
+ " value: ", level+1, " ", id, ed->name, length);
+
+ char *fieldptr = s + fd->offset;
+ switch (ed->type) {
+ case EBML_TYPE_SUBELEMENTS:
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "subelements\n");
+ char *subelptr;
+ if (multiple) {
+ char *array_start = (char *) *(generic_struct **) fieldptr;
+ subelptr = array_start + *countptr * ed->size;
+ } else
+ subelptr = fieldptr;
+ ebml_parse_element(ctx, subelptr, data, length, ed, level + 1);
+ break;
+
+ case EBML_TYPE_UINT:;
+ uint64_t *uintptr;
+#define GETPTR(subelptr, fieldtype) \
+ if (multiple) \
+ subelptr = *(fieldtype **) fieldptr + *countptr; \
+ else \
+ subelptr = (fieldtype *) fieldptr
+ GETPTR(uintptr, uint64_t);
+ if (length < 1 || length > 8) {
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "uint invalid length %"PRIu64
+ "\n", length);
+ goto error;
+ }
+ *uintptr = ebml_parse_uint(data, length);
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "uint %"PRIu64"\n", *uintptr);
+ break;
+
+ case EBML_TYPE_SINT:;
+ int64_t *sintptr;
+ GETPTR(sintptr, int64_t);
+ if (length < 1 || length > 8) {
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "sint invalid length %"PRIu64
+ "\n", length);
+ goto error;
+ }
+ *sintptr = ebml_parse_sint(data, length);
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "sint %"PRId64"\n", *sintptr);
+ break;
+
+ case EBML_TYPE_FLOAT:;
+ double *floatptr;
+ GETPTR(floatptr, double);
+ if (length != 4 && length != 8) {
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "float invalid length %"PRIu64
+ "\n", length);
+ goto error;
+ }
+ *floatptr = ebml_parse_float(data, length);
+ mp_msg(MSGT_DEMUX, MSGL_DBG2, "float %f\n", *floatptr);
+ break;
+
+ case EBML_TYPE_STR:
+ case EBML_TYPE_BINARY:;
+ struct bstr *strptr;