diff options
Diffstat (limited to 'libmpdemux/ebml.c')
-rw-r--r-- | libmpdemux/ebml.c | 670 |
1 files changed, 0 insertions, 670 deletions
diff --git a/libmpdemux/ebml.c b/libmpdemux/ebml.c deleted file mode 100644 index fba8177805..0000000000 --- a/libmpdemux/ebml.c +++ /dev/null @@ -1,670 +0,0 @@ -/* - * native ebml reader for the Matroska demuxer - * new parser copyright (c) 2010 Uoti Urpala - * copyright (c) 2004 Aurelien Jacobs <aurel@gnuage.org> - * based on the one written by Ronald Bultje for gstreamer - * - * This file is part of MPlayer. - * - * MPlayer is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * MPlayer is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with MPlayer; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include "config.h" - -#include <stdlib.h> -#include <stdbool.h> -#include <inttypes.h> -#include <stddef.h> -#include <assert.h> - -#include <libavutil/intfloat.h> -#include <libavutil/common.h> -#include "talloc.h" -#include "ebml.h" -#include "stream/stream.h" -#include "mpbswap.h" -#include "mp_msg.h" - -#ifndef SIZE_MAX -#define SIZE_MAX ((size_t)-1) -#endif - -/* - * Read: the element content data ID. - * Return: the ID. - */ -uint32_t ebml_read_id(stream_t *s, int *length) -{ - int i, len_mask = 0x80; - uint32_t id; - - for (i = 0, id = stream_read_char(s); i < 4 && !(id & len_mask); i++) - len_mask >>= 1; - if (i >= 4) - return EBML_ID_INVALID; - if (length) - *length = i + 1; - while (i--) - id = (id << 8) | stream_read_char(s); - return id; -} - -/* - * Read a variable length unsigned int. - */ -uint64_t ebml_read_vlen_uint(uint8_t *buffer, int *length) -{ - int i, j, num_ffs = 0, len_mask = 0x80; - uint64_t num; - - for (i = 0, num = *buffer++; i < 8 && !(num & len_mask); i++) - len_mask >>= 1; - if (i >= 8) - return EBML_UINT_INVALID; - j = i + 1; - if (length) - *length = j; - if ((int) (num &= (len_mask - 1)) == len_mask - 1) - num_ffs++; - while (i--) { - num = (num << 8) | *buffer++; - if ((num & 0xFF) == 0xFF) - num_ffs++; - } - if (j == num_ffs) - return EBML_UINT_INVALID; - return num; -} - -/* - * Read a variable length signed int. - */ -int64_t ebml_read_vlen_int(uint8_t *buffer, int *length) -{ - uint64_t unum; - int l; - - /* read as unsigned number first */ - unum = ebml_read_vlen_uint(buffer, &l); - if (unum == EBML_UINT_INVALID) - return EBML_INT_INVALID; - if (length) - *length = l; - - return unum - ((1 << ((7 * l) - 1)) - 1); -} - -/* - * Read: element content length. - */ -uint64_t ebml_read_length(stream_t *s, int *length) -{ - int i, j, num_ffs = 0, len_mask = 0x80; - uint64_t len; - - for (i = 0, len = stream_read_char(s); i < 8 && !(len & len_mask); i++) - len_mask >>= 1; - if (i >= 8) - return EBML_UINT_INVALID; - j = i + 1; - if (length) - *length = j; - if ((int) (len &= (len_mask - 1)) == len_mask - 1) - num_ffs++; - while (i--) { - len = (len << 8) | stream_read_char(s); - if ((len & 0xFF) == 0xFF) - num_ffs++; - } - if (j == num_ffs) - return EBML_UINT_INVALID; - if (len >= 1ULL<<63) // Can happen if stream_read_char returns EOF - return EBML_UINT_INVALID; - return len; -} - -/* - * Read the next element as an unsigned int. - */ -uint64_t ebml_read_uint(stream_t *s, uint64_t *length) -{ - uint64_t len, value = 0; - int l; - - len = ebml_read_length(s, &l); - if (len == EBML_UINT_INVALID || len < 1 || len > 8) - return EBML_UINT_INVALID; - if (length) - *length = len + l; - - while (len--) - value = (value << 8) | stream_read_char(s); - - return value; -} - -/* - * Read the next element as a signed int. - */ -int64_t ebml_read_int(stream_t *s, uint64_t *length) -{ - int64_t value = 0; - uint64_t len; - int l; - - len = ebml_read_length(s, &l); - if (len == EBML_UINT_INVALID || len < 1 || len > 8) - return EBML_INT_INVALID; - if (length) - *length = len + l; - - len--; - l = stream_read_char(s); - if (l & 0x80) - value = -1; - value = (value << 8) | l; - while (len--) - value = (value << 8) | stream_read_char(s); - - return value; -} - -/* - * Read the next element as a float. - */ -double ebml_read_float(stream_t *s, uint64_t *length) -{ - double value; - uint64_t len; - int l; - - len = ebml_read_length(s, &l); - switch (len) { - case 4: - value = av_int2float(stream_read_dword(s)); - break; - - case 8: - value = av_int2double(stream_read_qword(s)); - break; - - default: - return EBML_FLOAT_INVALID; - } - - if (length) - *length = len + l; - - return value; -} - -/* - * Read the next element as an ASCII string. - */ -char *ebml_read_ascii(stream_t *s, uint64_t *length) -{ - uint64_t len; - char *str; - int l; - - len = ebml_read_length(s, &l); - if (len == EBML_UINT_INVALID) - return NULL; - if (len > SIZE_MAX - 1) - return NULL; - if (length) - *length = len + l; - - str = malloc(len + 1); - if (stream_read(s, str, len) != (int) len) { - free(str); - return NULL; - } - str[len] = '\0'; - - return str; -} - -/* - * Read the next element as a UTF-8 string. - */ -char *ebml_read_utf8(stream_t *s, uint64_t *length) -{ - return ebml_read_ascii(s, length); -} - -/* - * Skip the next element. - */ -int ebml_read_skip(stream_t *s, uint64_t *length) -{ - uint64_t len; - int l; - - len = ebml_read_length(s, &l); - if (len == EBML_UINT_INVALID) - return 1; - if (length) - *length = len + l; - - stream_skip(s, len); - - return 0; -} - -/* - * Read the next element, but only the header. The contents - * are supposed to be sub-elements which can be read separately. - */ -uint32_t ebml_read_master(stream_t *s, uint64_t *length) -{ - uint64_t len; - uint32_t id; - - id = ebml_read_id(s, NULL); - if (id == EBML_ID_INVALID) - return id; - - len = ebml_read_length(s, NULL); - if (len == EBML_UINT_INVALID) - return EBML_ID_INVALID; - if (length) - *length = len; - - return id; -} - - - -#define EVALARGS(F, ...) F(__VA_ARGS__) -#define E(str, N, type) const struct ebml_elem_desc ebml_ ## N ## _desc = { str, type }; -#define E_SN(str, count, N) const struct ebml_elem_desc ebml_ ## N ## _desc = { str, EBML_TYPE_SUBELEMENTS, sizeof(struct ebml_ ## N), count, (const struct ebml_field_desc[]){ -#define E_S(str, count) EVALARGS(E_SN, str, count, N) -#define FN(id, name, multiple, N) { id, multiple, offsetof(struct ebml_ ## N, name), offsetof(struct ebml_ ## N, n_ ## name), &ebml_##name##_desc}, -#define F(id, name, multiple) EVALARGS(FN, id, name, multiple, N) -#include "ebml_defs.c" -#undef EVALARGS -#undef SN -#undef S -#undef FN -#undef F - -// Used to read/write pointers to different struct types -struct generic; -#define generic_struct struct generic - -static uint32_t ebml_parse_id(uint8_t *data, int *length) -{ - int len = 1; - uint32_t id = *data++; - for (int len_mask = 0x80; !(id & len_mask); len_mask >>= 1) { - len++; - if (len > 4) { - *length = -1; - return EBML_ID_INVALID; - } - } - *length = len; - while (--len) - id = (id << 8) | *data++; - return id; -} - -static uint64_t parse_vlen(uint8_t *data, int *length, bool is_length) -{ - uint64_t r = *data++; - int len = 1; - int len_mask; - for (len_mask = 0x80; !(r & len_mask); len_mask >>= 1) { - len++; - if (len > 8) { - *length = -1; - return -1; - } - } - r &= len_mask - 1; - - int num_allones = 0; - if (r == len_mask - 1) - num_allones++; - for (int i = 1; i < len; i++) { - if (*data == 255) - num_allones++; - r = (r << 8) | *data++; - } - if (is_length && num_allones == len) { - // According to Matroska specs this means "unknown length" - // Could be supported if there are any actual files using it - *length = -1; - return -1; - } - *length = len; - return r; -} - -static uint64_t ebml_parse_length(uint8_t *data, int *length) -{ - return parse_vlen(data, length, true); -} - -static uint64_t ebml_parse_uint(uint8_t *data, int length) -{ - assert(length >= 1 && length <= 8); - uint64_t r = 0; - while (length--) - r = (r << 8) + *data++; - return r; -} - -static int64_t ebml_parse_sint(uint8_t *data, int length) -{ - assert(length >=1 && length <= 8); - int64_t r = 0; - if (*data & 0x80) - r = -1; - while (length--) - r = (r << 8) | *data++; - return r; -} - -static double ebml_parse_float(uint8_t *data, int length) -{ - assert(length == 4 || length == 8); - uint64_t i = ebml_parse_uint(data, length); - if (length == 4) - return av_int2float(i); - else - return av_int2double(i); -} - - -// target must be initialized to zero -static void ebml_parse_element(struct ebml_parse_ctx *ctx, void *target, - uint8_t *data, int size, - const struct ebml_elem_desc *type, int level) -{ - assert(type->type == EBML_TYPE_SUBELEMENTS); - assert(level < 8); - mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Parsing element %s\n", - level, " ", type->name); - - char *s = target; - int len; - uint8_t *end = data + size; - uint8_t *p = data; - int num_elems[MAX_EBML_SUBELEMENTS] = {}; - while (p < end) { - uint8_t *startp = p; - uint32_t id = ebml_parse_id(p, &len); - if (len > end - p) - goto past_end_error; - if (len < 0) { - mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Error parsing subelement " - "id\n"); - goto other_error; - } - p += len; - uint64_t length = ebml_parse_length(p, &len); - if (len > end - p) - goto past_end_error; - if (len < 0) { - mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Error parsing subelement " - "length\n"); - goto other_error; - } - p += len; - - int field_idx = -1; - for (int i = 0; i < type->field_count; i++) - if (type->fields[i].id == id) { - field_idx = i; - num_elems[i]++; - break; - } - - if (length > end - p) { - if (field_idx >= 0 && type->fields[field_idx].desc->type - != EBML_TYPE_SUBELEMENTS) { - mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Subelement content goes " - "past end of containing element\n"); - goto other_error; - } - // Try to parse what is possible from inside this partial element - ctx->has_errors = true; - length = end - p; - } - p += length; - - continue; - - past_end_error: - mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Subelement headers go " - "past end of containing element\n"); - other_error: - ctx->has_errors = true; - end = startp; - break; - } - - for (int i = 0; i < type->field_count; i++) - if (num_elems[i] && type->fields[i].multiple) { - char *ptr = s + type->fields[i].offset; - switch (type->fields[i].desc->type) { - case EBML_TYPE_SUBELEMENTS: - num_elems[i] = FFMIN(num_elems[i], - 1000000000 / type->fields[i].desc->size); - int size = num_elems[i] * type->fields[i].desc->size; - *(generic_struct **) ptr = talloc_zero_size(ctx->talloc_ctx, - size); - break; - case EBML_TYPE_UINT: - *(uint64_t **) ptr = talloc_zero_array(ctx->talloc_ctx, - uint64_t, num_elems[i]); - break; - case EBML_TYPE_SINT: - *(int64_t **) ptr = talloc_zero_array(ctx->talloc_ctx, - int64_t, num_elems[i]); - break; - case EBML_TYPE_FLOAT: - *(double **) ptr = talloc_zero_array(ctx->talloc_ctx, - double, num_elems[i]); - break; - case EBML_TYPE_STR: - case EBML_TYPE_BINARY: - *(struct bstr **) ptr = talloc_zero_array(ctx->talloc_ctx, - struct bstr, - num_elems[i]); - break; - case EBML_TYPE_EBML_ID: - *(int32_t **) ptr = talloc_zero_array(ctx->talloc_ctx, - uint32_t, num_elems[i]); - break; - default: - abort(); - } - } - - while (data < end) { - int len; - uint32_t id = ebml_parse_id(data, &len); - assert(len >= 0 && len <= end - data); - data += len; - uint64_t length = ebml_parse_length(data, &len); - assert(len >= 0 && len <= end - data); - data += len; - if (length > end - data) { - // Try to parse what is possible from inside this partial element - length = end - data; - mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Next subelement content goes " - "past end of containing element, will be truncated\n"); - } - int field_idx = -1; - for (int i = 0; i < type->field_count; i++) - if (type->fields[i].id == id) { - field_idx = i; - break; - } - if (field_idx < 0) { - if (id == 0xec) - mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Ignoring Void element " - "size: %"PRIu64"\n", level+1, " ", length); - else if (id == 0xbf) - mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Ignoring CRC-32 " - "element size: %"PRIu64"\n", level+1, " ", - length); - else - mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Ignoring unrecognized " - "subelement. ID: %x size: %"PRIu64"\n", id, length); - data += length; - continue; - } - const struct ebml_field_desc *fd = &type->fields[field_idx]; - const struct ebml_elem_desc *ed = fd->desc; - bool multiple = fd->multiple; - int *countptr = (int *) (s + fd->count_offset); - if (*countptr >= num_elems[field_idx]) { - // Shouldn't happen with on any sane file without bugs - mp_msg(MSGT_DEMUX, MSGL_ERR, "[mkv] Too many subelems?\n"); - ctx->has_errors = true; - data += length; - continue; - } - if (*countptr > 0 && !multiple) { - mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Another subelement of type " - "%x %s (size: %"PRIu64"). Only one allowed. Ignoring.\n", - id, ed->name, length); - ctx->has_errors = true; - data += length; - continue; - } - mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Parsing %x %s size: %"PRIu64 - " value: ", level+1, " ", id, ed->name, length); - - char *fieldptr = s + fd->offset; - switch (ed->type) { - case EBML_TYPE_SUBELEMENTS: - mp_msg(MSGT_DEMUX, MSGL_DBG2, "subelements\n"); - char *subelptr; - if (multiple) { - char *array_start = (char *) *(generic_struct **) fieldptr; - subelptr = array_start + *countptr * ed->size; - } else - subelptr = fieldptr; - ebml_parse_element(ctx, subelptr, data, length, ed, level + 1); - break; - - case EBML_TYPE_UINT:; - uint64_t *uintptr; -#define GETPTR(subelptr, fieldtype) \ - if (multiple) \ - subelptr = *(fieldtype **) fieldptr + *countptr; \ - else \ - subelptr = (fieldtype *) fieldptr - GETPTR(uintptr, uint64_t); - if (length < 1 || length > 8) { - mp_msg(MSGT_DEMUX, MSGL_DBG2, "uint invalid length %"PRIu64 - "\n", length); - goto error; - } - *uintptr = ebml_parse_uint(data, length); - mp_msg(MSGT_DEMUX, MSGL_DBG2, "uint %"PRIu64"\n", *uintptr); - break; - - case EBML_TYPE_SINT:; - int64_t *sintptr; - GETPTR(sintptr, int64_t); - if (length < 1 || length > 8) { - mp_msg(MSGT_DEMUX, MSGL_DBG2, "sint invalid length %"PRIu64 - "\n", length); - goto error; - } - *sintptr = ebml_parse_sint(data, length); - mp_msg(MSGT_DEMUX, MSGL_DBG2, "sint %"PRId64"\n", *sintptr); - break; - - case EBML_TYPE_FLOAT:; - double *floatptr; - GETPTR(floatptr, double); - if (length != 4 && length != 8) { - mp_msg(MSGT_DEMUX, MSGL_DBG2, "float invalid length %"PRIu64 - "\n", length); - goto error; - } - *floatptr = ebml_parse_float(data, length); - mp_msg(MSGT_DEMUX, MSGL_DBG2, "float %f\n", *floatptr); - break; - - case EBML_TYPE_STR: - case EBML_TYPE_BINARY:; - struct bstr *strptr; - GETPTR(strptr, struct bstr); - strptr->start = data; - strptr->len = length; - if (ed->type == EBML_TYPE_STR) - mp_msg(MSGT_DEMUX, MSGL_DBG2, "string \"%.*s\"\n", - BSTR_P(*strptr)); - else - mp_msg(MSGT_DEMUX, MSGL_DBG2, "binary %zd bytes\n", - strptr->len); - break; - - case EBML_TYPE_EBML_ID:; - uint32_t *idptr; - GETPTR(idptr, uint32_t); - *idptr = ebml_parse_id(data, &len); - if (len != length) { - mp_msg(MSGT_DEMUX, MSGL_DBG2, "ebml_id broken value\n"); - goto error; - } - mp_msg(MSGT_DEMUX, MSGL_DBG2, "ebml_id %x\n", (unsigned)*idptr); - break; - default: - abort(); - } - *countptr += 1; - error: - data += length; - } -} - -// target must be initialized to zero -int ebml_read_element(struct stream *s, struct ebml_parse_ctx *ctx, - void *target, const struct ebml_elem_desc *desc) -{ - ctx->has_errors = false; - int msglevel = ctx->no_error_messages ? MSGL_DBG2 : MSGL_WARN; - uint64_t length = ebml_read_length(s, &ctx->bytes_read); - if (s->eof) { - mp_msg(MSGT_DEMUX, msglevel, "[mkv] Unexpected end of file " - "- partial or corrupt file?\n"); - return -1; - } - if (length > 1000000000) { - mp_msg(MSGT_DEMUX, msglevel, "[mkv] Refusing to read element over " - "100 MB in size\n"); - return -1; - } - ctx->talloc_ctx = talloc_size(NULL, length + 8); - int read_len = stream_read(s, ctx->talloc_ctx, length); - ctx->bytes_read += read_len; - if (read_len < length) - mp_msg(MSGT_DEMUX, msglevel, "[mkv] Unexpected end of file " - "- partial or corrupt file?\n"); - ebml_parse_element(ctx, target, ctx->talloc_ctx, read_len, desc, 0); - if (ctx->has_errors) - mp_msg(MSGT_DEMUX, msglevel, "[mkv] Error parsing element %s\n", - desc->name); - return 0; -} |