From de4ea4bbd1c5101bf0b572a4facd58875845f495 Mon Sep 17 00:00:00 2001 From: wm4 Date: Mon, 12 Oct 2015 21:56:44 +0200 Subject: ass: use a bitmap for checking duplicate events The loop in check_duplicate_event() essentially makes event processing with ass_process_chunk() O(n^2). Using a bitmap instead of a loop brings it back to O(n). This could be interpreted as an API change: since the event list is freely modifieable by the API user through ASS_Track public fields, libass can't know if the internal bitmap went out of sync with the public event list. We just redefine it so that calling ass_process_chunk() means the API user agrees not to manipulate the event list otherwise. --- libass/Makefile.am | 2 +- libass/ass.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- libass/ass.h | 6 +++++- 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/libass/Makefile.am b/libass/Makefile.am index c0e4692..43e825d 100644 --- a/libass/Makefile.am +++ b/libass/Makefile.am @@ -3,7 +3,7 @@ AM_CFLAGS = -std=gnu99 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter \ -Wpointer-arith -Wredundant-decls -D_GNU_SOURCE LIBASS_LT_CURRENT = 7 -LIBASS_LT_REVISION = 0 +LIBASS_LT_REVISION = 1 LIBASS_LT_AGE = 2 yasm_verbose = $(yasm_verbose_$(V)) diff --git a/libass/ass.c b/libass/ass.c index 0bb678f..68c8600 100644 --- a/libass/ass.c +++ b/libass/ass.c @@ -53,6 +53,10 @@ struct parser_priv { char *fontdata; int fontdata_size; int fontdata_used; + + // contains bitmap of ReadOrder IDs of all read events + uint32_t *read_order_bitmap; + int read_order_elems; // size in uint32_t units of read_order_bitmap }; #define ASS_STYLES_ALLOC 20 @@ -67,6 +71,7 @@ void ass_free_track(ASS_Track *track) int i; if (track->parser_priv) { + free(track->parser_priv->read_order_bitmap); free(track->parser_priv->fontname); free(track->parser_priv->fontdata); free(track->parser_priv); @@ -150,6 +155,45 @@ void ass_free_style(ASS_Track *track, int sid) free(style->FontName); } +static int resize_read_order_bitmap(ASS_Track *track, int max_id) +{ + // Don't allow malicious files to OOM us easily. Also avoids int overflows. + if (max_id < 0 || max_id >= 10 * 1024 * 1024 * 8) + goto fail; + if (max_id >= track->parser_priv->read_order_elems * 32) { + int oldelems = track->parser_priv->read_order_elems; + int elems = ((max_id + 31) / 32 + 1) * 2; + assert(elems >= oldelems); + track->parser_priv->read_order_elems = elems; + void *new_bitmap = + realloc(track->parser_priv->read_order_bitmap, elems * 4); + if (!new_bitmap) + goto fail; + track->parser_priv->read_order_bitmap = new_bitmap; + memset(track->parser_priv->read_order_bitmap + oldelems, 0, + (elems - oldelems) * 4); + } + return 0; + +fail: + free(track->parser_priv->read_order_bitmap); + track->parser_priv->read_order_bitmap = NULL; + track->parser_priv->read_order_elems = 0; + return -1; +} + +static int test_and_set_read_order_bit(ASS_Track *track, int id) +{ + if (resize_read_order_bitmap(track, id) < 0) + return -1; + int index = id / 32; + int bit = 1 << (id % 32); + if (track->parser_priv->read_order_bitmap[index] & bit) + return 1; + track->parser_priv->read_order_bitmap[index] |= bit; + return 0; +} + // ============================================================================================== /** @@ -846,8 +890,10 @@ void ass_process_codec_private(ASS_Track *track, char *data, int size) static int check_duplicate_event(ASS_Track *track, int ReadOrder) { - int i; - for (i = 0; i < track->n_events - 1; ++i) // ignoring last event, it is the one we are comparing with + if (track->parser_priv->read_order_bitmap) + return test_and_set_read_order_bit(track, ReadOrder) > 0; + // ignoring last event, it is the one we are comparing with + for (int i = 0; i < track->n_events - 1; i++) if (track->events[i].ReadOrder == ReadOrder) return 1; return 0; @@ -870,6 +916,13 @@ void ass_process_chunk(ASS_Track *track, char *data, int size, char *token; ASS_Event *event; + if (!track->parser_priv->read_order_bitmap) { + for (int i = 0; i < track->n_events; i++) { + if (test_and_set_read_order_bit(track, track->events[i].ReadOrder) < 0) + break; + } + } + if (!track->event_format) { ass_msg(track->library, MSGL_WARN, "Event format header missing"); return; diff --git a/libass/ass.h b/libass/ass.h index c5d0936..cbcc9f0 100644 --- a/libass/ass.h +++ b/libass/ass.h @@ -24,7 +24,7 @@ #include #include "ass_types.h" -#define LIBASS_VERSION 0x01300000 +#define LIBASS_VERSION 0x01300001 #ifdef __cplusplus extern "C" { @@ -562,6 +562,10 @@ void ass_process_codec_private(ASS_Track *track, char *data, int size); /** * \brief Parse a chunk of subtitle stream data. A chunk contains exactly one * event in Matroska format. See the Matroska specification for details. + * In later libass versions (since LIBASS_VERSION==0x01300001), using this + * function means you agree not to modify events manually, or using other + * functions manipulating the event list like ass_process_data(). If you do + * anyway, the internal duplicate checking might break. * \param track track * \param data string to parse * \param size length of data -- cgit v1.2.3