From ebe798cbc004e704cf4070230204b358160aeaf0 Mon Sep 17 00:00:00 2001 From: wm4 Date: Mon, 19 May 2014 01:18:48 +0200 Subject: demux_subreader: remove support for some subtitle formats Drop: sami, vplayer, rt, pjs, mpsub, aqt, jacosub. None of these seem to be actually in use, except sami. Sami is very complex, and the results subreader produces are not very useful. For all these formats, there are still parsers in FFmpeg. We remove the subreader implementation, because it might contain security relevant bugs and such. (This is old, unmaintained C string parsing code, written in times where absolutely nobody cared about security. The kind of awesome code.) We keep the other formats, because they're (mostly) commonly used and relatively simple, for UTF16 support (still missing in FFmpeg), and for the sake of Libav. --- demux/demux_subreader.c | 633 +----------------------------------------------- 1 file changed, 4 insertions(+), 629 deletions(-) (limited to 'demux/demux_subreader.c') diff --git a/demux/demux_subreader.c b/demux/demux_subreader.c index 28fb96208e..d3a792c9d2 100644 --- a/demux/demux_subreader.c +++ b/demux/demux_subreader.c @@ -45,17 +45,10 @@ #define SUB_MICRODVD 0 #define SUB_SUBRIP 1 #define SUB_SUBVIEWER 2 -#define SUB_SAMI 3 -#define SUB_VPLAYER 4 -#define SUB_RT 5 -#define SUB_SSA 6 -#define SUB_PJS 7 -#define SUB_MPSUB 8 -#define SUB_AQTITLE 9 -#define SUB_SUBVIEWER2 10 -#define SUB_SUBRIP09 11 -#define SUB_JACOSUB 12 -#define SUB_MPL2 13 +#define SUB_SSA 3 +#define SUB_SUBVIEWER2 4 +#define SUB_SUBRIP09 5 +#define SUB_MPL2 6 #define SUB_MAX_TEXT 12 #define SUB_ALIGNMENT_BOTTOMLEFT 1 @@ -123,182 +116,6 @@ static int eol(char p) { return p=='\r' || p=='\n' || p=='\0'; } -/* Remove leading and trailing space */ -static void trail_space(char *s) { - int i = 0; - while (isspace(s[i])) ++i; - int copylen = strlen(s + i); - if (i) memmove(s, s + i, copylen); - i = strlen(s) - 1; - while (i > 0 && isspace(s[i])) s[i--] = '\0'; -} - -static char *stristr(const char *haystack, const char *needle) { - int len = 0; - const char *p = haystack; - - if (!(haystack && needle)) return NULL; - - len=strlen(needle); - while (*p != '\0') { - if (strncasecmp(p, needle, len) == 0) return (char*)p; - p++; - } - - return NULL; -} - -static void sami_add_line(subtitle *current, char *buffer, char **pos) { - char *p = *pos; - *p = 0; - trail_space(buffer); - if (*buffer && current->lines < SUB_MAX_TEXT) - current->text[current->lines++] = strdup(buffer); - *pos = buffer; -} - -static subtitle *sub_read_line_sami(stream_t* st, subtitle *current, - struct readline_args *args) -{ - int utf16 = args->utf16; - static char line[LINE_LEN+1]; - static char *s = NULL, *slacktime_s; - char text[LINE_LEN+1], *p=NULL, *q; - int state; - - current->lines = current->start = current->end = 0; - current->alignment = SUB_ALIGNMENT_BOTTOMCENTER; - state = 0; - - /* read the first line */ - if (!s) - if (!(s = stream_read_line(st, line, LINE_LEN, utf16))) return 0; - - do { - switch (state) { - - case 0: /* find "START=" or "Slacktime:" */ - slacktime_s = stristr (s, "Slacktime:"); - if (slacktime_s) - args->sub_slacktime = strtol (slacktime_s+10, NULL, 0) / 10; - - s = stristr (s, "Start="); - if (s) { - current->start = strtol (s + 6, &s, 0) / 10; - /* eat '>' */ - for (; *s != '>' && *s != '\0'; s++); - s++; - state = 1; continue; - } - break; - - case 1: /* find (optional) " TAG */ - if (*s == '\0') - break; - s++; - continue; - - case 2: /* find ">" */ - if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; } - break; - - case 3: /* get all text until '<' appears */ - if (p - text >= LINE_LEN) - sami_add_line(current, text, &p); - if (*s == '\0') break; - else if (!strncasecmp (s, "
", 4)) { - sami_add_line(current, text, &p); - s += 4; - } - else if (*s == '{') { state = 5; ++s; continue; } - else if (*s == '<') { state = 4; } - else if (!strncasecmp (s, " ", 6)) { *p++ = ' '; s += 6; } - else if (*s == '\t') { *p++ = ' '; s++; } - else if (*s == '\r' || *s == '\n') { s++; } - else *p++ = *s++; - - /* skip duplicated space */ - if (p > text + 2) if (*(p-1) == ' ' && *(p-2) == ' ') p--; - - continue; - - case 4: /* get current->end or skip */ - q = stristr (s, "Start="); - if (q) { - current->end = strtol (q + 6, &q, 0) / 10 - 1; - *p = '\0'; trail_space (text); - if (text[0] != '\0') - current->text[current->lines++] = strdup (text); - if (current->lines > 0) { state = 99; break; } - state = 0; continue; - } - s = strchr (s, '>'); - if (s) { s++; state = 3; continue; } - break; - case 5: /* get rid of {...} text, but read the alignment code */ - if ((*s == '\\') && (*(s + 1) == 'a')) { - if (stristr(s, "\\a1") != NULL) { - current->alignment = SUB_ALIGNMENT_BOTTOMLEFT; - s = s + 3; - } - if (stristr(s, "\\a2") != NULL) { - current->alignment = SUB_ALIGNMENT_BOTTOMCENTER; - s = s + 3; - } else if (stristr(s, "\\a3") != NULL) { - current->alignment = SUB_ALIGNMENT_BOTTOMRIGHT; - s = s + 3; - } else if ((stristr(s, "\\a4") != NULL) || (stristr(s, "\\a5") != NULL) || (stristr(s, "\\a8") != NULL)) { - current->alignment = SUB_ALIGNMENT_TOPLEFT; - s = s + 3; - } else if (stristr(s, "\\a6") != NULL) { - current->alignment = SUB_ALIGNMENT_TOPCENTER; - s = s + 3; - } else if (stristr(s, "\\a7") != NULL) { - current->alignment = SUB_ALIGNMENT_TOPRIGHT; - s = s + 3; - } else if (stristr(s, "\\a9") != NULL) { - current->alignment = SUB_ALIGNMENT_MIDDLELEFT; - s = s + 3; - } else if (stristr(s, "\\a10") != NULL) { - current->alignment = SUB_ALIGNMENT_MIDDLECENTER; - s = s + 4; - } else if (stristr(s, "\\a11") != NULL) { - current->alignment = SUB_ALIGNMENT_MIDDLERIGHT; - s = s + 4; - } - } - if (*s == '}') state = 3; - ++s; - continue; - } - - /* read next line */ - if (state != 99 && !(s = stream_read_line (st, line, LINE_LEN, utf16))) { - if (current->start > 0) { - break; // if it is the last subtitle - } else { - return 0; - } - } - - } while (state != 99); - - // For the last subtitle - if (current->end <= 0) { - current->end = current->start + args->sub_slacktime; - sami_add_line(current, text, &p); - } - - return current; -} - - static const char *sub_readtext(const char *source, char **dest) { int len=0; const char *p=source; @@ -499,98 +316,6 @@ static subtitle *sub_read_line_subviewer2(stream_t *st,subtitle *current, return current; } - -static subtitle *sub_read_line_vplayer(stream_t *st,subtitle *current, - struct readline_args *args) -{ - int utf16 = args->utf16; - char line[LINE_LEN+1]; - int a1,a2,a3; - char *p=NULL, separator; - int len,plen; - - while (!current->text[0]) { - if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; - if ((len=sscanf (line, "%d:%d:%d%c%n",&a1,&a2,&a3,&separator,&plen)) < 4) - continue; - - if (!(current->start = a1*360000+a2*6000+a3*100)) - continue; - /* removed by wodzu - p=line; - // finds the body of the subtitle - for (i=0; i<3; i++){ - p=strchr(p,':'); - if (p==NULL) break; - ++p; - } - if (p==NULL) { - printf("Skipping incorrect subtitle line!\n"); - continue; - } - */ - // by wodzu: hey! this time we know what length it has! what is - // that magic for? it can't deal with space instead of third - // colon! look, what simple it can be: - p = &line[ plen ]; - - if (*p!='|') { - // - return set_multiline_text(args, current, p, 0); - } - } - return current; -} - -static subtitle *sub_read_line_rt(stream_t *st,subtitle *current, - struct readline_args *args) -{ - int utf16 = args->utf16; - - //TODO: This format uses quite rich (sub/super)set of xhtml - // I couldn't check it since DTD is not included. - // WARNING: full XML parses can be required for proper parsing - char line[LINE_LEN+1]; - int a1,a2,a3,a4,b1,b2,b3,b4; - char *p=NULL,*next=NULL; - int len,plen; - - while (!current->text[0]) { - if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; - //TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0 - //to describe the same moment in time. Maybe there are even more formats in use. - //if ((len=sscanf (line, "