diff options
Diffstat (limited to 'sub/subreader.c')
-rw-r--r-- | sub/subreader.c | 334 |
1 files changed, 74 insertions, 260 deletions
diff --git a/sub/subreader.c b/sub/subreader.c index 090cd0a8b4..8c5a259196 100644 --- a/sub/subreader.c +++ b/sub/subreader.c @@ -32,7 +32,6 @@ #include "core/mp_msg.h" #include "subreader.h" #include "core/mp_common.h" -#include "subassconvert.h" #include "core/options.h" #include "stream/stream.h" #include "libavutil/common.h" @@ -48,42 +47,33 @@ #include <iconv.h> #endif -char *sub_cp=NULL; - - -int suboverlap_enabled = 1; - // Parameter struct for the format-specific readline functions struct readline_args { int utf16; struct MPOpts *opts; + + // subtitle reader state used by some formats + + float mpsub_multiplier; + float mpsub_position; + int sub_slacktime; + + /* + Some subtitling formats, namely AQT and Subrip09, define the end of a + subtitle as the beginning of the following. Since currently we read one + subtitle at time, for these format we keep two global *subtitle, + previous_aqt_sub and previous_subrip09_sub, pointing to previous subtitle, + so we can change its end when we read current subtitle starting time. + We use a single global unsigned long, + previous_sub_end, for both (and even future) formats, to store the end of + the previous sub: it is initialized to 0 in sub_read_file and eventually + modified by sub_read_aqt_line or sub_read_subrip09_line. + */ + unsigned long previous_sub_end; }; /* Maximal length of line of a subtitle */ #define LINE_LEN 1000 -static float mpsub_position=0; -static float mpsub_multiplier=1.; -static int sub_slacktime = 20000; //20 sec - -int sub_no_text_pp=0; // 1 => do not apply text post-processing - // like {\...} elimination in SSA format. - -int sub_match_fuzziness=0; // level of sub name matching fuzziness - -/* Use the SUB_* constant defined in the header file */ -int sub_format=SUB_INVALID; -/* - Some subtitling formats, namely AQT and Subrip09, define the end of a - subtitle as the beginning of the following. Since currently we read one - subtitle at time, for these format we keep two global *subtitle, - previous_aqt_sub and previous_subrip09_sub, pointing to previous subtitle, - so we can change its end when we read current subtitle starting time. - We use a single global unsigned long, - previous_sub_end, for both (and even future) formats, to store the end of - the previous sub: it is initialized to 0 in sub_read_file and eventually - modified by sub_read_aqt_line or sub_read_subrip09_line. - */ -unsigned long previous_sub_end; static int eol(char p) { return p=='\r' || p=='\n' || p=='\0'; @@ -145,7 +135,7 @@ static subtitle *sub_read_line_sami(stream_t* st, subtitle *current, case 0: /* find "START=" or "Slacktime:" */ slacktime_s = stristr (s, "Slacktime:"); if (slacktime_s) - sub_slacktime = strtol (slacktime_s+10, NULL, 0) / 10; + args->sub_slacktime = strtol (slacktime_s+10, NULL, 0) / 10; s = stristr (s, "Start="); if (s) { @@ -181,7 +171,7 @@ static subtitle *sub_read_line_sami(stream_t* st, subtitle *current, sami_add_line(current, text, &p); s += 4; } - else if ((*s == '{') && !sub_no_text_pp) { state = 5; ++s; continue; } + else if ((*s == '{') && !args->opts->sub_no_text_pp) { state = 5; ++s; continue; } else if (*s == '<') { state = 4; } else if (!strncasecmp (s, " ", 6)) { *p++ = ' '; s += 6; } else if (*s == '\t') { *p++ = ' '; s++; } @@ -207,7 +197,7 @@ static subtitle *sub_read_line_sami(stream_t* st, subtitle *current, if (s) { s++; state = 3; continue; } break; case 5: /* get rid of {...} text, but read the alignment code */ - if ((*s == '\\') && (*(s + 1) == 'a') && !sub_no_text_pp) { + if ((*s == '\\') && (*(s + 1) == 'a') && !args->opts->sub_no_text_pp) { if (stristr(s, "\\a1") != NULL) { current->alignment = SUB_ALIGNMENT_BOTTOMLEFT; s = s + 3; @@ -256,7 +246,7 @@ static subtitle *sub_read_line_sami(stream_t* st, subtitle *current, // For the last subtitle if (current->end <= 0) { - current->end = current->start + sub_slacktime; + current->end = current->start + args->sub_slacktime; sami_add_line(current, text, &p); } @@ -308,7 +298,6 @@ static subtitle *sub_read_line_microdvd(stream_t *st,subtitle *current, int utf16 = args->utf16; char line[LINE_LEN+1]; char line2[LINE_LEN+1]; - char *p; do { if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; @@ -319,13 +308,7 @@ static subtitle *sub_read_line_microdvd(stream_t *st,subtitle *current, "{%ld}{%ld}%[^\r\n]", &(current->start), &(current->end), line2) < 3)); - if (args->opts->ass_enabled) { - subassconvert_microdvd(line2, line, LINE_LEN + 1); - p = line; - } else - p = line2; - - return set_multiline_text(current, p, 0); + return set_multiline_text(current, line2, 0); } static subtitle *sub_read_line_mpl2(stream_t *st,subtitle *current, @@ -379,8 +362,8 @@ static subtitle *sub_read_line_subrip(stream_t* st, subtitle *current, return current; } -static subtitle *sub_ass_read_line_subviewer(stream_t *st, subtitle *current, - struct readline_args *args) +static subtitle *sub_read_line_subviewer(stream_t *st, subtitle *current, + struct readline_args *args) { int utf16 = args->utf16; int a1, a2, a3, a4, b1, b2, b3, b4, j = 0; @@ -426,74 +409,14 @@ static subtitle *sub_ass_read_line_subviewer(stream_t *st, subtitle *current, j += len; } - /* Use the ASS/SSA converter to transform the whole lines */ if (full_line[0]) { - char converted_line[LINE_LEN + 1]; - subassconvert_subrip(full_line, converted_line, LINE_LEN + 1); - current->text[0] = strdup(converted_line); + current->text[0] = strdup(full_line); current->lines = 1; } } return current; } -static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current, - struct readline_args *args) -{ - int utf16 = args->utf16; - char line[LINE_LEN+1]; - int a1,a2,a3,a4,b1,b2,b3,b4; - char *p=NULL; - int i,len; - - if (args->opts->ass_enabled) - return sub_ass_read_line_subviewer(st, current, args); - while (!current->text[0]) { - if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; - if ((len=sscanf (line, "%d:%d:%d%*1[,.:]%d --> %d:%d:%d%*1[,.:]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8) - continue; - current->start = a1*360000+a2*6000+a3*100+a4/10; - current->end = b1*360000+b2*6000+b3*100+b4/10; - for (i=0; i<SUB_MAX_TEXT;) { - int blank = 1; - if (!stream_read_line (st, line, LINE_LEN, utf16)) break; - len=0; - for (p=line; *p!='\n' && *p!='\r' && *p; p++,len++) - if (*p != ' ' && *p != '\t') - blank = 0; - if (len && !blank) { - int j=0,skip=0; - char *curptr=current->text[i]=malloc (len+1); - if (!current->text[i]) return ERR; - //strncpy (current->text[i], line, len); current->text[i][len]='\0'; - for(; j<len; j++) { - /* let's filter html tags ::atmos */ - if(line[j]=='>') { - skip=0; - continue; - } - if(line[j]=='<') { - skip=1; - continue; - } - if(skip) { - continue; - } - *curptr=line[j]; - curptr++; - } - *curptr='\0'; - - i++; - } else { - break; - } - } - current->lines=i; - } - return current; -} - static subtitle *sub_read_line_subviewer2(stream_t *st,subtitle *current, struct readline_args *args) { @@ -684,20 +607,6 @@ static subtitle *sub_read_line_ssa(stream_t *st,subtitle *current, return current; } -static void sub_pp_ssa(subtitle *sub) -{ - for (int i = 0; i < sub->lines; i++) { - char *s, *d; - s = d = sub->text[i]; - while (1) { - while (*s == '{') - while (*s && *s++ != '}'); - if (!(*d++ = *s++)) - break; - } - } -} - /* * PJS subtitles reader. * That's the "Phoenix Japanimation Society" format. @@ -761,10 +670,10 @@ static subtitle *sub_read_line_mpsub(stream_t *st, subtitle *current, if (!stream_read_line(st, line, LINE_LEN, utf16)) return NULL; } while (sscanf (line, "%f %f", &a, &b) !=2); - mpsub_position += a*mpsub_multiplier; - current->start=(int) mpsub_position; - mpsub_position += b*mpsub_multiplier; - current->end=(int) mpsub_position; + args->mpsub_position += a*args->mpsub_multiplier; + current->start=(int) args->mpsub_position; + args->mpsub_position += b*args->mpsub_multiplier; + current->end=(int) args->mpsub_position; while (num < SUB_MAX_TEXT) { if (!stream_read_line (st, line, LINE_LEN, utf16)) { @@ -805,8 +714,8 @@ retry: break; } - if (!previous_sub_end) - previous_sub_end = (current->start) ? current->start - 1 : 0; + if (!args->previous_sub_end) + args->previous_sub_end = (current->start) ? current->start - 1 : 0; if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; @@ -846,8 +755,8 @@ retry: current->start = a1*360000+a2*6000+a3*100; - if (!previous_sub_end) - previous_sub_end = (current->start) ? current->start - 1 : 0; + if (!args->previous_sub_end) + args->previous_sub_end = (current->start) ? current->start - 1 : 0; if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL; @@ -1109,14 +1018,12 @@ static int sub_autodetect (stream_t* st, int *uses_time, int utf16) { return SUB_INVALID; // too many bad lines } -extern float sub_delay; -extern float sub_fps; - #ifdef CONFIG_ICONV -static iconv_t icdsc = (iconv_t)(-1); +static const char* guess_cp(stream_t *st, const char *preferred_language, const char *fallback); -void subcp_open (stream_t *st) +static iconv_t subcp_open (stream_t *st, const char *sub_cp) { + iconv_t icdsc = (iconv_t)(-1); char *tocp = "UTF-8"; if (sub_cp){ @@ -1139,18 +1046,18 @@ void subcp_open (stream_t *st) } else mp_msg(MSGT_SUBREADER,MSGL_ERR,"SUB: error opening iconv descriptor.\n"); } + return icdsc; } -void subcp_close (void) +static void subcp_close (iconv_t icdsc) { if (icdsc != (iconv_t)(-1)){ (void) iconv_close (icdsc); - icdsc = (iconv_t)(-1); mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: closed iconv descriptor.\n"); } } -subtitle* subcp_recode (subtitle *sub) +static subtitle* subcp_recode (iconv_t icdsc, subtitle *sub) { int l=sub->lines; size_t ileft, oleft; @@ -1184,7 +1091,8 @@ subtitle* subcp_recode (subtitle *sub) } #endif -static void adjust_subs_time(subtitle* sub, float subtime, float fps, int block, +static void adjust_subs_time(subtitle* sub, float subtime, float fps, + float sub_fps, int block, int sub_num, int sub_uses_time) { int n,m; subtitle* nextsub; @@ -1248,10 +1156,11 @@ struct subreader { struct readline_args *args); void (*post)(subtitle *dest); const char *name; + const char *codec_name; }; #ifdef CONFIG_ENCA -const char* guess_buffer_cp(unsigned char* buffer, int buflen, const char *preferred_language, const char *fallback) +static const char* guess_buffer_cp(unsigned char* buffer, int buflen, const char *preferred_language, const char *fallback) { const char **languages; size_t langcnt; @@ -1291,7 +1200,7 @@ const char* guess_buffer_cp(unsigned char* buffer, int buflen, const char *prefe } #define MAX_GUESS_BUFFER_SIZE (256*1024) -const char* guess_cp(stream_t *st, const char *preferred_language, const char *fallback) +static const char* guess_cp(stream_t *st, const char *preferred_language, const char *fallback) { size_t buflen; unsigned char *buffer; @@ -1323,13 +1232,13 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) int uses_time = 0, sub_num = 0, sub_errs = 0; static const struct subreader sr[]= { - { sub_read_line_microdvd, NULL, "microdvd" }, + { sub_read_line_microdvd, NULL, "microdvd", "microdvd" }, { sub_read_line_subrip, NULL, "subviewer" }, - { sub_read_line_subviewer, NULL, "subrip" }, + { sub_read_line_subviewer, NULL, "subrip", "subrip" }, { sub_read_line_sami, NULL, "sami" }, { sub_read_line_vplayer, NULL, "vplayer" }, { sub_read_line_rt, NULL, "rt" }, - { sub_read_line_ssa, sub_pp_ssa, "ssa" }, + { sub_read_line_ssa, NULL, "ssa", "ass-text" }, { sub_read_line_pjs, NULL, "pjs" }, { sub_read_line_mpsub, NULL, "mpsub" }, { sub_read_line_aqt, NULL, "aqt" }, @@ -1343,7 +1252,7 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) if(filename==NULL) return NULL; //qnx segfault fd=open_stream (filename, NULL, NULL); if (!fd) return NULL; - sub_format = SUB_INVALID; + int sub_format = SUB_INVALID; for (utf16 = 0; sub_format == SUB_INVALID && utf16 < 3; utf16++) { sub_format=sub_autodetect (fd, &uses_time, utf16); stream_reset(fd); @@ -1351,7 +1260,10 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) } utf16--; - mpsub_multiplier = (uses_time ? 100.0 : 1.0); + struct readline_args args = {utf16, opts}; + args.sub_slacktime = 20000; //20 sec + args.mpsub_multiplier = (uses_time ? 100.0 : 1.0); + if (sub_format==SUB_INVALID) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: Could not determine file format\n"); free_stream(fd); @@ -1361,6 +1273,7 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) mp_msg(MSGT_SUBREADER, MSGL_V, "SUB: Detected subtitle file format: %s\n", srp->name); #ifdef CONFIG_ICONV + iconv_t icdsc = (iconv_t)(-1); { int l,k; k = -1; @@ -1371,7 +1284,7 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) break; } } - if (k<0) subcp_open(fd); + if (k<0) icdsc = subcp_open(fd, opts->sub_cp); } #endif @@ -1384,22 +1297,22 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) sub = malloc(sizeof(subtitle)); //This is to deal with those formats (AQT & Subrip) which define the end of a subtitle //as the beginning of the following - previous_sub_end = 0; + args.previous_sub_end = 0; while(1){ if(sub_num>=n_max){ n_max+=16; first=realloc(first,n_max*sizeof(subtitle)); } memset(sub, '\0', sizeof(subtitle)); - sub=srp->read(fd, sub, &(struct readline_args){utf16, opts}); + sub=srp->read(fd, sub, &args); if(!sub) break; // EOF #ifdef CONFIG_ICONV - if (sub!=ERR) sub=subcp_recode(sub); + if (sub!=ERR) sub=subcp_recode(icdsc, sub); #endif if ( sub == ERR ) { #ifdef CONFIG_ICONV - subcp_close(); + subcp_close(icdsc); #endif free(first); free(alloced_sub); @@ -1407,7 +1320,7 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) return NULL; } // Apply any post processing that needs recoding first - if ((sub!=ERR) && !sub_no_text_pp && srp->post) srp->post(sub); + if ((sub!=ERR) && !args.opts->sub_no_text_pp && srp->post) srp->post(sub); if(!sub_num || (first[sub_num - 1].start <= sub->start)){ first[sub_num].start = sub->start; first[sub_num].end = sub->end; @@ -1416,9 +1329,9 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) for(i = 0; i < sub->lines; ++i){ first[sub_num].text[i] = sub->text[i]; } - if (previous_sub_end){ - first[sub_num - 1].end = previous_sub_end; - previous_sub_end = 0; + if (args.previous_sub_end){ + first[sub_num - 1].end = args.previous_sub_end; + args.previous_sub_end = 0; } } else { for(j = sub_num - 1; j >= 0; --j){ @@ -1437,10 +1350,10 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) for(i = 0; i < SUB_MAX_TEXT; ++i){ first[j].text[i] = sub->text[i]; } - if (previous_sub_end){ + if (args.previous_sub_end){ first[j].end = first[j - 1].end; - first[j - 1].end = previous_sub_end; - previous_sub_end = 0; + first[j - 1].end = args.previous_sub_end; + args.previous_sub_end = 0; } break; } @@ -1452,7 +1365,7 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) free_stream(fd); #ifdef CONFIG_ICONV - subcp_close(); + subcp_close(icdsc); #endif free(alloced_sub); @@ -1469,9 +1382,9 @@ sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts) // the user didn't forced no-overlapsub and the format is Jacosub or Ssa. // this is because usually overlapping subtitles are found in these formats, // while in others they are probably result of bad timing -if ((suboverlap_enabled == 2) || - ((suboverlap_enabled) && ((sub_format == SUB_JACOSUB) || (sub_format == SUB_SSA)))) { - adjust_subs_time(first, 6.0, fps, 0, sub_num, uses_time);/*~6 secs AST*/ +if ((opts->suboverlap_enabled == 2) || + ((opts->suboverlap_enabled) && ((sub_format == SUB_JACOSUB) || (sub_format == SUB_SSA)))) { + adjust_subs_time(first, 6.0, fps, opts->sub_fps, 0, sub_num, uses_time);/*~6 secs AST*/ // here we manage overlapping subtitles sub_orig = sub_num; n_first = sub_num; @@ -1678,18 +1591,19 @@ if ((suboverlap_enabled == 2) || return_sub = second; } else { //if(suboverlap_enabled) - adjust_subs_time(first, 6.0, fps, 1, sub_num, uses_time);/*~6 secs AST*/ + adjust_subs_time(first, 6.0, fps, opts->sub_fps, 1, sub_num, uses_time);/*~6 secs AST*/ return_sub = first; } if (return_sub == NULL) return NULL; subt_data = talloc_zero(NULL, sub_data); talloc_set_destructor(subt_data, sub_destroy); - subt_data->codec = srp->name; + subt_data->codec = srp->codec_name ? srp->codec_name : "text"; subt_data->filename = strdup(filename); subt_data->sub_uses_time = uses_time; subt_data->sub_num = sub_num; subt_data->sub_errs = sub_errs; subt_data->subtitles = return_sub; + subt_data->fallback_fps = fps; return subt_data; } @@ -1704,103 +1618,3 @@ static int sub_destroy(void *ptr) free( subd->filename ); return 0; } - -#define MAX_SUBLINE 512 -/** - * \brief parse text and append it to subtitle in sub - * \param sub subtitle struct to add text to - * \param txt text to parse - * \param len length of text in txt - * \param endpts pts at which this subtitle text should be removed again - * - * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r' - * and '\0' are interpreted as newlines, duplicate, leading and trailing - * newlines are ignored. - */ -void sub_add_text(subtitle *sub, const char *txt, int len, double endpts) { - int comment = 0; - int double_newline = 1; // ignore newlines at the beginning - int i, pos; - char *buf; - if (sub->lines >= SUB_MAX_TEXT) return; - pos = 0; - buf = malloc(MAX_SUBLINE + 1); - sub->text[sub->lines] = buf; - sub->endpts[sub->lines] = endpts; - for (i = 0; i < len && pos < MAX_SUBLINE; i++) { - char c = txt[i]; - if (c == '<') comment |= 1; - if (c == '{') comment |= 2; - if (comment) { - if (c == '}') comment &= ~2; - if (c == '>') comment &= ~1; - continue; - } - if (pos == MAX_SUBLINE - 1) { - i--; - c = 0; - } - if (c == '\\' && i + 1 < len) { - c = txt[++i]; - if (c == 'n' || c == 'N') c = 0; - } - if (c == '\n' || c == '\r') c = 0; - if (c) { - double_newline = 0; - buf[pos++] = c; - } else if (!double_newline) { - if (sub->lines >= SUB_MAX_TEXT - 1) { - mp_msg(MSGT_VO, MSGL_WARN, "Too many subtitle lines\n"); - break; - } - double_newline = 1; - buf[pos] = 0; - sub->lines++; - pos = 0; - buf = malloc(MAX_SUBLINE + 1); - sub->text[sub->lines] = buf; - sub->endpts[sub->lines] = endpts; - } - } - buf[pos] = 0; - if (sub->lines < SUB_MAX_TEXT && - strlen(sub->text[sub->lines])) - sub->lines++; - if (sub->lines > 1 && - strcmp(sub->text[sub->lines-1], sub->text[sub->lines-2]) == 0) { - // remove duplicate lines. These can happen with some - // "clever" ASS effects. - sub->lines--; - sub->endpts[sub->lines-1] = - FFMAX(sub->endpts[sub->lines-1], - sub->endpts[sub->lines]); - free(sub->text[sub->lines]); - } -} - -/** - * \brief remove outdated subtitle lines. - * \param sub subtitle struct to modify - * \param pts current pts. All lines with endpts <= this will be removed. - * Use MP_NOPTS_VALUE to remove all lines - * \return 1 if sub was modified, 0 otherwise. - */ -int sub_clear_text(subtitle *sub, double pts) { - int i = 0; - int changed = 0; - while (i < sub->lines) { - double endpts = sub->endpts[i]; - if (pts == MP_NOPTS_VALUE || (endpts != MP_NOPTS_VALUE && pts >= endpts)) { - int j; - free(sub->text[i]); - for (j = i + 1; j < sub->lines; j++) { - sub->text[j - 1] = sub->text[j]; - sub->endpts[j - 1] = sub->endpts[j]; - } - sub->lines--; - changed = 1; - } else - i++; - } - return changed; -} |