summary refs log tree commit diff stats
path: root/subreader.c
diff options
context:
space:
mode:
author reimar <reimar@b3059339-0415-0410-9bf9-f77b7e298cf2> 2010-02-28 15:24:30 +0000
committer reimar <reimar@b3059339-0415-0410-9bf9-f77b7e298cf2> 2010-02-28 15:24:30 +0000
commit b20e08ed94c850b419ed9b9e1736cf32da71afc7 (patch)
tree b51c40e39a64389c76aee29974ad397209c67e9d /subreader.c
parent e9a5e7f667d1b0c0dec0053ad9ec6f7bc3162b60 (diff)
download mpv-b20e08ed94c850b419ed9b9e1736cf32da71afc7.tar.bz2
mpv-b20e08ed94c850b419ed9b9e1736cf32da71afc7.tar.xz
Extend stream_read_line to support reading lines from UTF-16 encoded files
and use this to support reading UTF-16 encoded subtitle files in subreader.c

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@30799 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'subreader.c')
-rw-r--r-- subreader.c 95
1 files changed, 50 insertions, 45 deletions
diff --git a/subreader.c b/subreader.c
index 4f8782353d..0169925727 100644
--- a/subreader.c
+++ b/subreader.c
@@ -111,7 +111,7 @@ static char *stristr(const char *haystack, const char *needle) {
return NULL;
}
-static subtitle *sub_read_line_sami(stream_t* st, subtitle *current) {
+static subtitle *sub_read_line_sami(stream_t* st, subtitle *current, int utf16) {
static char line[LINE_LEN+1];
static char *s = NULL, *slacktime_s;
char text[LINE_LEN+1], *p=NULL, *q;
@@ -123,7 +123,7 @@ static subtitle *sub_read_line_sami(stream_t* st, subtitle *current) {
/* read the first line */
if (!s)
- if (!(s = stream_read_line(st, line, LINE_LEN))) return 0;
+ if (!(s = stream_read_line(st, line, LINE_LEN, utf16))) return 0;
do {
switch (state) {
@@ -230,7 +230,7 @@ static subtitle *sub_read_line_sami(stream_t* st, subtitle *current) {
}
/* read next line */
- if (state != 99 && !(s = stream_read_line (st, line, LINE_LEN))) {
+ if (state != 99 && !(s = stream_read_line (st, line, LINE_LEN, utf16))) {
if (current->start > 0) {
break; // if it is the last subtitle
} else {
@@ -274,14 +274,14 @@ static char *sub_readtext(char *source, char **dest) {
else return NULL; // last text field
}
-static subtitle *sub_read_line_microdvd(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_microdvd(stream_t *st,subtitle *current, int utf16) {
char line[LINE_LEN+1];
char line2[LINE_LEN+1];
char *p, *next;
int i;
do {
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
} while ((sscanf (line,
"{%ld}{}%[^\r\n]",
&(current->start), line2) < 2) &&
@@ -302,14 +302,14 @@ static subtitle *sub_read_line_microdvd(stream_t *st,subtitle *current) {
return current;
}
-static subtitle *sub_read_line_mpl2(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_mpl2(stream_t *st,subtitle *current, int utf16) {
char line[LINE_LEN+1];
char line2[LINE_LEN+1];
char *p, *next;
int i;
do {
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
} while ((sscanf (line,
"[%ld][%ld]%[^\r\n]",
&(current->start), &(current->end), line2) < 3));
@@ -328,19 +328,19 @@ static subtitle *sub_read_line_mpl2(stream_t *st,subtitle *current) {
return current;
}
-static subtitle *sub_read_line_subrip(stream_t* st, subtitle *current) {
+static subtitle *sub_read_line_subrip(stream_t* st, subtitle *current, int utf16) {
char line[LINE_LEN+1];
int a1,a2,a3,a4,b1,b2,b3,b4;
char *p=NULL, *q=NULL;
int len;
while (1) {
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) continue;
current->start = a1*360000+a2*6000+a3*100+a4;
current->end = b1*360000+b2*6000+b3*100+b4;
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
p=q=line;
for (current->lines=1; current->lines < SUB_MAX_TEXT; current->lines++) {
@@ -358,21 +358,21 @@ static subtitle *sub_read_line_subrip(stream_t* st, subtitle *current) {
return current;
}
-static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current, int utf16) {
char line[LINE_LEN+1];
int a1,a2,a3,a4,b1,b2,b3,b4;
char *p=NULL;
int i,len;
while (!current->text[0]) {
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
if ((len=sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",&a1,&a2,&a3,(char *)&i,&a4,&b1,&b2,&b3,(char *)&i,&b4)) < 10)
continue;
current->start = a1*360000+a2*6000+a3*100+a4/10;
current->end = b1*360000+b2*6000+b3*100+b4/10;
for (i=0; i<SUB_MAX_TEXT;) {
int blank = 1;
- if (!stream_read_line (st, line, LINE_LEN)) break;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) break;
len=0;
for (p=line; *p!='\n' && *p!='\r' && *p; p++,len++)
if (*p != ' ' && *p != '\t')
@@ -410,21 +410,21 @@ static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current) {
return current;
}
-static subtitle *sub_read_line_subviewer2(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_subviewer2(stream_t *st,subtitle *current, int utf16) {
char line[LINE_LEN+1];
int a1,a2,a3,a4;
char *p=NULL;
int i,len;
while (!current->text[0]) {
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
if (line[0]!='{')
continue;
if ((len=sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4)) < 4)
continue;
current->start = a1*360000+a2*6000+a3*100+a4/10;
for (i=0; i<SUB_MAX_TEXT;) {
- if (!stream_read_line (st, line, LINE_LEN)) break;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) break;
if (line[0]=='}') break;
len=0;
for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len);
@@ -443,14 +443,14 @@ static subtitle *sub_read_line_subviewer2(stream_t *st,subtitle *current) {
}
-static subtitle *sub_read_line_vplayer(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_vplayer(stream_t *st,subtitle *current, int utf16) {
char line[LINE_LEN+1];
int a1,a2,a3;
char *p=NULL, *next,separator;
int i,len,plen;
while (!current->text[0]) {
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
if ((len=sscanf (line, "%d:%d:%d%c%n",&a1,&a2,&a3,&separator,&plen)) < 4)
continue;
@@ -489,7 +489,7 @@ static subtitle *sub_read_line_vplayer(stream_t *st,subtitle *current) {
return current;
}
-static subtitle *sub_read_line_rt(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_rt(stream_t *st,subtitle *current, int utf16) {
//TODO: This format uses quite rich (sub/super)set of xhtml
// I couldn't check it since DTD is not included.
// WARNING: full XML parses can be required for proper parsing
@@ -499,7 +499,7 @@ static subtitle *sub_read_line_rt(stream_t *st,subtitle *current) {
int i,len,plen;
while (!current->text[0]) {
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
//TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
//to describe the same moment in time. Maybe there are even more formats in use.
//if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8)
@@ -539,7 +539,7 @@ static subtitle *sub_read_line_rt(stream_t *st,subtitle *current) {
return current;
}
-static subtitle *sub_read_line_ssa(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_ssa(stream_t *st,subtitle *current, int utf16) {
/*
* Sub Station Alpha v4 (and v2?) scripts have 9 commas before subtitle
* other Sub Station Alpha scripts have only 8 commas before subtitle
@@ -563,7 +563,7 @@ static subtitle *sub_read_line_ssa(stream_t *st,subtitle *current) {
char *tmp;
do {
- if (!stream_read_line (st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
} while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
"%[^\n\r]", &nothing,
&hour1, &min1, &sec1, &hunsec1,
@@ -640,11 +640,11 @@ static void sub_pp_ssa(subtitle *sub) {
*
* by set, based on code by szabi (dunnowhat sub format ;-)
*/
-static subtitle *sub_read_line_pjs(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_pjs(stream_t *st,subtitle *current, int utf16) {
char line[LINE_LEN+1];
char text[LINE_LEN+1], *s, *d;
- if (!stream_read_line (st, line, LINE_LEN))
+ if (!stream_read_line (st, line, LINE_LEN, utf16))
return NULL;
/* skip spaces */
for (s=line; *s && isspace(*s); s++);
@@ -678,7 +678,7 @@ static subtitle *sub_read_line_pjs(stream_t *st,subtitle *current) {
return current;
}
-static subtitle *sub_read_line_mpsub(stream_t *st, subtitle *current) {
+static subtitle *sub_read_line_mpsub(stream_t *st, subtitle *current, int utf16) {
char line[LINE_LEN+1];
float a,b;
int num=0;
@@ -686,7 +686,7 @@ static subtitle *sub_read_line_mpsub(stream_t *st, subtitle *current) {
do
{
- if (!stream_read_line(st, line, LINE_LEN)) return NULL;
+ if (!stream_read_line(st, line, LINE_LEN, utf16)) return NULL;
} while (sscanf (line, "%f %f", &a, &b) !=2);
mpsub_position += a*mpsub_multiplier;
@@ -695,7 +695,7 @@ static subtitle *sub_read_line_mpsub(stream_t *st, subtitle *current) {
current->end=(int) mpsub_position;
while (num < SUB_MAX_TEXT) {
- if (!stream_read_line (st, line, LINE_LEN)) {
+ if (!stream_read_line (st, line, LINE_LEN, utf16)) {
if (num == 0) return NULL;
else return current;
}
@@ -723,14 +723,14 @@ static subtitle *sub_read_line_mpsub(stream_t *st, subtitle *current) {
subtitle *previous_aqt_sub = NULL;
#endif
-static subtitle *sub_read_line_aqt(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_aqt(stream_t *st,subtitle *current, int utf16) {
char line[LINE_LEN+1];
char *next;
int i;
while (1) {
// try to locate next subtitle
- if (!stream_read_line (st, line, LINE_LEN))
+ if (!stream_read_line (st, line, LINE_LEN, utf16))
return NULL;
if (!(sscanf (line, "-->> %ld", &(current->start)) <1))
break;
@@ -745,14 +745,14 @@ static subtitle *sub_read_line_aqt(stream_t *st,subtitle *current) {
previous_aqt_sub = current;
#endif
- if (!stream_read_line (st, line, LINE_LEN))
+ if (!stream_read_line (st, line, LINE_LEN, utf16))
return NULL;
sub_readtext((char *) &line,&current->text[0]);
current->lines = 1;
current->end = current->start; // will be corrected by next subtitle
- if (!stream_read_line (st, line, LINE_LEN))
+ if (!stream_read_line (st, line, LINE_LEN, utf16))
return current;
next = line,i=1;
@@ -780,7 +780,7 @@ static subtitle *sub_read_line_aqt(stream_t *st,subtitle *current) {
subtitle *previous_subrip09_sub = NULL;
#endif
-static subtitle *sub_read_line_subrip09(stream_t *st,subtitle *current) {
+static subtitle *sub_read_line_subrip09(stream_t *st,subtitle *current, int utf16) {
char line[LINE_LEN+1];
int a1,a2,a3;
char * next=NULL;
@@ -788,7 +788,7 @@ static subtitle *sub_read_line_subrip09(stream_t *st,subtitle *current) {
while (1) {
// try to locate next subtitle
- if (!stream_read_line (st, line, LINE_LEN))
+ if (!stream_read_line (st, line, LINE_LEN, utf16))
return NULL;
if (!((len=sscanf (line, "[%d:%d:%d]",&a1,&a2,&a3)) < 3))
break;
@@ -805,7 +805,7 @@ static subtitle *sub_read_line_subrip09(stream_t *st,subtitle *current) {
previous_subrip09_sub = current;
#endif
- if (!stream_read_line (st, line, LINE_LEN))
+ if (!stream_read_line (st, line, LINE_LEN, utf16))
return NULL;
next = line,i=0;
@@ -832,7 +832,7 @@ static subtitle *sub_read_line_subrip09(stream_t *st,subtitle *current) {
return current;
}
-static subtitle *sub_read_line_jacosub(stream_t* st, subtitle * current)
+static subtitle *sub_read_line_jacosub(stream_t* st, subtitle * current, int utf16)
{
char line1[LINE_LEN], line2[LINE_LEN], directive[LINE_LEN], *p, *q;
unsigned a1, a2, a3, a4, b1, b2, b3, b4, comment = 0;
@@ -844,7 +844,7 @@ static subtitle *sub_read_line_jacosub(stream_t* st, subtitle * current)
memset(line2, 0, LINE_LEN);
memset(directive, 0, LINE_LEN);
while (!current->text[0]) {
- if (!stream_read_line(st, line1, LINE_LEN)) {
+ if (!stream_read_line(st, line1, LINE_LEN, utf16)) {
return NULL;
}
if (sscanf
@@ -1002,7 +1002,7 @@ static subtitle *sub_read_line_jacosub(stream_t* st, subtitle * current)
(*(p + 1) == '~') || (*(p + 1) == '{')) {
++p;
} else if (eol(*(p + 1))) {
- if (!stream_read_line(st, directive, LINE_LEN))
+ if (!stream_read_line(st, directive, LINE_LEN, utf16))
return NULL;
trail_space(directive);
av_strlcat(line2, directive, LINE_LEN);
@@ -1022,14 +1022,14 @@ static subtitle *sub_read_line_jacosub(stream_t* st, subtitle * current)
return current;
}
-static int sub_autodetect (stream_t* st, int *uses_time) {
+static int sub_autodetect (stream_t* st, int *uses_time, int utf16) {
char line[LINE_LEN+1];
int i,j=0;
char p;
while (j < 100) {
j++;
- if (!stream_read_line (st, line, LINE_LEN))
+ if (!stream_read_line (st, line, LINE_LEN, utf16))
return SUB_INVALID;
if (sscanf (line, "{%d}{%d}", &i, &i)==2)
@@ -1283,7 +1283,7 @@ static void adjust_subs_time(subtitle* sub, float subtime, float fps, int block,
}
struct subreader {
- subtitle * (*read)(stream_t *st,subtitle *dest);
+ subtitle * (*read)(stream_t *st,subtitle *dest,int utf16);
void (*post)(subtitle *dest);
const char *name;
};
@@ -1350,6 +1350,7 @@ const char* guess_cp(stream_t *st, const char *preferred_language, const char *f
#endif
sub_data* sub_read_file (char *filename, float fps) {
+ int utf16;
stream_t* fd;
int n_max, n_first, i, j, sub_first, sub_orig;
subtitle *first, *second, *sub, *return_sub, *alloced_sub = NULL;
@@ -1378,15 +1379,19 @@ sub_data* sub_read_file (char *filename, float fps) {
i = 0;
fd=open_stream (filename, NULL, &i); if (!fd) return NULL;
- sub_format=sub_autodetect (fd, &uses_time);
+ sub_format = SUB_INVALID;
+ for (utf16 = 0; sub_format == SUB_INVALID && utf16 < 3; utf16++) {
+ sub_format=sub_autodetect (fd, &uses_time, utf16);
+ stream_reset(fd);
+ stream_seek(fd,0);
+ }
+ utf16--;
+
mpsub_multiplier = (uses_time ? 100.0 : 1.0);
if (sub_format==SUB_INVALID) {mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: Could not determine file format\n");return NULL;}
srp=sr+sub_format;
mp_msg(MSGT_SUBREADER, MSGL_V, "SUB: Detected subtitle file format: %s\n", srp->name);
- stream_reset(fd);
- stream_seek(fd,0);
-
#ifdef CONFIG_ICONV
sub_utf8_prev=sub_utf8;
{
@@ -1430,7 +1435,7 @@ sub_data* sub_read_file (char *filename, float fps) {
sub = &first[sub_num];
#endif
memset(sub, '\0', sizeof(subtitle));
- sub=srp->read(fd,sub);
+ sub=srp->read(fd,sub,utf16);
if(!sub) break; // EOF
#ifdef CONFIG_ICONV
if ((sub!=ERR) && sub_utf8 == 2) sub=subcp_recode(sub);