summary | refs | log | tree | commit | diff | stats
path: root/libass
diff options
context:
space:
mode:
author eugeni <eugeni@b3059339-0415-0410-9bf9-f77b7e298cf2> 2006-08-22 22:11:01 +0000
committer eugeni <eugeni@b3059339-0415-0410-9bf9-f77b7e298cf2> 2006-08-22 22:11:01 +0000
commit 5762122f3768886f24446588f8f1006b660abf8b (patch)
tree 26f63561802f80e82d12687a2818f18395353558 /libass
parent 7764a187e0449a9a205f3c6172de8934116682a4 (diff)
download mpv-5762122f3768886f24446588f8f1006b660abf8b.tar.bz2
mpv-5762122f3768886f24446588f8f1006b660abf8b.tar.xz
SSA/ASS parser reworked, with 2 main results:
support for script embedded fonts (fonts, uuencoded directly into script) added; matroska interface functions have got more sensible names. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@19498 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libass')
-rw-r--r-- libass/ass.c 342
-rw-r--r-- libass/ass.h 4
-rw-r--r-- libass/ass_types.h 6
3 files changed, 237 insertions, 115 deletions
diff --git a/libass/ass.c b/libass/ass.c
index e5f9f5b8f4..f0a9bb1843 100644
--- a/libass/ass.c
+++ b/libass/ass.c
@@ -18,6 +18,7 @@
#include <iconv.h>
extern char *sub_cp;
#endif
+extern int extract_embedded_fonts;
#include "mp_msg.h"
#include "ass.h"
@@ -26,12 +27,27 @@ extern char *sub_cp;
char *get_path(char *);
+struct parser_priv_s { // private parser state; one instance is allocated per track in ass_new_track
+ enum {PST_UNKNOWN = 0, PST_INFO, PST_STYLES, PST_EVENTS, PST_FONTS} state; // script section currently being parsed; switched by process_line on "[...]" header lines
+ char* fontname; // name of the embedded font being collected (as returned by validate_fname); 0 when no font is in progress
+ char* fontdata; // still-encoded font text accumulated line by line in process_fonts_line
+ int fontdata_size; // bytes currently allocated for fontdata
+ int fontdata_used; // bytes of fontdata filled so far
+};
+
#define ASS_STYLES_ALLOC 20
#define ASS_EVENTS_ALLOC 200
void ass_free_track(ass_track_t* track) {
int i;
+ if (track->parser_priv) {
+ if (track->parser_priv->fontname)
+ free(track->parser_priv->fontname);
+ if (track->parser_priv->fontdata)
+ free(track->parser_priv->fontdata);
+ free(track->parser_priv);
+ }
if (track->style_format)
free(track->style_format);
if (track->event_format)
@@ -379,67 +395,207 @@ static int process_style(ass_track_t* track, char *str)
}
-/**
- * \brief Parse a header line
- * \param track track
- * \param str string to parse, zero-terminated
-*/
-static int process_header_line(ass_track_t* track, char *str)
+static int process_styles_line(ass_track_t* track, char *str)
{
- static int events_section_started = 0;
-
- mp_msg(MSGT_GLOBAL, MSGL_DBG2, "=== Header: %s\n", str);
- if (strncmp(str, "PlayResX:", 9)==0) {
- track->PlayResX = atoi(str + 9);
- } else if (strncmp(str,"PlayResY:", 9)==0) {
- track->PlayResY = atoi(str + 9);
- } else if (strncmp(str,"Timer:", 6)==0) {
- track->Timer = atof(str + 6);
- } else if (strstr(str,"Styles]")) {
- events_section_started = 0;
- if (strchr(str, '+'))
- track->track_type = TRACK_TYPE_ASS;
- else
- track->track_type = TRACK_TYPE_SSA;
- } else if (strncmp(str,"[Events]", 8)==0) {
- events_section_started = 1;
- } else if (strncmp(str,"Format:", 7)==0) {
+ if (!strncmp(str,"Format:", 7)) {
char* p = str + 7;
skip_spaces(&p);
- if (events_section_started) {
- track->event_format = strdup(p);
- mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format);
- } else {
- track->style_format = strdup(p);
- mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format);
- }
- } else if (strncmp(str,"Style:", 6)==0) {
+ track->style_format = strdup(p);
+ mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Style format: %s\n", track->style_format);
+ } else if (!strncmp(str,"Style:", 6)) {
char* p = str + 6;
skip_spaces(&p);
process_style(track, p);
- } else if (strncmp(str,"WrapStyle:", 10)==0) {
+ }
+ return 0;
+}
+
+/**
+ * \brief Parse one line of the [Script Info] section
+ * \param track track whose header fields are updated
+ * \param str line to parse, zero-terminated
+ * \return always 0
+ *
+ * Recognises the PlayResX, PlayResY, Timer and WrapStyle keys; every other
+ * line is silently ignored.
+ */
+static int process_info_line(ass_track_t* track, char *str)
+{
+    if (strncmp(str, "PlayResX:", 9) == 0)
+        track->PlayResX = atoi(str + 9);
+    else if (strncmp(str, "PlayResY:", 9) == 0)
+        track->PlayResY = atoi(str + 9);
+    else if (strncmp(str, "Timer:", 6) == 0)
+        track->Timer = atof(str + 6);
+    else if (strncmp(str, "WrapStyle:", 10) == 0)
+        track->WrapStyle = atoi(str + 10);
return 0;
}
+/**
+ * \brief Parse one line of the [Events] section
+ * \param track track that receives the event format / new event
+ * \param str line to parse, zero-terminated
+ * \return always 0
+ *
+ * "Format:" lines replace track->event_format, "Dialogue:" lines allocate a
+ * new event and fill it via process_event_tail; anything else is logged.
+ */
+static int process_events_line(ass_track_t* track, char *str)
+{
+    if (!strncmp(str, "Format:", 7)) {
+        char* p = str + 7;
+        skip_spaces(&p);
+        // A script may (incorrectly) repeat the Format line; release the
+        // previous copy instead of leaking it.
+        free(track->event_format);
+        track->event_format = strdup(p);
+        mp_msg(MSGT_GLOBAL, MSGL_DBG2, "Event format: %s\n", track->event_format);
+        return 0;
+    }
+
+    if (!strncmp(str, "Dialogue:", 9)) {
+        // This should never be reached for embedded subtitles.
+        // They have slightly different format and are parsed in ass_process_chunk,
+        // called directly from demuxer
+        int eid;
+        ass_event_t* event;
+
+        str += 9;
+        skip_spaces(&str);
+
+        eid = ass_alloc_event(track);
+        event = track->events + eid;
+
+        process_event_tail(track, event, str, 0);
+        return 0;
+    }
+
+    mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str);
+    return 0;
+}
+
+/**
+ * \brief Decode one group of SSA-encoded font characters
+ * \param c1 first encoded character
+ * \param c2 second encoded character
+ * \param c3 third encoded character (ignored when cnt < 2)
+ * \param c4 fourth encoded character (ignored when cnt < 3)
+ * \param dst where the decoded bytes are written
+ * \param cnt number of bytes to emit; values outside 0..3 are clamped
+ * \return dst advanced past the bytes written
+ *
+ * Each character carries 6 bits stored as (bits + 33). Adapted from mkvtoolnix.
+ */
+static unsigned char* decode_chars(unsigned char c1, unsigned char c2,
+        unsigned char c3, unsigned char c4, unsigned char* dst, int cnt)
+{
+    const unsigned char in[4] = { c1, c2, c3, c4 };
+    uint32_t value = 0;
+    int i;
+
+    if (cnt <= 0)
+        return dst;
+    if (cnt > 3)
+        cnt = 3;
+
+    // cnt output bytes are fully described by the first cnt + 1 characters.
+    // Placeholder arguments passed for a short trailing group must not be
+    // folded in: subtracting 33 from them used to yield negative terms that
+    // were left-shifted (undefined) and borrowed into the significant bits.
+    // Unsigned arithmetic plus the 6-bit mask keeps every shift well defined.
+    for (i = 0; i <= cnt; ++i)
+        value |= (uint32_t)((in[i] - 33u) & 0x3fu) << (6 * (3 - i));
+
+    for (i = 0; i < cnt; ++i)
+        *dst++ = (unsigned char)((value >> (16 - 8 * i)) & 0xff);
+    return dst;
+}
+
+/**
+ * \brief Decode the embedded font collected so far and reset the collector
+ * \param track track whose parser_priv holds the font name and encoded data
+ * \return always 0
+ *
+ * The decoded font is handed to ass_process_font when extract_embedded_fonts
+ * is set. Whether or not decoding succeeds, fontname and fontdata are freed
+ * and the font-related fields of parser_priv are cleared.
+ */
+static int decode_font(ass_track_t* track)
+{
+    parser_priv_t* priv = track->parser_priv;
+    unsigned char* p = (unsigned char*)priv->fontdata;
+    unsigned char* q;
+    unsigned char* buf = NULL;
+    int i;
+    int size = priv->fontdata_used; // encoded size
+    int dsize; // decoded size
+
+    mp_msg(MSGT_GLOBAL, MSGL_V, "font: %d bytes encoded data \n", size);
+    // a single leftover character cannot describe a whole byte
+    if (size % 4 == 1) {
+        mp_msg(MSGT_GLOBAL, MSGL_ERR, "bad encoded data size\n");
+        goto error_decode_font;
+    }
+    buf = malloc(size / 4 * 3 + 2);
+    if (!buf) {
+        mp_msg(MSGT_GLOBAL, MSGL_ERR, "font: out of memory\n");
+        goto error_decode_font;
+    }
+    q = buf;
+    for (i = 0; i < size / 4; i++, p += 4)
+        q = decode_chars(p[0], p[1], p[2], p[3], q, 3);
+
+    // Trailing partial group: pad with '!' (33), the encoding of six zero
+    // bits. Padding with 0 made decode_chars subtract 33 from it and corrupt
+    // the last decoded byte(s).
+    if (size % 4 == 2)
+        q = decode_chars(p[0], p[1], '!', '!', q, 1);
+    else if (size % 4 == 3)
+        q = decode_chars(p[0], p[1], p[2], '!', q, 2);
+
+    dsize = q - buf;
+    assert(dsize <= size / 4 * 3 + 2);
+
+    if (extract_embedded_fonts)
+        ass_process_font(priv->fontname, (char*)buf, dsize);
+
+error_decode_font:
+    free(buf);
+    free(priv->fontname);
+    free(priv->fontdata);
+    priv->fontname = NULL;
+    priv->fontdata = NULL;
+    priv->fontdata_size = 0;
+    priv->fontdata_used = 0;
+    return 0;
+}
+
+static char* validate_fname(char* name);
+
+/**
+ * \brief Handle one line of the [Fonts] section
+ * \param track track whose parser_priv collects the font
+ * \param str line to parse, zero-terminated
+ * \return always 0
+ *
+ * A "fontname:" line finishes the font in progress (if any) and starts a new
+ * one. Any other line is appended to the encoded data of the current font;
+ * lines longer than 80 characters, and data lines seen before any
+ * "fontname:", are logged and dropped.
+ */
+static int process_fonts_line(ass_track_t* track, char *str)
+{
+    parser_priv_t* priv = track->parser_priv;
+    int len;
+
+    if (!strncmp(str, "fontname:", 9)) {
+        char* p = str + 9;
+        skip_spaces(&p);
+        // there is no end-of-font marker, so a new header closes the old font
+        if (priv->fontname) {
+            decode_font(track);
+        }
+        priv->fontname = validate_fname(p);
+        mp_msg(MSGT_GLOBAL, MSGL_V, "fontname: %s\n", priv->fontname);
+        return 0;
+    }
+
+    if (!priv->fontname) {
+        mp_msg(MSGT_GLOBAL, MSGL_V, "Not understood: %s \n", str);
+        return 0;
+    }
+
+    len = strlen(str);
+    if (len > 80) {
+        mp_msg(MSGT_GLOBAL, MSGL_WARN, "Font line too long: %d, %s\n", len, str);
+        return 0;
+    }
+    if (priv->fontdata_used + len > priv->fontdata_size) {
+        // Grow through a temporary: on failure realloc leaves the old block
+        // allocated, and overwriting the only pointer to it would leak it and
+        // make the memcpy below write through NULL.
+        int new_size = priv->fontdata_size + 100 * 1024;
+        char* grown = realloc(priv->fontdata, new_size);
+        if (!grown) {
+            mp_msg(MSGT_GLOBAL, MSGL_ERR, "font: out of memory\n");
+            // a truncated font is useless; abandon it entirely
+            free(priv->fontname);
+            free(priv->fontdata);
+            priv->fontname = NULL;
+            priv->fontdata = NULL;
+            priv->fontdata_size = 0;
+            priv->fontdata_used = 0;
+            return 0;
+        }
+        priv->fontdata = grown;
+        priv->fontdata_size = new_size;
+    }
+    memcpy(priv->fontdata + priv->fontdata_used, str, len);
+    priv->fontdata_used += len;
+
+    return 0;
+}
+
/**
- * \brief Process CodecPrivate section of subtitle stream
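+ * \brief Parse one line of the script: a "[...]" header switches the current section, any other line goes to that section's handler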
* \param track track
- * \param data string to parse
- * \param size length of data
- CodecPrivate section contains [Stream Info] and [V4+ Styles] sections
+ * \param str string to parse, zero-terminated
*/
-void ass_process_chunk(ass_track_t* track, char *data, int size)
+static int process_line(ass_track_t* track, char *str)
{
- char* str = malloc(size + 1);
- char* p;
- int sid;
+ if (strstr(str, "[Script Info]")) { // FIXME: strstr to skip possible BOM at the beginning of the script
+ track->parser_priv->state = PST_INFO;
+ } else if (!strncmp(str, "[V4 Styles]", 11)) {
+ track->parser_priv->state = PST_STYLES;
+ track->track_type = TRACK_TYPE_SSA;
+ } else if (!strncmp(str, "[V4+ Styles]", 12)) {
+ track->parser_priv->state = PST_STYLES;
+ track->track_type = TRACK_TYPE_ASS;
+ } else if (!strncmp(str, "[Events]", 8)) {
+ track->parser_priv->state = PST_EVENTS;
+ } else if (!strncmp(str, "[Fonts]", 7)) {
+ track->parser_priv->state = PST_FONTS;
+ } else {
+ switch (track->parser_priv->state) {
+ case PST_INFO:
+ process_info_line(track, str);
+ break;
+ case PST_STYLES:
+ process_styles_line(track, str);
+ break;
+ case PST_EVENTS:
+ process_events_line(track, str);
+ break;
+ case PST_FONTS:
+ process_fonts_line(track, str);
+ break;
+ default:
+ break;
+ }
+ }
- memcpy(str, data, size);
- str[size] = '\0';
+ // there is no explicit end-of-font marker in ssa/ass
+ if ((track->parser_priv->state != PST_FONTS) && (track->parser_priv->fontname))
+ decode_font(track);
- p = str;
+ return 0;
+}
+
+static int process_text(ass_track_t* track, char* str)
+{
+ char* p = str;
while(1) {
char* q;
for (;((*p=='\r')||(*p=='\n'));++p) {}
@@ -448,11 +604,30 @@ void ass_process_chunk(ass_track_t* track, char *data, int size)
break;
if (*q != '\0')
*(q++) = '\0';
- process_header_line(track, p);
+ process_line(track, p);
if (*q == '\0')
break;
p = q;
}
+ return 0;
+}
+
+/**
+ * \brief Process CodecPrivate section of subtitle stream
+ * \param track track
+ * \param data string to parse
+ * \param size length of data
+ CodecPrivate section contains [Script Info] and [V4+ Styles] ([V4 Styles] for SSA) sections
+*/
+void ass_process_codec_private(ass_track_t* track, char *data, int size)
+{
+ char* str = malloc(size + 1);
+ int sid;
+
+ memcpy(str, data, size);
+ str[size] = '\0';
+
+ process_text(track, str);
free(str);
// add "Default" style to the end
@@ -464,6 +639,7 @@ void ass_process_chunk(ass_track_t* track, char *data, int size)
if (!track->event_format) {
// probably an mkv produced by ancient mkvtoolnix
// such files don't have [Events] and Format: headers
+ track->parser_priv->state = PST_EVENTS;
if (track->track_type == TRACK_TYPE_SSA)
track->event_format = strdup("Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text");
else
@@ -488,7 +664,7 @@ static int check_duplicate_event(ass_track_t* track, int ReadOrder)
* \param timecode starting time of the event (milliseconds)
* \param duration duration of the event (milliseconds)
*/
-void ass_process_line(ass_track_t* track, char *data, int size, long long timecode, long long duration)
+void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration)
{
char* str;
int eid;
@@ -535,29 +711,6 @@ void ass_process_line(ass_track_t* track, char *data, int size, long long timeco
free(str);
}
-/**
- * \brief Process a line from external file.
- * \param track track
- * \param str string to parse
- * \param size length of data
-*/
-static void ass_process_external_line(ass_track_t* track, char *str, int size)
-{
- int eid;
- ass_event_t* event;
-
- eid = ass_alloc_event(track);
- event = track->events + eid;
-
- if (strncmp("Dialogue:", str, 9) != 0)
- return;
-
- str += 9;
- while (*str == ' ') {++str;}
-
- process_event_tail(track, event, str, 0);
-}
-
#ifdef USE_ICONV
/** \brief recode buffer to utf-8
* constraint: sub_cp != 0
@@ -641,8 +794,6 @@ ass_track_t* ass_read_file(char* fname)
long sz;
long bytes_read;
char* buf;
- char* p;
- int events_reached;
ass_track_t* track;
FILE* fp = fopen(fname, "rb");
@@ -698,49 +849,15 @@ ass_track_t* ass_read_file(char* fname)
track->name = strdup(fname);
// process header
- events_reached = 0;
- p = buf;
- while (p && (*p)) {
- while (*p == '\n') {++p;}
- if (strncmp(p, "[Events]", 8) == 0) {
- events_reached = 1;
- } else if ((strncmp(p, "Format:", 7) == 0) && (events_reached)) {
- p = strchr(p, '\n');
- if (p == 0) {
- mp_msg(MSGT_GLOBAL, MSGL_WARN, "Incomplete subtitles\n");
- free(buf);
- return 0;
- }
- ass_process_chunk(track, buf, p - buf + 1);
- ++p;
- break;
- }
- p = strchr(p, '\n');
- }
- // process events
- while (p && (*p)) {
- char* next;
- int len;
- while (*p == '\n') {++p;}
- next = strchr(p, '\n');
- len = 0;
- if (next) {
- len = next - p;
- *next = 0;
- } else {
- len = strlen(p);
- }
- ass_process_external_line(track, p, len);
- if (next) {
- p = next + 1;
- continue;
- } else
- break;
- }
-
+ process_text(track, buf);
+
+ // there is no explicit end-of-font marker in ssa/ass
+ if (track->parser_priv->fontname)
+ decode_font(track);
+
free(buf);
- if (!events_reached) {
+ if (track->track_type == TRACK_TYPE_UNKNOWN) {
ass_free_track(track);
return 0;
}
@@ -853,6 +970,7 @@ long long ass_step_sub(ass_track_t* track, long long now, int movement) {
+/**
+ * \brief Allocate a zero-initialised track together with its parser state
+ * \return the new track, or NULL if either allocation fails
+ */
ass_track_t* ass_new_track(void) {
ass_track_t* track = calloc(1, sizeof(ass_track_t));
+    if (!track)
+        return NULL;
+    track->parser_priv = calloc(1, sizeof(parser_priv_t));
+    // the line parser dereferences parser_priv unconditionally, so a track
+    // without it must not be handed out
+    if (!track->parser_priv) {
+        free(track);
+        return NULL;
+    }
return track;
}
diff --git a/libass/ass.h b/libass/ass.h
index 54fef6a719..493ba2f379 100644
--- a/libass/ass.h
+++ b/libass/ass.h
@@ -131,7 +131,7 @@ void ass_free_event(ass_track_t* track, int eid);
* \param data string to parse
* \param size length of data
*/
-void ass_process_chunk(ass_track_t* track, char *data, int size);
+void ass_process_codec_private(ass_track_t* track, char *data, int size);
/**
* \brief Process a chunk of subtitle stream data. In matroska, this containes exactly 1 event (or a commentary)
@@ -141,7 +141,7 @@ void ass_process_chunk(ass_track_t* track, char *data, int size);
* \param timecode starting time of the event (milliseconds)
* \param duration duration of the event (milliseconds)
*/
-void ass_process_line(ass_track_t* track, char *data, int size, long long timecode, long long duration);
+void ass_process_chunk(ass_track_t* track, char *data, int size, long long timecode, long long duration);
/**
* \brief Read subtitles from file.
diff --git a/libass/ass_types.h b/libass/ass_types.h
index d742803359..1743bfec10 100644
--- a/libass/ass_types.h
+++ b/libass/ass_types.h
@@ -53,6 +53,8 @@ typedef struct ass_event_s {
char* Text;
} ass_event_t;
+typedef struct parser_priv_s parser_priv_t;
+
/// ass track represent either an external script or a matroska subtitle stream (no real difference between them)
/// it can be used in rendering after the headers are parsed (i.e. events format line read)
typedef struct ass_track_s {
@@ -66,7 +68,7 @@ typedef struct ass_track_s {
char* style_format; // style format line (everything after "Format: ")
char* event_format; // event format line
- enum {TRACK_TYPE_ASS, TRACK_TYPE_SSA} track_type;
+ enum {TRACK_TYPE_UNKNOWN = 0, TRACK_TYPE_ASS, TRACK_TYPE_SSA} track_type;
// script header fields
int PlayResX;
@@ -77,6 +79,8 @@ typedef struct ass_track_s {
int default_style; // index of default style
char* name; // file name in case of external subs, 0 for streams
+
+ parser_priv_t* parser_priv;
} ass_track_t;
#endif