diff options
author | henry <henry@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-05-08 17:52:25 +0000 |
---|---|---|
committer | henry <henry@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-05-08 17:52:25 +0000 |
commit | c1f6ffe35dc2a0094a751d44565480d60addcf8c (patch) | |
tree | b9141861a371fa90ecf0670c821cb99394102c98 /subreader.c | |
parent | 86b61290762a574b8e86fdeb708f116e8f9f1a14 (diff) | |
download | mpv-c1f6ffe35dc2a0094a751d44565480d60addcf8c.tar.bz2 mpv-c1f6ffe35dc2a0094a751d44565480d60addcf8c.tar.xz |
ENCA support (http://trific.ath.cx/software/enca/)
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@12444 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'subreader.c')
-rw-r--r-- | subreader.c | 89 |
1 files changed, 85 insertions, 4 deletions
diff --git a/subreader.c b/subreader.c index 430cd66dae..b492705f46 100644 --- a/subreader.c +++ b/subreader.c @@ -19,6 +19,10 @@ #include "mp_msg.h" #include "subreader.h" +#ifdef HAVE_ENCA +#include <enca.h> +#endif + #define ERR ((void *) -1) #ifdef USE_ICONV @@ -1037,12 +1041,30 @@ extern float sub_fps; #ifdef USE_ICONV static iconv_t icdsc = (iconv_t)(-1); -void subcp_open (void) +#ifdef HAVE_ENCA +void subcp_open_noenca () +{ + char enca_lang[100], enca_fallback[100]; + if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2 + || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) { + subcp_open(enca_fallback); + } else { + subcp_open(sub_cp); + } +} +#else +void subcp_open_noenca () +{ + subcp_open(sub_cp); +} +#endif + +void subcp_open (char *current_sub_cp) { char *tocp = "UTF-8"; - if (sub_cp){ - if ((icdsc = iconv_open (tocp, sub_cp)) != (iconv_t)(-1)){ + if (current_sub_cp){ + if ((icdsc = iconv_open (tocp, current_sub_cp)) != (iconv_t)(-1)){ mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n"); sub_utf8 = 2; } else @@ -1246,13 +1268,56 @@ struct subreader { const char *name; }; +#ifdef HAVE_ENCA +#define MAX_GUESS_BUFFER_SIZE (256*1024) +void* guess_cp(FILE *fd, char *preferred_language, char *fallback) +{ + const char **languages; + size_t langcnt, buflen; + EncaAnalyser analyser; + EncaEncoding encoding; + unsigned char *buffer; + char *detected_sub_cp = NULL; + int i; + + buffer = (unsigned char*)malloc(MAX_GUESS_BUFFER_SIZE*sizeof(char)); + buflen = fread(buffer, 1, MAX_GUESS_BUFFER_SIZE, fd); + + languages = enca_get_languages(&langcnt); + mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: "); + for (i = 0; i < langcnt; i++) { + mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]); + } + mp_msg(MSGT_SUBREADER, MSGL_V, "\n"); + + for (i = 0; i < langcnt; i++) { + if (strcasecmp(languages[i], preferred_language) != 0) continue; + analyser = enca_analyser_alloc(languages[i]); + encoding = enca_analyse_const(analyser, buffer, buflen); + mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV)); + detected_sub_cp = strdup(enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV)); + enca_analyser_free(analyser); + } + + free(languages); + free(buffer); + rewind(fd); + + if (!detected_sub_cp) detected_sub_cp = strdup(fallback); + + return detected_sub_cp; +} +#endif + sub_data* sub_read_file (char *filename, float fps) { //filename is assumed to be malloc'ed, free() is used in sub_free() FILE *fd; int n_max, n_first, i, j, sub_first, sub_orig; subtitle *first, *second, *sub, *return_sub; sub_data *subt_data; + char enca_lang[100], enca_fallback[100]; int uses_time = 0, sub_num = 0, sub_errs = 0; + char *current_sub_cp=NULL; struct subreader sr[]= { { sub_read_line_microdvd, NULL, "microdvd" }, @@ -1283,6 +1348,17 @@ sub_data* sub_read_file (char *filename, float fps) { rewind (fd); +#ifdef HAVE_ENCA + if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2 + || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) { + current_sub_cp = guess_cp(fd, enca_lang, enca_fallback); + } else { + current_sub_cp = strdup(sub_cp); + } +#else + current_sub_cp = strdup(sub_cp); +#endif + #ifdef USE_ICONV sub_utf8_prev=sub_utf8; { @@ -1296,9 +1372,10 @@ sub_data* sub_read_file (char *filename, float fps) { break; } } - if (k<0) subcp_open(); + if (k<0) subcp_open(current_sub_cp); } #endif + if (current_sub_cp) free(current_sub_cp); sub_num=0;n_max=32; first=(subtitle *)malloc(n_max*sizeof(subtitle)); @@ -1790,7 +1867,11 @@ char** sub_filenames(char* path, char *fname) // does it end with a subtitle extension? found = 0; #ifdef USE_ICONV +#ifdef HAVE_ENCA + for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) { +#else for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) { +#endif #else for (i = 0; sub_exts[i]; i++) { #endif |