summaryrefslogtreecommitdiffstats
path: root/subreader.c
diff options
context:
space:
mode:
author henry <henry@b3059339-0415-0410-9bf9-f77b7e298cf2> 2004-05-08 17:52:25 +0000
committer henry <henry@b3059339-0415-0410-9bf9-f77b7e298cf2> 2004-05-08 17:52:25 +0000
commit c1f6ffe35dc2a0094a751d44565480d60addcf8c (patch)
tree b9141861a371fa90ecf0670c821cb99394102c98 /subreader.c
parent 86b61290762a574b8e86fdeb708f116e8f9f1a14 (diff)
download mpv-c1f6ffe35dc2a0094a751d44565480d60addcf8c.tar.bz2
mpv-c1f6ffe35dc2a0094a751d44565480d60addcf8c.tar.xz
ENCA support (http://trific.ath.cx/software/enca/)
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@12444 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'subreader.c')
-rw-r--r--subreader.c89
1 files changed, 85 insertions, 4 deletions
diff --git a/subreader.c b/subreader.c
index 430cd66dae..b492705f46 100644
--- a/subreader.c
+++ b/subreader.c
@@ -19,6 +19,10 @@
#include "mp_msg.h"
#include "subreader.h"
+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
#define ERR ((void *) -1)
#ifdef USE_ICONV
@@ -1037,12 +1041,30 @@ extern float sub_fps;
#ifdef USE_ICONV
static iconv_t icdsc = (iconv_t)(-1);
-void subcp_open (void)
+#ifdef HAVE_ENCA
+/*
+ * Open the iconv descriptor without running ENCA detection.
+ *
+ * When sub_cp has the form "enca:<lang>:<fallback>" (or "ENCA:..."), only
+ * the fallback codepage is handed to subcp_open(); any other value of
+ * sub_cp is passed through unchanged.
+ */
+void subcp_open_noenca ()
+{
+    char enca_lang[100], enca_fallback[100];
+
+    /* sub_cp may be unset (subcp_open() itself checks for NULL), and
+     * calling sscanf() on a NULL string is undefined, so test it first.
+     * The %99s width keeps an over-long fallback name from overflowing
+     * enca_fallback. */
+    if (sub_cp
+        && (sscanf(sub_cp, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
+            || sscanf(sub_cp, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2)) {
+        subcp_open(enca_fallback);
+    } else {
+        subcp_open(sub_cp);
+    }
+}
+#else
+/* Without ENCA there is nothing to strip: open sub_cp as given. */
+void subcp_open_noenca ()
+{
+    subcp_open(sub_cp);
+}
+#endif
+
+void subcp_open (char *current_sub_cp)
{
char *tocp = "UTF-8";
- if (sub_cp){
- if ((icdsc = iconv_open (tocp, sub_cp)) != (iconv_t)(-1)){
+ if (current_sub_cp){
+ if ((icdsc = iconv_open (tocp, current_sub_cp)) != (iconv_t)(-1)){
mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n");
sub_utf8 = 2;
} else
@@ -1246,13 +1268,56 @@ struct subreader {
const char *name;
};
+#ifdef HAVE_ENCA
+#define MAX_GUESS_BUFFER_SIZE (256*1024)
+/*
+ * Guess the character set of a subtitle file using ENCA.
+ *
+ * fd                  open subtitle file; up to MAX_GUESS_BUFFER_SIZE bytes
+ *                     are read from it and the stream is rewound afterwards
+ * preferred_language  ENCA language name to analyse with (compared
+ *                     case-insensitively against ENCA's language list)
+ * fallback            codepage name to use when nothing could be detected
+ *
+ * Returns a strdup()ed codepage name that the caller must free(), or NULL
+ * if that final strdup() fails.
+ */
+void* guess_cp(FILE *fd, char *preferred_language, char *fallback)
+{
+    const char **languages;
+    size_t langcnt = 0, buflen, i;
+    EncaAnalyser analyser;
+    EncaEncoding encoding;
+    unsigned char *buffer;
+    char *detected_sub_cp = NULL;
+
+    /* If the sample buffer cannot be allocated, detection is skipped and
+     * the fallback codepage is returned. */
+    buffer = malloc(MAX_GUESS_BUFFER_SIZE);
+    buflen = buffer ? fread(buffer, 1, MAX_GUESS_BUFFER_SIZE, fd) : 0;
+
+    languages = enca_get_languages(&langcnt);
+    if (!languages) langcnt = 0;
+
+    mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: ");
+    for (i = 0; i < langcnt; i++) {
+        mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]);
+    }
+    mp_msg(MSGT_SUBREADER, MSGL_V, "\n");
+
+    for (i = 0; buffer && i < langcnt; i++) {
+        const char *name;
+
+        if (strcasecmp(languages[i], preferred_language) != 0) continue;
+
+        analyser = enca_analyser_alloc(languages[i]);
+        if (analyser) {
+            encoding = enca_analyse_const(analyser, buffer, buflen);
+            name = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
+            /* Accept the result only when ENCA recognised the charset and
+             * has a name for it in iconv style; otherwise leave
+             * detected_sub_cp unset so the fallback is used.
+             * NOTE(review): assumes enca_charset_name() may return NULL
+             * for charsets without an iconv name -- confirm against the
+             * ENCA API docs. */
+            if (name && encoding.charset != ENCA_CS_UNKNOWN) {
+                mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", name);
+                detected_sub_cp = strdup(name);
+            }
+            enca_analyser_free(analyser);
+        }
+        /* Only one language is analysed; stopping here also ensures a
+         * previously strdup()ed result can never be overwritten. */
+        break;
+    }
+
+    /* Only the array itself is released here, not the strings in it. */
+    free(languages);
+    free(buffer);
+    rewind(fd);
+
+    if (!detected_sub_cp) detected_sub_cp = strdup(fallback);
+
+    return detected_sub_cp;
+}
+#endif
+
sub_data* sub_read_file (char *filename, float fps) {
//filename is assumed to be malloc'ed, free() is used in sub_free()
FILE *fd;
int n_max, n_first, i, j, sub_first, sub_orig;
subtitle *first, *second, *sub, *return_sub;
sub_data *subt_data;
+ char enca_lang[100], enca_fallback[100];
int uses_time = 0, sub_num = 0, sub_errs = 0;
+ char *current_sub_cp=NULL;
struct subreader sr[]=
{
{ sub_read_line_microdvd, NULL, "microdvd" },
@@ -1283,6 +1348,17 @@ sub_data* sub_read_file (char *filename, float fps) {
rewind (fd);
+#ifdef HAVE_ENCA
+ if (sscanf(sub_cp, "enca:%2s:%s", enca_lang, enca_fallback) == 2
+ || sscanf(sub_cp, "ENCA:%2s:%s", enca_lang, enca_fallback) == 2) {
+ current_sub_cp = guess_cp(fd, enca_lang, enca_fallback);
+ } else {
+ current_sub_cp = strdup(sub_cp);
+ }
+#else
+ current_sub_cp = strdup(sub_cp);
+#endif
+
#ifdef USE_ICONV
sub_utf8_prev=sub_utf8;
{
@@ -1296,9 +1372,10 @@ sub_data* sub_read_file (char *filename, float fps) {
break;
}
}
- if (k<0) subcp_open();
+ if (k<0) subcp_open(current_sub_cp);
}
#endif
+ if (current_sub_cp) free(current_sub_cp);
sub_num=0;n_max=32;
first=(subtitle *)malloc(n_max*sizeof(subtitle));
@@ -1790,7 +1867,11 @@ char** sub_filenames(char* path, char *fname)
// does it end with a subtitle extension?
found = 0;
#ifdef USE_ICONV
+#ifdef HAVE_ENCA
+ for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) {
+#else
for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) {
+#endif
#else
for (i = 0; sub_exts[i]; i++) {
#endif