diff options
author | wm4 <wm4@nowhere> | 2013-06-24 23:06:34 +0200 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2013-06-25 00:11:57 +0200 |
commit | f48829b546095bd33e243332c965ab58d6481160 (patch) | |
tree | cb49baed797bfef8aa173a776f0175971c59caf0 /core | |
parent | 709389ce653d5ab11abf8de067cfb5932e642898 (diff) | |
download | mpv-f48829b546095bd33e243332c965ab58d6481160.tar.bz2 mpv-f48829b546095bd33e243332c965ab58d6481160.tar.xz |
sub: libguess support for -subcp
Actually this is rather disappointing.
Diffstat (limited to 'core')
-rw-r--r-- | core/charset_conv.c | 28 |
1 files changed, 27 insertions, 1 deletions
diff --git a/core/charset_conv.c b/core/charset_conv.c
index 15209b30ea..680c8f83f9 100644
--- a/core/charset_conv.c
+++ b/core/charset_conv.c
@@ -31,6 +31,10 @@
 #include <enca.h>
 #endif
 
+#ifdef CONFIG_LIBGUESS
+#include <libguess.h>
+#endif
+
 #ifdef CONFIG_ICONV
 #include <iconv.h>
 #endif
@@ -67,7 +71,8 @@ bool mp_charset_requires_guess(const char *user_cp)
 {
     bstr res[2] = {{0}};
     split_colon(user_cp, 2, res);
-    return bstrcasecmp0(res[0], "enca") == 0;
+    return bstrcasecmp0(res[0], "enca") == 0 ||
+           bstrcasecmp0(res[0], "guess") == 0;
 }
 
 #ifdef CONFIG_ENCA
@@ -102,6 +107,23 @@ static const char *enca_guess(bstr buf, const char *language)
 }
 #endif
 
+#ifdef CONFIG_LIBGUESS
+static const char *libguess_guess(bstr buf, const char *language)
+{
+    if (libguess_validate_utf8(buf.start, buf.len))
+        return "UTF-8";
+
+    if (!language || !language[0] || strcmp(language, "help") == 0) {
+        mp_msg(MSGT_SUBREADER, MSGL_ERR, "libguess needs a language: "
+               "japanese taiwanese chinese korean russian arabic turkish "
+               "greek hebrew polish baltic\n");
+        return NULL;
+    }
+
+    return libguess_determine_encoding(buf.start, buf.len, language);
+}
+#endif
+
 // Runs charset auto-detection on the input buffer, and returns the result.
 // If auto-detection fails, NULL is returned.
 // If user_cp doesn't refer to any known auto-detection (for example because
@@ -126,6 +148,10 @@ const char *mp_charset_guess(bstr buf, const char *user_cp)
     if (bstrcasecmp0(type, "enca") == 0)
         res = enca_guess(buf, lang);
 #endif
+#ifdef CONFIG_LIBGUESS
+    if (bstrcasecmp0(type, "guess") == 0)
+        res = libguess_guess(buf, lang);
+#endif
 
     if (res) {
         mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n",