From f48829b546095bd33e243332c965ab58d6481160 Mon Sep 17 00:00:00 2001 From: wm4 Date: Mon, 24 Jun 2013 23:06:34 +0200 Subject: sub: libguess support for -subcp Actually this is rather disappointing. --- DOCS/man/en/options.rst | 9 +++++++++ configure | 21 +++++++++++++++++++++ core/charset_conv.c | 28 +++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/DOCS/man/en/options.rst b/DOCS/man/en/options.rst index aae283d213..956758ef7d 100644 --- a/DOCS/man/en/options.rst +++ b/DOCS/man/en/options.rst @@ -2044,6 +2044,15 @@ - ``--subcp=enca:pl`` guess the encoding for Polish, fall back on UTF-8. - ``--subcp=enca`` try universal detection, fall back on UTF-8. + If the player was compiled with libguess support you can use it with: + + ``--subcp=guess:<language>:<fallback codepage>`` + + Note that libguess always needs a language. There is no universal detection + mode. Use ``--subcp=guess:help`` to get a list of languages (like with ENCA, + it will be printed only if the conversion code is somehow called, for + example when loading an external subtitle). + --sub-delay=<sec> Delays subtitles by <sec> seconds. Can be negative. 
diff --git a/configure b/configure index b9846e570a..002fe80ff9 100755 --- a/configure +++ b/configure @@ -292,6 +292,7 @@ Installation directories: Optional features: --disable-encoding disable encoding functionality [enable] + --disable-libguess disable libguess [autodetect] --enable-termcap use termcap database for key codes [autodetect] --enable-termios use termios database for key codes [autodetect] --disable-iconv disable iconv for encoding conversion [autodetect] @@ -463,6 +464,7 @@ networking=yes _winsock2_h=auto _smb=auto _libquvi=auto +_libguess=auto _joystick=no _lirc=auto _lircc=auto @@ -663,6 +665,8 @@ for ac_option do --disable-smb) _smb=no ;; --enable-libquvi) _libquvi=yes ;; --disable-libquvi) _libquvi=no ;; + --enable-libguess) _libguess=yes ;; + --disable-libguess) _libguess=no ;; --enable-joystick) _joystick=yes ;; --disable-joystick) _joystick=no ;; --enable-libav) ffmpeg=yes ;; @@ -1685,6 +1689,21 @@ else fi +echocheck "libguess support" +if test "$_libguess" = auto ; then + _libguess=no + if pkg_config_add 'libguess >= 1.0' ; then + _libguess=yes + fi +fi +if test "$_libguess" = yes; then + def_libguess="#define CONFIG_LIBGUESS 1" +else + def_libguess="#undef CONFIG_LIBGUESS" +fi +echores "$_libguess" + + echocheck "Samba support (libsmbclient)" if test "$_smb" = yes; then libs_mplayer="$libs_mplayer -lsmbclient" @@ -3128,6 +3147,7 @@ VF_LAVFI = $vf_lavfi AF_LAVFI = $af_lavfi LIBSMBCLIENT = $_smb LIBQUVI = $_libquvi +LIBGUESS = $_libguess LIBTHEORA = $_theora LIRC = $_lirc MACOSX_BUNDLE = $_macosx_bundle @@ -3326,6 +3346,7 @@ $def_inet_pton $def_networking $def_smb $def_libquvi +$def_libguess $def_socklen_t $def_vstream diff --git a/core/charset_conv.c b/core/charset_conv.c index 15209b30ea..680c8f83f9 100644 --- a/core/charset_conv.c +++ b/core/charset_conv.c @@ -31,6 +31,10 @@ #include <enca.h> #endif +#ifdef CONFIG_LIBGUESS +#include <libguess.h> +#endif + #ifdef CONFIG_ICONV #include <iconv.h> #endif @@ -67,7 +71,8 @@ bool mp_charset_requires_guess(const char *user_cp) { 
bstr res[2] = {{0}}; split_colon(user_cp, 2, res); - return bstrcasecmp0(res[0], "enca") == 0; + return bstrcasecmp0(res[0], "enca") == 0 || + bstrcasecmp0(res[0], "guess") == 0; } #ifdef CONFIG_ENCA @@ -102,6 +107,23 @@ static const char *enca_guess(bstr buf, const char *language) } #endif +#ifdef CONFIG_LIBGUESS +static const char *libguess_guess(bstr buf, const char *language) +{ + if (libguess_validate_utf8(buf.start, buf.len)) + return "UTF-8"; + + if (!language || !language[0] || strcmp(language, "help") == 0) { + mp_msg(MSGT_SUBREADER, MSGL_ERR, "libguess needs a language: " + "japanese taiwanese chinese korean russian arabic turkish " + "greek hebrew polish baltic\n"); + return NULL; + } + + return libguess_determine_encoding(buf.start, buf.len, language); +} +#endif + // Runs charset auto-detection on the input buffer, and returns the result. // If auto-detection fails, NULL is returned. // If user_cp doesn't refer to any known auto-detection (for example because @@ -126,6 +148,10 @@ const char *mp_charset_guess(bstr buf, const char *user_cp) if (bstrcasecmp0(type, "enca") == 0) res = enca_guess(buf, lang); #endif +#ifdef CONFIG_LIBGUESS + if (bstrcasecmp0(type, "guess") == 0) + res = libguess_guess(buf, lang); +#endif if (res) { mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n", -- cgit v1.2.3