summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorwm4 <wm4@nowhere>2013-06-24 23:06:34 +0200
committerwm4 <wm4@nowhere>2013-06-25 00:11:57 +0200
commitf48829b546095bd33e243332c965ab58d6481160 (patch)
treecb49baed797bfef8aa173a776f0175971c59caf0
parent709389ce653d5ab11abf8de067cfb5932e642898 (diff)
downloadmpv-f48829b546095bd33e243332c965ab58d6481160.tar.bz2
mpv-f48829b546095bd33e243332c965ab58d6481160.tar.xz
sub: libguess support for -subcp
Actually this is rather disappointing.
-rw-r--r--DOCS/man/en/options.rst9
-rwxr-xr-xconfigure21
-rw-r--r--core/charset_conv.c28
3 files changed, 57 insertions, 1 deletions
diff --git a/DOCS/man/en/options.rst b/DOCS/man/en/options.rst
index aae283d213..956758ef7d 100644
--- a/DOCS/man/en/options.rst
+++ b/DOCS/man/en/options.rst
@@ -2044,6 +2044,15 @@
- ``--subcp=enca:pl`` guess the encoding for Polish, fall back on UTF-8.
- ``--subcp=enca`` try universal detection, fall back on UTF-8.
+ If the player was compiled with libguess support, you can use it with:
+
+ ``--subcp=guess:<language>:<fallback codepage>``
+
+ Note that libguess always needs a language; there is no universal detection
+ mode. Use ``--subcp=guess:help`` to get a list of languages. As with ENCA,
+ the list is printed only when the conversion code actually runs, for
+ example when loading an external subtitle.
+
--sub-delay=<sec>
Delays subtitles by <sec> seconds. Can be negative.
diff --git a/configure b/configure
index b9846e570a..002fe80ff9 100755
--- a/configure
+++ b/configure
@@ -292,6 +292,7 @@ Installation directories:
Optional features:
--disable-encoding disable encoding functionality [enable]
+ --disable-libguess disable libguess [autodetect]
--enable-termcap use termcap database for key codes [autodetect]
--enable-termios use termios database for key codes [autodetect]
--disable-iconv disable iconv for encoding conversion [autodetect]
@@ -463,6 +464,7 @@ networking=yes
_winsock2_h=auto
_smb=auto
_libquvi=auto
+_libguess=auto
_joystick=no
_lirc=auto
_lircc=auto
@@ -663,6 +665,8 @@ for ac_option do
--disable-smb) _smb=no ;;
--enable-libquvi) _libquvi=yes ;;
--disable-libquvi) _libquvi=no ;;
+ --enable-libguess) _libguess=yes ;;
+ --disable-libguess) _libguess=no ;;
--enable-joystick) _joystick=yes ;;
--disable-joystick) _joystick=no ;;
--enable-libav) ffmpeg=yes ;;
@@ -1685,6 +1689,21 @@ else
fi
+echocheck "libguess support"
+if test "$_libguess" = auto ; then # autodetect: stays "no" unless the pkg_config_add check below succeeds
+ _libguess=no
+ if pkg_config_add 'libguess >= 1.0' ; then
+ _libguess=yes
+ fi
+fi
+if test "$_libguess" = yes; then # NOTE(review): an explicit "yes" skips pkg_config_add above; confirm the libguess build flags still get added
+ def_libguess="#define CONFIG_LIBGUESS 1"
+else
+ def_libguess="#undef CONFIG_LIBGUESS"
+fi
+echores "$_libguess"
+
+
echocheck "Samba support (libsmbclient)"
if test "$_smb" = yes; then
libs_mplayer="$libs_mplayer -lsmbclient"
@@ -3128,6 +3147,7 @@ VF_LAVFI = $vf_lavfi
AF_LAVFI = $af_lavfi
LIBSMBCLIENT = $_smb
LIBQUVI = $_libquvi
+LIBGUESS = $_libguess
LIBTHEORA = $_theora
LIRC = $_lirc
MACOSX_BUNDLE = $_macosx_bundle
@@ -3326,6 +3346,7 @@ $def_inet_pton
$def_networking
$def_smb
$def_libquvi
+$def_libguess
$def_socklen_t
$def_vstream
diff --git a/core/charset_conv.c b/core/charset_conv.c
index 15209b30ea..680c8f83f9 100644
--- a/core/charset_conv.c
+++ b/core/charset_conv.c
@@ -31,6 +31,10 @@
#include <enca.h>
#endif
+#ifdef CONFIG_LIBGUESS
+#include <libguess.h>
+#endif
+
#ifdef CONFIG_ICONV
#include <iconv.h>
#endif
@@ -67,7 +71,8 @@ bool mp_charset_requires_guess(const char *user_cp)
{
bstr res[2] = {{0}};
split_colon(user_cp, 2, res);
- return bstrcasecmp0(res[0], "enca") == 0;
+ return bstrcasecmp0(res[0], "enca") == 0 ||
+ bstrcasecmp0(res[0], "guess") == 0;
}
#ifdef CONFIG_ENCA
@@ -102,6 +107,23 @@ static const char *enca_guess(bstr buf, const char *language)
}
#endif
+#ifdef CONFIG_LIBGUESS
+static const char *libguess_guess(bstr buf, const char *language)
+{ /* Guess the charset of buf using libguess.
+   *
+   * buf:      bytes to inspect (buf.start / buf.len are handed to libguess).
+   * language: libguess language name; may be NULL, empty, or "help".
+   *
+   * Returns "UTF-8" when libguess_validate_utf8() gives a nonzero result for
+   * the buffer, NULL when no usable language was given, and otherwise
+   * whatever libguess_determine_encoding() returns.
+   *
+   * The UTF-8 check runs first, so language (including "help") is only
+   * examined for input that did not pass that check.
+   */
+ if (libguess_validate_utf8(buf.start, buf.len))
+ return "UTF-8";
+
+ if (!language || !language[0] || strcmp(language, "help") == 0) { // missing, empty, or explicit "help"
+ mp_msg(MSGT_SUBREADER, MSGL_ERR, "libguess needs a language: "
+ "japanese taiwanese chinese korean russian arabic turkish "
+ "greek hebrew polish baltic\n");
+ return NULL; // the language list above is all the caller gets in this case
+ }
+
+ return libguess_determine_encoding(buf.start, buf.len, language);
+}
+#endif
+
// Runs charset auto-detection on the input buffer, and returns the result.
// If auto-detection fails, NULL is returned.
// If user_cp doesn't refer to any known auto-detection (for example because
@@ -126,6 +148,10 @@ const char *mp_charset_guess(bstr buf, const char *user_cp)
if (bstrcasecmp0(type, "enca") == 0)
res = enca_guess(buf, lang);
#endif
+#ifdef CONFIG_LIBGUESS
+ if (bstrcasecmp0(type, "guess") == 0)
+ res = libguess_guess(buf, lang);
+#endif
if (res) {
mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n",