summary | refs | log | tree | commit | diff | stats
path: root/mpvcore
diff options
context:
space:
mode:
author: wm4 <wm4@nowhere> 2013-08-15 21:42:17 +0200
committer: wm4 <wm4@nowhere> 2013-08-15 23:40:03 +0200
commit: fe3c44511234e00f67f9fccef281efb95c326576 (patch)
tree: 085d61668e9f63fcb3d3c5b15cbef16b8878620e /mpvcore
parent: 00f735d5cba22713ba9a377876b7cfd333c0b2b9 (diff)
download: mpv-fe3c44511234e00f67f9fccef281efb95c326576.tar.bz2
mpv-fe3c44511234e00f67f9fccef281efb95c326576.tar.xz
sub: allow specifying a fallback codepage if input is not UTF-8
Normally, --subcp always forces conversion: the given codepage is applied even if the UTF-8 check on the input succeeds. Extend --subcp to allow specifying a codepage as a fallback that is used only if the input is not valid UTF-8. So, for example, --subcp=utf8:cp1250 will use UTF-8 if the input looks like UTF-8, and will fall back to cp1250 if the UTF-8 check fails. I think this should actually be the default, but on the other hand, making it the default would change the semantics of the option, and a user would actually expect --subcp to force conversion, rather than silently using UTF-8 if that happens to work.
Diffstat (limited to 'mpvcore')
-rw-r--r-- mpvcore/charset_conv.c 12
1 file changed, 10 insertions, 2 deletions
diff --git a/mpvcore/charset_conv.c b/mpvcore/charset_conv.c
index 594ba4486c..1a2908ad08 100644
--- a/mpvcore/charset_conv.c
+++ b/mpvcore/charset_conv.c
@@ -70,9 +70,13 @@ static int split_colon(const char *user_cp, int max, bstr *out_arr)
bool mp_charset_requires_guess(const char *user_cp)
{
bstr res[2] = {{0}};
- split_colon(user_cp, 2, res);
+ int r = split_colon(user_cp, 2, res);
+ // Note that "utf8" is the UTF-8 codepage, while "utf8:..." specifies UTF-8
+ // by default, plus a codepage that is used if the input is not UTF-8.
return bstrcasecmp0(res[0], "enca") == 0 ||
- bstrcasecmp0(res[0], "guess") == 0;
+ bstrcasecmp0(res[0], "guess") == 0 ||
+ (r > 1 && bstrcasecmp0(res[0], "utf-8") == 0) ||
+ (r > 1 && bstrcasecmp0(res[0], "utf8") == 0);
}
#ifdef CONFIG_ENCA
@@ -155,6 +159,10 @@ const char *mp_charset_guess(bstr buf, const char *user_cp, int flags)
if (bstrcasecmp0(type, "guess") == 0)
res = libguess_guess(buf, lang);
#endif
+ if (bstrcasecmp0(type, "utf8") == 0 || bstrcasecmp0(type, "utf-8") == 0) {
+ if (!fallback)
+ fallback = params[1].start; // must be already 0-terminated
+ }
if (res) {
mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n",