diff options
author | wm4 <wm4@nowhere> | 2013-08-15 21:42:17 +0200 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2013-08-15 23:40:03 +0200 |
commit | fe3c44511234e00f67f9fccef281efb95c326576 (patch) | |
tree | 085d61668e9f63fcb3d3c5b15cbef16b8878620e /mpvcore/charset_conv.c | |
parent | 00f735d5cba22713ba9a377876b7cfd333c0b2b9 (diff) | |
download | mpv-fe3c44511234e00f67f9fccef281efb95c326576.tar.bz2 mpv-fe3c44511234e00f67f9fccef281efb95c326576.tar.xz |
sub: allow specifying a fallback codepage if input is not UTF-8
Normally, --subcp always forces conversion. It does so unconditionally,
even if the UTF-8 check on the input succeeds.
Extend the --subcp option to allow a codepage as a fallback if UTF-8 doesn't
work. So, for example --subcp=utf8:cp1250 will use UTF-8 if the input
looks like UTF-8, and will fall back to use cp1250 if the UTF-8 check
fails.
I think this should actually be the default, but on the other hand,
this changes the semantics of the option, and a user would actually
expect --subcp to force conversion, rather than silently using UTF-8
if that happens to work.
Diffstat (limited to 'mpvcore/charset_conv.c')
-rw-r--r-- | mpvcore/charset_conv.c | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/mpvcore/charset_conv.c b/mpvcore/charset_conv.c index 594ba4486c..1a2908ad08 100644 --- a/mpvcore/charset_conv.c +++ b/mpvcore/charset_conv.c @@ -70,9 +70,13 @@ static int split_colon(const char *user_cp, int max, bstr *out_arr) bool mp_charset_requires_guess(const char *user_cp) { bstr res[2] = {{0}}; - split_colon(user_cp, 2, res); + int r = split_colon(user_cp, 2, res); + // Note that "utf8" is the UTF-8 codepage, while "utf8:..." specifies UTF-8 + // by default, plus a codepage that is used if the input is not UTF-8. return bstrcasecmp0(res[0], "enca") == 0 || - bstrcasecmp0(res[0], "guess") == 0; + bstrcasecmp0(res[0], "guess") == 0 || + (r > 1 && bstrcasecmp0(res[0], "utf-8") == 0) || + (r > 1 && bstrcasecmp0(res[0], "utf8") == 0); } #ifdef CONFIG_ENCA @@ -155,6 +159,10 @@ const char *mp_charset_guess(bstr buf, const char *user_cp, int flags) if (bstrcasecmp0(type, "guess") == 0) res = libguess_guess(buf, lang); #endif + if (bstrcasecmp0(type, "utf8") == 0 || bstrcasecmp0(type, "utf-8") == 0) { + if (!fallback) + fallback = params[1].start; // must be already 0-terminated + } if (res) { mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n", |