From d60270ed3dd41f7afb4cda6523c93a4c6120c898 Mon Sep 17 00:00:00 2001 From: wm4 Date: Tue, 1 Sep 2015 13:37:45 +0200 Subject: sub: fix --sub-codepage UTF-8 with fallback Fixes e.g. --sub-codepage=utf8:gb18030 if the subtitle is UTF-8. This was broken in commit e5d31808. Also log the detected charset in verbose mode. --- misc/charset_conv.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'misc') diff --git a/misc/charset_conv.c b/misc/charset_conv.c index c966a00622..bceb52aa58 100644 --- a/misc/charset_conv.c +++ b/misc/charset_conv.c @@ -239,6 +239,9 @@ const char *mp_charset_guess(void *talloc_ctx, struct mp_log *log, bstr buf, if (bstrcasecmp0(type, "utf8") == 0 || bstrcasecmp0(type, "utf-8") == 0) { if (!fallback) fallback = params[1].start; // must be already 0-terminated + int r = bstr_validate_utf8(buf); + if (r >= 0 || (r > -8 && (flags & MP_ICONV_ALLOW_CUTOFF))) + res = "utf-8"; } if (res) { @@ -252,6 +255,7 @@ const char *mp_charset_guess(void *talloc_ctx, struct mp_log *log, bstr buf, if (!res && !(flags & MP_STRICT_UTF8)) res = "UTF-8-BROKEN"; + mp_verbose(log, "Using charset '%s'.\n", res); return res; } -- cgit v1.2.3