From fc7212b2142ec45dbb6a54127d4eeb2bc9b6abd9 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sun, 20 Dec 2015 20:55:24 +0100 Subject: charset_conv: check for UTF-8 if uchardet returns unknown When libuchardet returns an empty string, it can be either ASCII, UTF-8, or an unknown encoding. Try to distinguish it from the unknown case by checking for UTF-8. This avoids an annoying message, and avoids unnecessary processing (we convert invalid UTF-8 sequences to latin1 to work around libavcodec's braindead UTF-8 check). --- misc/charset_conv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/misc/charset_conv.c b/misc/charset_conv.c index 8181b1392e..0037775586 100644 --- a/misc/charset_conv.c +++ b/misc/charset_conv.c @@ -186,6 +186,8 @@ static const char *mp_uchardet(void *talloc_ctx, struct mp_log *log, bstr buf) iconv_close(icdsc); } } + if (!res && bstr_validate_utf8(buf) >= 0) + res = "utf-8"; uchardet_delete(det); return res; } -- cgit v1.2.3