summary | refs | log | tree | commit | diff | stats
path: root/misc
diff options
context:
space:
mode:
author wm4 <wm4@nowhere> 2015-12-20 20:55:24 +0100
committer wm4 <wm4@nowhere> 2015-12-20 20:55:24 +0100
commit fc7212b2142ec45dbb6a54127d4eeb2bc9b6abd9 (patch)
tree 48f905956f5e570e348a2cbc013c377e0ea23299 /misc
parent f2187de8bb9eedf45e0d6629e4d664eb32f311ac (diff)
download mpv-fc7212b2142ec45dbb6a54127d4eeb2bc9b6abd9.tar.bz2
mpv-fc7212b2142ec45dbb6a54127d4eeb2bc9b6abd9.tar.xz
charset_conv: check for UTF-8 if uchardet returns unknown
When libuchardet returns an empty string, the input can be ASCII, UTF-8, or an unknown encoding. Try to distinguish the ASCII/UTF-8 cases from the unknown case by checking whether the buffer is valid UTF-8. This avoids an annoying message, and avoids unnecessary processing (we convert invalid UTF-8 sequences to latin1 to work around libavcodec's braindead UTF-8 check).
Diffstat (limited to 'misc')
-rw-r--r-- misc/charset_conv.c 2
1 files changed, 2 insertions, 0 deletions
diff --git a/misc/charset_conv.c b/misc/charset_conv.c
index 8181b1392e..0037775586 100644
--- a/misc/charset_conv.c
+++ b/misc/charset_conv.c
@@ -186,6 +186,8 @@ static const char *mp_uchardet(void *talloc_ctx, struct mp_log *log, bstr buf)
iconv_close(icdsc);
}
}
+ if (!res && bstr_validate_utf8(buf) >= 0)
+ res = "utf-8";
uchardet_delete(det);
return res;
}