From 875aeb0f5c0c2853fc85b28727b5b849bee4a74d Mon Sep 17 00:00:00 2001 From: Jeong Woon Choi Date: Fri, 2 Sep 2016 18:32:14 +0900 Subject: charset_conv: Use CP949 instead of EUC-KR iconv distinguishes between euc-kr and cp949, while libguess and libuchardet don't (they only return euc-kr). EILSEQ occurs when the input encoding of iconv is set to euc-kr and the subtitles contain characters not included in euc-kr. Since cp949 is an extension of euc-kr, choose cp949 instead. Signed-off-by: wm4 --- misc/charset_conv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/misc/charset_conv.c b/misc/charset_conv.c index fcc346b37d..48e4e9a5ae 100644 --- a/misc/charset_conv.c +++ b/misc/charset_conv.c @@ -291,6 +291,11 @@ bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags) if (strcasecmp(cp, "UTF-8-BROKEN") == 0) return bstr_sanitize_utf8_latin1(NULL, buf); + // Force CP949 over EUC-KR since iconv distinguishes them and + // EUC-KR causes error on CP949 encoded data + if (strcasecmp(cp, "EUC-KR") == 0) + cp = "CP949"; + iconv_t icdsc; if ((icdsc = iconv_open("UTF-8", cp)) == (iconv_t) (-1)) { if (flags & MP_ICONV_VERBOSE) -- cgit v1.2.3