summaryrefslogtreecommitdiffstats
path: root/misc
diff options
context:
space:
mode:
authorJeong Woon Choi <choijeongwoon@gmail.com>2016-09-02 18:32:14 +0900
committerwm4 <wm4@nowhere>2016-09-02 14:46:11 +0200
commit875aeb0f5c0c2853fc85b28727b5b849bee4a74d (patch)
tree430bc346458cddf69369361af7b470dd834ccada /misc
parentc72df804604c8b7859af76d3503234725b501c96 (diff)
downloadmpv-875aeb0f5c0c2853fc85b28727b5b849bee4a74d.tar.bz2
mpv-875aeb0f5c0c2853fc85b28727b5b849bee4a74d.tar.xz
charset_conv: Use CP949 instead of EUC-KR
iconv distinguishes between euc-kr and cp949, while libguess and libuchardet don't (they only return euc-kr). EILSEQ occurs when the input encoding of iconv is set to euc-kr and the subs contain letters not included in euc-kr. Since cp949 is an extension of euc-kr, choose cp949 instead. Signed-off-by: wm4 <wm4@nowhere>
Diffstat (limited to 'misc')
-rw-r--r--misc/charset_conv.c5
1 files changed, 5 insertions, 0 deletions
diff --git a/misc/charset_conv.c b/misc/charset_conv.c
index fcc346b37d..48e4e9a5ae 100644
--- a/misc/charset_conv.c
+++ b/misc/charset_conv.c
@@ -291,6 +291,11 @@ bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags)
if (strcasecmp(cp, "UTF-8-BROKEN") == 0)
return bstr_sanitize_utf8_latin1(NULL, buf);
+ // Force CP949 over EUC-KR since iconv distinguishes them and
+ // EUC-KR causes error on CP949 encoded data
+ if (strcasecmp(cp, "EUC-KR") == 0)
+ cp = "CP949";
+
iconv_t icdsc;
if ((icdsc = iconv_open("UTF-8", cp)) == (iconv_t) (-1)) {
if (flags & MP_ICONV_VERBOSE)