From 875aeb0f5c0c2853fc85b28727b5b849bee4a74d Mon Sep 17 00:00:00 2001
From: Jeong Woon Choi <choijeongwoon@gmail.com>
Date: Fri, 2 Sep 2016 18:32:14 +0900
Subject: charset_conv: Use CP949 instead of EUC-KR

iconv distinguishes between euc-kr and cp949, while libguess
and libuchardet don't (they only return euc-kr). EILSEQ occurs
when the input encoding of iconv is set to euc-kr and the subs
contain letters not included in euc-kr. Since cp949 is an extension
of euc-kr, choose cp949 instead.

Signed-off-by: wm4 <wm4@nowhere>
---
 misc/charset_conv.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/misc/charset_conv.c b/misc/charset_conv.c
index fcc346b37d..48e4e9a5ae 100644
--- a/misc/charset_conv.c
+++ b/misc/charset_conv.c
@@ -291,6 +291,11 @@ bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags)
     if (strcasecmp(cp, "UTF-8-BROKEN") == 0)
         return bstr_sanitize_utf8_latin1(NULL, buf);
 
+    // Force CP949 over EUC-KR since iconv distinguishes them and
+    // EUC-KR causes error on CP949 encoded data
+    if (strcasecmp(cp, "EUC-KR") == 0)
+      cp = "CP949";
+
     iconv_t icdsc;
     if ((icdsc = iconv_open("UTF-8", cp)) == (iconv_t) (-1)) {
         if (flags & MP_ICONV_VERBOSE)
-- 
cgit v1.2.3