From acb51c9243c7861774af6ad592acc07490fa7e7c Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Thu, 15 Aug 2013 19:29:42 +0200
Subject: sub: if charset detection fails, treat it as broken UTF-8

Broken UTF-8 in this context means we treat the subtitle text as UTF-8,
but also interpret invalid UTF-8 sequences as Latin1.

Also, run our own UTF-8 check function before the charset detectors.
This prevents ENCA's UTF-8 check from possibly messing up (like
detecting 7-bit clean UTF-8 as ASCII, or other things). It also takes
care of UTF-8 detection if no charset detector (ENCA, libguess) is
compiled in, and it lets us deal better with cut-off UTF-8 sequences.
---
 sub/dec_sub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'sub')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 998cb0db7f..bc492c6381 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -286,7 +286,7 @@ static const char *guess_sub_cp(struct packet_list *subs, const char *usercp)
         memcpy(text.start + text.len + pkt->len, sep, sep_len);
         text.len += pkt->len + sep_len;
     }
-    const char *guess = mp_charset_guess(text, usercp);
+    const char *guess = mp_charset_guess(text, usercp, 0);
     talloc_free(text.start);
     return guess;
 }
-- 
cgit v1.2.3