summary | refs | log | tree | commit | diff | stats
path: root/sub/dec_sub.c
diff options
context:
space:
mode:
author: wm4 <wm4@nowhere> 2013-08-15 19:29:42 +0200
committer: wm4 <wm4@nowhere> 2013-08-15 23:40:02 +0200
commit: acb51c9243c7861774af6ad592acc07490fa7e7c (patch)
tree: 128dd7ff703495cde69fe56a29bf7b8765a803dc /sub/dec_sub.c
parent: 380fa71fc79ba40936ea073cfdd183c708141420 (diff)
download: mpv-acb51c9243c7861774af6ad592acc07490fa7e7c.tar.bz2
download: mpv-acb51c9243c7861774af6ad592acc07490fa7e7c.tar.xz
sub: if charset detection fails, treat it as broken UTF-8
Broken UTF-8 in this context means that we treat the text as UTF-8, but interpret any invalid UTF-8 sequences as Latin1. Also, run our own UTF-8 check function before the charset detectors. This prevents ENCA's UTF-8 check from possibly messing up (for example, by detecting 7-bit clean UTF-8 as ASCII, or other things). It also takes care of UTF-8 detection if no charset detector (ENCA, libguess) is compiled in, and it lets us deal better with cut-off UTF-8 sequences.
Diffstat (limited to 'sub/dec_sub.c')
-rw-r--r-- sub/dec_sub.c | 2
1 files changed, 1 insertions, 1 deletions
diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 998cb0db7f..bc492c6381 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -286,7 +286,7 @@ static const char *guess_sub_cp(struct packet_list *subs, const char *usercp)
memcpy(text.start + text.len + pkt->len, sep, sep_len);
text.len += pkt->len + sep_len;
}
- const char *guess = mp_charset_guess(text, usercp);
+ const char *guess = mp_charset_guess(text, usercp, 0);
talloc_free(text.start);
return guess;
}