From 11f2be2bcc264aa4f24cdad036231e34d09f5630 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sat, 1 Aug 2015 23:25:50 +0200 Subject: charset_conv: make it possible to return an allocated string as guess uchardet is written in C++, and thus doesn't appreciate the value of using static strings, and internally stores the guessed charset as allocated std::string. Add a minimal hack to deal with this. (I don't appreciate that the code is potentially harder to understand by returning either a static or allocated string, but I do appreciate not having to litter the existing code with strdups.) --- misc/charset_conv.c | 12 ++++++++---- misc/charset_conv.h | 4 ++-- sub/dec_sub.c | 8 ++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/misc/charset_conv.c b/misc/charset_conv.c index 31f53ccecb..343fb7fd90 100644 --- a/misc/charset_conv.c +++ b/misc/charset_conv.c @@ -150,8 +150,9 @@ static const char *libguess_guess(struct mp_log *log, bstr buf, // If user_cp doesn't refer to any known auto-detection (for example because // it's a real iconv codepage), user_cp is returned without even looking at // the buf data. -const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp, - int flags) +// The return value may (but doesn't have to) be allocated under talloc_ctx. 
+const char *mp_charset_guess(void *talloc_ctx, struct mp_log *log, bstr buf, + const char *user_cp, int flags) { if (!mp_charset_requires_guess(user_cp)) return user_cp; @@ -225,8 +226,11 @@ const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp, bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf, const char *user_cp, int flags) { - return mp_iconv_to_utf8(log, buf, mp_charset_guess(log, buf, user_cp, flags), - flags); + void *tmp = talloc_new(NULL); + const char *cp = mp_charset_guess(tmp, log, buf, user_cp, flags); + bstr res = mp_iconv_to_utf8(log, buf, cp, flags); + talloc_free(tmp); + return res; } // Use iconv to convert buf to UTF-8. diff --git a/misc/charset_conv.h b/misc/charset_conv.h index 93bd91cffe..bd76ae007a 100644 --- a/misc/charset_conv.h +++ b/misc/charset_conv.h @@ -14,8 +14,8 @@ enum { bool mp_charset_is_utf8(const char *user_cp); bool mp_charset_requires_guess(const char *user_cp); -const char *mp_charset_guess(struct mp_log *log, bstr buf, const char *user_cp, - int flags); +const char *mp_charset_guess(void *talloc_ctx, struct mp_log *log, bstr buf, + const char *user_cp, int flags); bstr mp_charset_guess_and_conv_to_utf8(struct mp_log *log, bstr buf, const char *user_cp, int flags); bstr mp_iconv_to_utf8(struct mp_log *log, bstr buf, const char *cp, int flags); diff --git a/sub/dec_sub.c b/sub/dec_sub.c index 1800cf34d9..20fa9eff83 100644 --- a/sub/dec_sub.c +++ b/sub/dec_sub.c @@ -303,8 +303,8 @@ void sub_decode(struct dec_sub *sub, struct demux_packet *packet) pthread_mutex_unlock(&sub->lock); } -static const char *guess_sub_cp(struct mp_log *log, void *talloc_ctx, + struct packet_list *subs, const char *usercp) { if (!mp_charset_requires_guess(usercp)) return usercp; @@ -330,7 +330,7 @@ static const char *guess_sub_cp(struct mp_log *log, struct packet_list *subs, memcpy(text.start + text.len + 
pkt->len, sep, sep_len); text.len += pkt->len + sep_len; } - const char *guess = mp_charset_guess(log, text, usercp, 0); + const char *guess = mp_charset_guess(talloc_ctx, log, text, usercp, 0); talloc_free(text.start); return guess; } @@ -455,7 +455,7 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_stream *sh) } if (opts->sub_cp && !sh->sub->is_utf8) - sub->charset = guess_sub_cp(sub->log, subs, opts->sub_cp); + sub->charset = guess_sub_cp(sub->log, sub, subs, opts->sub_cp); if (sub->charset && sub->charset[0] && !mp_charset_is_utf8(sub->charset)) MP_INFO(sub, "Using subtitle charset: %s\n", sub->charset); -- cgit v1.2.3