diff options
Diffstat (limited to 'demux/demux_lavf.c')
-rw-r--r-- | demux/demux_lavf.c | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/demux/demux_lavf.c b/demux/demux_lavf.c index 9aa71fcaf1..5383b93934 100644 --- a/demux/demux_lavf.c +++ b/demux/demux_lavf.c @@ -40,6 +40,7 @@ #include "common/tags.h" #include "common/av_common.h" #include "misc/bstr.h" +#include "misc/charset_conv.h" #include "stream/stream.h" #include "demux.h" @@ -108,16 +109,16 @@ struct format_hack { bool no_stream : 1; // do not wrap struct stream as AVIOContext bool use_stream_ids : 1; // export the native stream IDs bool fully_read : 1; // set demuxer.fully_read flag + bool detect_charset : 1; // format is a small text file, possibly not UTF8 bool image_format : 1; // expected to contain exactly 1 frame - bool utf8_subs : 1; // subtitles are (mostly) guaranteed UTF-8 // Do not confuse player's position estimation (position is into external // segment, with e.g. HLS, player knows about the playlist main file only). bool clear_filepos : 1; }; #define BLACKLIST(fmt) {fmt, .ignore = true} -#define TEXTSUB(fmt) {fmt, .fully_read = true} -#define TEXTSUB_UTF8(fmt) {fmt, .fully_read = true, .utf8_subs = true} +#define TEXTSUB(fmt) {fmt, .fully_read = true, .detect_charset = true} +#define TEXTSUB_UTF8(fmt) {fmt, .fully_read = true} #define IMAGEFMT(fmt) {fmt, .image_format = true} static const struct format_hack format_hacks[] = { @@ -145,10 +146,6 @@ static const struct format_hack format_hacks[] = { TEXTSUB_UTF8("webvtt"), TEXTSUB_UTF8("ass"), - // Formats which support muxed subtitles, and always use UTF-8 for them. - {"mov", .utf8_subs = true}, - {"mkv", .utf8_subs = true}, - // Useless non-sense, sometimes breaks MLP2 subreader.c fallback BLACKLIST("tty"), // Let's open files with extremely generic extensions (.bin) with a @@ -174,6 +171,7 @@ typedef struct lavf_priv { int cur_program; char *mime_type; bool merge_track_metadata; + char *file_charset; } lavf_priv_t; // At least mp4 has name="mov,mp4,m4a,3gp,3g2,mj2", so we split the name @@ -262,6 +260,23 @@ static void list_formats(struct demuxer *demuxer) MP_INFO(demuxer, "%15s : %s\n", fmt->name, fmt->long_name); } +static void detect_charset(struct demuxer *demuxer) +{ + lavf_priv_t *priv = demuxer->priv; + char *cp = demuxer->opts->sub_cp; + if (mp_charset_requires_guess(cp)) { + bstr data = stream_peek(demuxer->stream, STREAM_MAX_BUFFER_SIZE); + cp = (char *)mp_charset_guess(priv, demuxer->log, data, cp, 0); + MP_VERBOSE(demuxer, "Detected charset: %s\n", cp ? cp : "(unknown)"); + } + if (cp && !mp_charset_is_utf8(cp)) + MP_INFO(demuxer, "Using subtitle charset: %s\n", cp); + // libavformat transparently converts UTF-16 to UTF-8 + if (mp_charset_is_utf16(priv->file_charset)) + cp = NULL; + priv->file_charset = cp; +} + static char *remove_prefix(char *s, const char *const *prefixes) { for (int n = 0; prefixes[n]; n++) { @@ -402,6 +417,9 @@ static int lavf_check_file(demuxer_t *demuxer, enum demux_check check) demuxer->filetype = priv->avif->name; + if (priv->format_hack.detect_charset) + detect_charset(demuxer); + return 0; } @@ -622,7 +640,7 @@ static void handle_stream(demuxer_t *demuxer, int i) } } - sh_sub->is_utf8 = priv->format_hack.utf8_subs; + sh_sub->charset = priv->file_charset; break; } |