diff options
author | wm4 <wm4@nowhere> | 2015-12-16 23:54:25 +0100 |
---|---|---|
committer | wm4 <wm4@nowhere> | 2015-12-17 01:17:23 +0100 |
commit | 74c11f0c841d0b81a6ea759c7eb131d2c2e02ec3 (patch) | |
tree | 8c88229f881d63d3bb1504db7ca72583ae5eafcf /demux/demux_lavf.c | |
parent | e798cf1ff64527cd2ed9fc3bab8dbc2d0c7a52de (diff) | |
download | mpv-74c11f0c841d0b81a6ea759c7eb131d2c2e02ec3.tar.bz2 mpv-74c11f0c841d0b81a6ea759c7eb131d2c2e02ec3.tar.xz |
sub: detect charset in demuxer
Slightly simpler, and removes the need to pre-read all subtitle packets.
This still does the subtitle charset conversion on the packet level
(instead of converting when parsing the file), so in theory this still
could provide a way to change the charset at runtime. But maybe even
this should be removed, as FFmpeg is somewhat likely to get its own
charset detection and conversion mechanism in the future. (Would have
to keep the subtitle file in memory to allow changing the charset on
the fly, I guess.)
Diffstat (limited to 'demux/demux_lavf.c')
-rw-r--r-- | demux/demux_lavf.c | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/demux/demux_lavf.c b/demux/demux_lavf.c index 9aa71fcaf1..5383b93934 100644 --- a/demux/demux_lavf.c +++ b/demux/demux_lavf.c @@ -40,6 +40,7 @@ #include "common/tags.h" #include "common/av_common.h" #include "misc/bstr.h" +#include "misc/charset_conv.h" #include "stream/stream.h" #include "demux.h" @@ -108,16 +109,16 @@ struct format_hack { bool no_stream : 1; // do not wrap struct stream as AVIOContext bool use_stream_ids : 1; // export the native stream IDs bool fully_read : 1; // set demuxer.fully_read flag + bool detect_charset : 1; // format is a small text file, possibly not UTF8 bool image_format : 1; // expected to contain exactly 1 frame - bool utf8_subs : 1; // subtitles are (mostly) guaranteed UTF-8 // Do not confuse player's position estimation (position is into external // segment, with e.g. HLS, player knows about the playlist main file only). bool clear_filepos : 1; }; #define BLACKLIST(fmt) {fmt, .ignore = true} -#define TEXTSUB(fmt) {fmt, .fully_read = true} -#define TEXTSUB_UTF8(fmt) {fmt, .fully_read = true, .utf8_subs = true} +#define TEXTSUB(fmt) {fmt, .fully_read = true, .detect_charset = true} +#define TEXTSUB_UTF8(fmt) {fmt, .fully_read = true} #define IMAGEFMT(fmt) {fmt, .image_format = true} static const struct format_hack format_hacks[] = { @@ -145,10 +146,6 @@ static const struct format_hack format_hacks[] = { TEXTSUB_UTF8("webvtt"), TEXTSUB_UTF8("ass"), - // Formats which support muxed subtitles, and always use UTF-8 for them. 
- {"mov", .utf8_subs = true}, - {"mkv", .utf8_subs = true}, - // Useless non-sense, sometimes breaks MLP2 subreader.c fallback BLACKLIST("tty"), // Let's open files with extremely generic extensions (.bin) with a @@ -174,6 +171,7 @@ typedef struct lavf_priv { int cur_program; char *mime_type; bool merge_track_metadata; + char *file_charset; } lavf_priv_t; // At least mp4 has name="mov,mp4,m4a,3gp,3g2,mj2", so we split the name @@ -262,6 +260,23 @@ static void list_formats(struct demuxer *demuxer) MP_INFO(demuxer, "%15s : %s\n", fmt->name, fmt->long_name); } +static void detect_charset(struct demuxer *demuxer) +{ + lavf_priv_t *priv = demuxer->priv; + char *cp = demuxer->opts->sub_cp; + if (mp_charset_requires_guess(cp)) { + bstr data = stream_peek(demuxer->stream, STREAM_MAX_BUFFER_SIZE); + cp = (char *)mp_charset_guess(priv, demuxer->log, data, cp, 0); + MP_VERBOSE(demuxer, "Detected charset: %s\n", cp ? cp : "(unknown)"); + } + if (cp && !mp_charset_is_utf8(cp)) + MP_INFO(demuxer, "Using subtitle charset: %s\n", cp); + // libavformat transparently converts UTF-16 to UTF-8 + if (mp_charset_is_utf16(priv->file_charset)) + cp = NULL; + priv->file_charset = cp; +} + static char *remove_prefix(char *s, const char *const *prefixes) { for (int n = 0; prefixes[n]; n++) { @@ -402,6 +417,9 @@ static int lavf_check_file(demuxer_t *demuxer, enum demux_check check) demuxer->filetype = priv->avif->name; + if (priv->format_hack.detect_charset) + detect_charset(demuxer); + return 0; } @@ -622,7 +640,7 @@ static void handle_stream(demuxer_t *demuxer, int i) } } - sh_sub->is_utf8 = priv->format_hack.utf8_subs; + sh_sub->charset = priv->file_charset; break; } |