From 4f5e12136de717896bf322e75d42de1af09e1c3e Mon Sep 17 00:00:00 2001 From: wm4 Date: Tue, 11 Jun 2013 12:16:42 +0200 Subject: stream: remove padding parameter from stream_read_complete() Seems like a completely unnecessary complication. Instead, always add a 1 byte padding (could be extended if a caller needs it), and clear it. Also add some documentation. There was some, but it was outdated and incomplete. --- core/encode_lavc.c | 2 +- core/input/input.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'core') diff --git a/core/encode_lavc.c b/core/encode_lavc.c index e7c52be221..747e3e67df 100644 --- a/core/encode_lavc.c +++ b/core/encode_lavc.c @@ -404,7 +404,7 @@ static void encode_2pass_prepare(struct encode_lavc_context *ctx, set_to_avdictionary(dictp, "flags", "-pass2"); } else { struct bstr content = stream_read_complete(*bytebuf, NULL, - 1000000000, 1); + 1000000000); if (content.start == NULL) { mp_msg(MSGT_ENCODE, MSGL_WARN, "%s: could not read '%s', " "disabling 2-pass encoding at pass 1\n", diff --git a/core/input/input.c b/core/input/input.c index 2d7569c8e9..dfa7d1e5b4 100644 --- a/core/input/input.c +++ b/core/input/input.c @@ -1737,7 +1737,7 @@ static int parse_config_file(struct input_ctx *ictx, char *file, bool warn) mp_msg(MSGT_INPUT, MSGL_ERR, "Can't open input config file %s.\n", file); return 0; } - bstr res = stream_read_complete(s, NULL, 1000000, 0); + bstr res = stream_read_complete(s, NULL, 1000000); free_stream(s); mp_msg(MSGT_INPUT, MSGL_V, "Parsing input config file %s\n", file); int n_binds = parse_config(ictx, false, res, file); -- cgit v1.2.3 From a792f0d628ea6170ce351fc816a2d71e276a8c49 Mon Sep 17 00:00:00 2001 From: wm4 Date: Tue, 11 Jun 2013 19:26:57 +0200 Subject: sub: remove redundant condition --- core/mplayer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'core') diff --git a/core/mplayer.c b/core/mplayer.c index 86260a0598..d9c577b354 100644 --- a/core/mplayer.c +++ 
b/core/mplayer.c @@ -2010,7 +2010,8 @@ static void reinit_subs(struct MPContext *mpctx) if (!mpctx->sh_sub->dec_sub) mpctx->sh_sub->dec_sub = sub_create(opts); - if (track->demuxer && !track->stream) { + assert(track->demuxer); + if (!track->stream) { // Lazily added DVD track - we must not miss the first subtitle packet, // which makes the demuxer create the sh_stream, and contains the first // subtitle event. @@ -2025,7 +2026,6 @@ static void reinit_subs(struct MPContext *mpctx) return; } - assert(track->demuxer && track->stream); mpctx->initialized_flags |= INITIALIZED_SUB; -- cgit v1.2.3 From a70d575291d48289669ee8989e0597a94189dd8d Mon Sep 17 00:00:00 2001 From: wm4 Date: Tue, 11 Jun 2013 21:39:54 +0200 Subject: sub: preload external text subtitles If a subtitle is external, read it completely and add all subtitle events in advance when the subtitle track is selected. This is done for text subtitles only. (Note that subreader.c and subtitles loaded with libass are different and don't have anything to do with this commit.) --- core/mplayer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'core') diff --git a/core/mplayer.c b/core/mplayer.c index d9c577b354..3f65573bb5 100644 --- a/core/mplayer.c +++ b/core/mplayer.c @@ -2036,12 +2036,22 @@ static void reinit_subs(struct MPContext *mpctx) if (!sub_is_initialized(dec_sub)) { int w = mpctx->sh_video ? mpctx->sh_video->disp_w : 0; int h = mpctx->sh_video ? mpctx->sh_video->disp_h : 0; + float fps = mpctx->sh_video ? mpctx->sh_video->fps : 25; set_dvdsub_fake_extradata(dec_sub, track->demuxer->stream, w, h); sub_set_video_res(dec_sub, w, h); + sub_set_video_fps(dec_sub, fps); sub_set_ass_renderer(dec_sub, mpctx->osd->ass_library, mpctx->osd->ass_renderer); sub_init_from_sh(dec_sub, sh_sub); + + // Don't do this if the file has video/audio streams. Don't do it even + // if it has only sub streams, because reading packets will change the + // demuxer position. 
+ if (!track->preloaded && track->is_external) { + demux_seek(track->demuxer, 0, 0, SEEK_ABSOLUTE); + track->preloaded = sub_read_all_packets(dec_sub, sh_sub); + } } mpctx->osd->dec_sub = dec_sub; -- cgit v1.2.3 From 64b1374a4456435cc4486a8153703fa89af58e31 Mon Sep 17 00:00:00 2001 From: wm4 Date: Tue, 11 Jun 2013 21:41:50 +0200 Subject: sub: do some timing postprocessing on preloaded subs This fixes the -subfps option (which unfortunately is still useful), and fixes minor annoying timing errors (which unfortunately still happen). Note that none of these affect ASS or image subtitles. ASS is specially handled: libass loads subtitles as ASS_Track. There are no actual packets passed around, and sd_ass just uses the ASS_Track. Disable the --sub-no-text-pp option. It's misleading now and always was completely useless. --- core/options.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'core') diff --git a/core/options.c b/core/options.c index f3e262fc17..2e0d32bb60 100644 --- a/core/options.c +++ b/core/options.c @@ -496,9 +496,7 @@ const m_option_t mp_opts[] = { OPT_FLAG("autosub", sub_auto, 0), OPT_FLAG("sub-visibility", sub_visibility, 0), OPT_FLAG("sub-forced-only", forced_subs_only, 0), - // enable Closed Captioning display - OPT_FLAG_CONSTANTS("overlapsub", suboverlap_enabled, 0, 0, 2), - OPT_FLAG_STORE("sub-no-text-pp", sub_no_text_pp, 0, 1), + OPT_FLAG_CONSTANTS("sub-fix-timing", suboverlap_enabled, 0, 1, 0), OPT_CHOICE("autosub-match", sub_match_fuzziness, 0, ({"exact", 0}, {"fuzzy", 1}, {"all", 2})), OPT_INTRANGE("sub-pos", sub_pos, 0, 0, 100), @@ -804,7 +802,7 @@ const struct MPOpts mp_default_opts = { .ass_vsfilter_aspect_compat = 1, .ass_style_override = 1, .use_embedded_fonts = 1, - .suboverlap_enabled = 1, + .suboverlap_enabled = 0, .hwdec_codecs = "all", -- cgit v1.2.3 From db2e1ef4d210f5a8a4a2555d0a78b0a4dea103ec Mon Sep 17 00:00:00 2001 From: wm4 Date: Fri, 21 Jun 2013 00:26:05 +0200 Subject: Move/rename subreader.c --- 
 core/mplayer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'core') diff --git a/core/mplayer.c b/core/mplayer.c index 3f65573bb5..e0049a8bd0 100644 --- a/core/mplayer.c +++ b/core/mplayer.c @@ -71,7 +71,7 @@ #include "core/mplayer.h" #include "core/m_property.h" -#include "sub/subreader.h" +#include "demux/subreader.h" #include "sub/find_subfiles.h" #include "sub/dec_sub.h" #include "sub/sd.h" -- cgit v1.2.3 From 98388c0c073906f4485420485e27a14e8d957a2d Mon Sep 17 00:00:00 2001 From: wm4 Date: Fri, 21 Jun 2013 21:34:55 +0200 Subject: subreader: turn into actual demuxer subreader.c (before this commit renamed to demux_subreader.c) was special cased to the -sub option. The plan is using the normal demuxer codepath for all subtitle formats (so we can prefer libavformat demuxers for most formats). There are some subtle changes. The probe size is restricted to 32 KB (instead of unlimited + giving up after 100 lines of input). For formats like MicroDVD, the video FPS isn't used anymore, because it's not available on the subtitle demuxer level. Instead, hardcode it to 23.976 FPS (libavformat seems to do the same). The user can probably still use -sub-fps to fix the timing. Checking the file extension for ".utf"/".utf8"/".utf-8" is simply removed (seems worthless, was in the way, and I've never seen this anywhere). 
--- core/mplayer.c | 9 ++------- core/options.h | 1 - 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'core') diff --git a/core/mplayer.c b/core/mplayer.c index e0049a8bd0..f766246f63 100644 --- a/core/mplayer.c +++ b/core/mplayer.c @@ -71,7 +71,6 @@ #include "core/mplayer.h" #include "core/m_property.h" -#include "demux/subreader.h" #include "sub/find_subfiles.h" #include "sub/dec_sub.h" #include "sub/sd.h" @@ -1044,7 +1043,6 @@ struct track *mp_add_subtitles(struct MPContext *mpctx, char *filename, { struct MPOpts *opts = &mpctx->opts; struct ass_track *asst = NULL; - sub_data *subd = NULL; if (filename == NULL) return NULL; @@ -1055,17 +1053,14 @@ struct track *mp_add_subtitles(struct MPContext *mpctx, char *filename, // the weird special-cases. #ifdef CONFIG_ASS asst = mp_ass_read_stream(mpctx->ass_library, filename, opts->sub_cp); - if (!asst) - subd = sub_read_file(filename, fps, &mpctx->opts); - if (asst || subd) { + if (asst) { struct demuxer *d = new_sub_pseudo_demuxer(opts); assert(d->num_streams == 1); struct sh_stream *s = d->streams[0]; assert(s->type == STREAM_SUB); - s->codec = asst ? "ass" : subd->codec; + s->codec = "ass"; s->sub->track = asst; - s->sub->sub_data = subd; struct sh_sub **pptr = talloc(d, struct sh_sub*); *pptr = s->sub; diff --git a/core/options.h b/core/options.h index f925990a6c..c31d2063c9 100644 --- a/core/options.h +++ b/core/options.h @@ -147,7 +147,6 @@ typedef struct MPOpts { // subreader.c int suboverlap_enabled; char *sub_cp; - int sub_no_text_pp; char *audio_stream; int audio_stream_cache; -- cgit v1.2.3 From 1bfae45a88ac7c24b74a6f7ca6eb4aa27d20c653 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sat, 22 Jun 2013 01:31:25 +0200 Subject: core: don't set correct-pts mode randomly The default correct-pts mode depended on which demuxer was opened last. Often this is the subtitle demuxer. The correct-pts mode should be decided on the demuxer for video instead. 
--- core/mplayer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'core') diff --git a/core/mplayer.c b/core/mplayer.c index f766246f63..67d82fdb35 100644 --- a/core/mplayer.c +++ b/core/mplayer.c @@ -4289,6 +4289,14 @@ goto_reopen_demuxer: ; if (mpctx->timeline) timeline_set_part(mpctx, mpctx->timeline_part, true); + // Decide correct-pts mode based on first segment of video track + opts->correct_pts = opts->user_correct_pts; + if (opts->correct_pts < 0) { + opts->correct_pts = + demux_control(mpctx->demuxer, DEMUXER_CTRL_CORRECT_PTS, + NULL) == DEMUXER_CTRL_OK; + } + mpctx->initialized_flags |= INITIALIZED_DEMUXER; add_subtitle_fonts_from_sources(mpctx); -- cgit v1.2.3 From cfa45c40dc0cfe44b699029168b62d4d3e16c288 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sat, 22 Jun 2013 02:09:52 +0200 Subject: sub: add demux_libass wrapper, drop old hacks demux_libass.c allows us to make subtitle format detection part of the normal file loading process. libass has no probe function, but trying to load the start of a file (the first 4 KB) is good enough. Hope that libass can even handle random binary input gracefully without printing stupid log messages, and that the libass parser doesn't accept too many non-ASS files as input. This doesn't handle the -subcp option correctly yet. This will be fixed later. 
--- core/command.c | 5 ++-- core/mp_core.h | 3 +-- core/mplayer.c | 83 +++++++++------------------------------------------------- core/options.c | 1 - core/options.h | 1 - 5 files changed, 15 insertions(+), 78 deletions(-) (limited to 'core') diff --git a/core/command.c b/core/command.c index 4da0653425..c39bb3c16d 100644 --- a/core/command.c +++ b/core/command.c @@ -2281,7 +2281,7 @@ void run_command(MPContext *mpctx, mp_cmd_t *cmd) case MP_CMD_SUB_ADD: if (sh_video) { - mp_add_subtitles(mpctx, cmd->args[0].v.s, sh_video->fps, 0); + mp_add_subtitles(mpctx, cmd->args[0].v.s, 0); } break; @@ -2296,8 +2296,7 @@ void run_command(MPContext *mpctx, mp_cmd_t *cmd) struct track *sub = mp_track_by_tid(mpctx, STREAM_SUB, cmd->args[0].v.i); if (sh_video && sub && sub->is_external && sub->external_filename) { - struct track *nsub = mp_add_subtitles(mpctx, sub->external_filename, - sh_video->fps, 0); + struct track *nsub = mp_add_subtitles(mpctx, sub->external_filename, 0); if (nsub) { mp_remove_track(mpctx, sub); mp_switch_track(mpctx, nsub->type, nsub); diff --git a/core/mp_core.h b/core/mp_core.h index a327e43822..c958132700 100644 --- a/core/mp_core.h +++ b/core/mp_core.h @@ -290,8 +290,7 @@ extern int forced_subs_only; void uninit_player(struct MPContext *mpctx, unsigned int mask); void reinit_audio_chain(struct MPContext *mpctx); double playing_audio_pts(struct MPContext *mpctx); -struct track *mp_add_subtitles(struct MPContext *mpctx, char *filename, - float fps, int noerr); +struct track *mp_add_subtitles(struct MPContext *mpctx, char *filename, int noerr); int reinit_video_chain(struct MPContext *mpctx); int reinit_video_filters(struct MPContext *mpctx); void pause_player(struct MPContext *mpctx); diff --git a/core/mplayer.c b/core/mplayer.c index 67d82fdb35..6e453278f0 100644 --- a/core/mplayer.c +++ b/core/mplayer.c @@ -196,9 +196,6 @@ static const char av_desync_help_text[] = _( static void reset_subtitles(struct MPContext *mpctx); static void reinit_subs(struct 
MPContext *mpctx); -static struct track *open_external_file(struct MPContext *mpctx, char *filename, - char *demuxer_name, int stream_cache, - enum stream_type filter); static double get_relative_time(struct MPContext *mpctx) { @@ -981,6 +978,9 @@ static struct track *add_stream_track(struct MPContext *mpctx, }; MP_TARRAY_APPEND(mpctx, mpctx->tracks, mpctx->num_tracks, track); + if (stream->type == STREAM_SUB) + track->preloaded = !!stream->sub->track; + // Needed for DVD and Blu-ray. if (!track->lang) { struct stream_lang_req req = { @@ -1027,65 +1027,6 @@ static void add_dvd_tracks(struct MPContext *mpctx) #endif } -#ifdef CONFIG_ASS -static int free_sub_data(void *ptr) -{ - struct sh_sub *sh_sub = *(struct sh_sub **)ptr; - if (sh_sub->track) - ass_free_track(sh_sub->track); - talloc_free(sh_sub->sub_data); - return 1; -} -#endif - -struct track *mp_add_subtitles(struct MPContext *mpctx, char *filename, - float fps, int noerr) -{ - struct MPOpts *opts = &mpctx->opts; - struct ass_track *asst = NULL; - - if (filename == NULL) - return NULL; - - // Note: no text subtitles without libass. This is mainly because sd_ass is - // used for rendering. Even when showing subtitles with term-osd, going - // through sd_ass makes the code much simpler, as sd_ass can handle all - // the weird special-cases. 
-#ifdef CONFIG_ASS - asst = mp_ass_read_stream(mpctx->ass_library, filename, opts->sub_cp); - if (asst) { - struct demuxer *d = new_sub_pseudo_demuxer(opts); - assert(d->num_streams == 1); - struct sh_stream *s = d->streams[0]; - assert(s->type == STREAM_SUB); - - s->codec = "ass"; - s->sub->track = asst; - - struct sh_sub **pptr = talloc(d, struct sh_sub*); - *pptr = s->sub; - talloc_set_destructor(pptr, free_sub_data); - - struct track *t = add_stream_track(mpctx, s, false); - t->is_external = true; - t->preloaded = true; - t->title = talloc_strdup(t, filename); - t->external_filename = talloc_strdup(t, filename); - MP_TARRAY_APPEND(NULL, mpctx->sources, mpctx->num_sources, d); - return t; - } -#endif - - // Used with libavformat subtitles. - struct track *ext = open_external_file(mpctx, filename, NULL, 0, STREAM_SUB); - if (ext) - return ext; - - mp_tmsg(MSGT_CPLAYER, noerr ? MSGL_WARN : MSGL_ERR, - "Cannot load subtitles: %s\n", filename); - return NULL; -} - int mp_get_cache_percent(struct MPContext *mpctx) { if (mpctx->stream) { @@ -3916,16 +3857,15 @@ static void open_subtitles_from_options(struct MPContext *mpctx) // after reading video params we should load subtitles because // we know fps so now we can adjust subtitle time to ~6 seconds AST // check .sub - double sub_fps = mpctx->sh_video ? mpctx->sh_video->fps : 25; if (mpctx->opts.sub_name) { for (int i = 0; mpctx->opts.sub_name[i] != NULL; ++i) - mp_add_subtitles(mpctx, mpctx->opts.sub_name[i], sub_fps, 0); + mp_add_subtitles(mpctx, mpctx->opts.sub_name[i], 0); } if (mpctx->opts.sub_auto) { // auto load sub file ... 
 char **tmp = find_text_subtitles(&mpctx->opts, mpctx->filename); int nsub = MP_TALLOC_ELEMS(tmp); for (int i = 0; i < nsub; i++) { - struct track *track = mp_add_subtitles(mpctx, tmp[i], sub_fps, 1); + struct track *track = mp_add_subtitles(mpctx, tmp[i], 1); if (track) track->auto_loaded = true; } @@ -3955,9 +3895,12 @@ static struct track *open_external_file(struct MPContext *mpctx, char *filename, case STREAM_SUB: ss = -1; break; } vs = -1; // avi can't go without video + struct demuxer_params params = { + .ass_library = mpctx->ass_library, // demux_libass requires it + }; struct demuxer *demuxer = demux_open_withparams(&mpctx->opts, stream, format, demuxer_name, - as, vs, ss, filename, NULL); + as, vs, ss, filename, &params); if (!demuxer) { free_stream(stream); goto err_out; @@ -3995,12 +3938,11 @@ static void open_audiofiles_from_options(struct MPContext *mpctx) opts->audio_stream_cache, STREAM_AUDIO); } -// Just for -subfile. open_subtitles_from_options handles -sub text sub files. -static void open_subfiles_from_options(struct MPContext *mpctx) +struct track *mp_add_subtitles(struct MPContext *mpctx, char *filename, int noerr) { struct MPOpts *opts = &mpctx->opts; - open_external_file(mpctx, opts->sub_stream, opts->sub_demuxer_name, - 0, STREAM_SUB); + return open_external_file(mpctx, filename, opts->sub_demuxer_name, 0, + STREAM_SUB); } static void print_timeline(struct MPContext *mpctx) @@ -4303,7 +4245,6 @@ goto_reopen_demuxer: ; open_subtitles_from_options(mpctx); open_audiofiles_from_options(mpctx); - open_subfiles_from_options(mpctx); check_previous_track_selection(mpctx); diff --git a/core/options.c b/core/options.c index 2e0d32bb60..3ce4315572 100644 --- a/core/options.c +++ b/core/options.c @@ -406,7 +406,6 @@ const m_option_t mp_opts[] = { // demuxer.c - select audio/sub file/demuxer OPT_STRING("audiofile", audio_stream, 0), OPT_INTRANGE("audiofile-cache", audio_stream_cache, 0, 50, 65536), - OPT_STRING("subfile", sub_stream, 0), OPT_STRING("demuxer", 
demuxer_name, 0), OPT_STRING("audio-demuxer", audio_demuxer_name, 0), OPT_STRING("sub-demuxer", sub_demuxer_name, 0), diff --git a/core/options.h b/core/options.h index c31d2063c9..6ec051ddca 100644 --- a/core/options.h +++ b/core/options.h @@ -150,7 +150,6 @@ typedef struct MPOpts { char *audio_stream; int audio_stream_cache; - char *sub_stream; char *demuxer_name; char *audio_demuxer_name; char *sub_demuxer_name; -- cgit v1.2.3 From f735a03346e8ec743bc89d5bdbaafd62dc0f084d Mon Sep 17 00:00:00 2001 From: wm4 Date: Sun, 23 Jun 2013 22:15:04 +0200 Subject: sub: add subtitle charset conversion This code was once part of subreader.c, then traveled to libass, and now made its way back to the fork of the fork of the original code, MPlayer. It works pretty much the same as subreader.c, except that we have to concatenate some packets to do auto-detection. This is rather annoying, but for all we know the actual source file could be a binary format. Unlike subreader.c, the iconv context is reopened on each packet. This is simpler, and with respect to multibyte encodings, more robust. Reopening is probably not very fast, but I suspect subtitle charset conversion is not an operation that happens often or has to be fast. Also, this auto-detection is disabled for microdvd - this is the only format we know that has binary data in its packets, but is actually decoded to text. FFmpeg doesn't really allow us to solve this properly, because a) the input packets can be binary, and b) the output will be checked whether it's UTF-8, and if it's not, the output is thrown away and an error message is printed. We could just recode the decoded subtitles before sd_ass if it weren't for that. 
--- core/charset_conv.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++++++++ core/charset_conv.h | 17 ++++ 2 files changed, 257 insertions(+) create mode 100644 core/charset_conv.c create mode 100644 core/charset_conv.h (limited to 'core') diff --git a/core/charset_conv.c b/core/charset_conv.c new file mode 100644 index 0000000000..15209b30ea --- /dev/null +++ b/core/charset_conv.c @@ -0,0 +1,240 @@ +/* + * This file is part of mpv. + * + * Based on code taken from libass (ISC license), which was originally part + * of MPlayer (GPL). + * Copyright (C) 2006 Evgeniy Stepanov + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include <errno.h> +#include <strings.h> + +#include "config.h" + +#include "core/mp_msg.h" + +#ifdef CONFIG_ENCA +#include <enca.h> +#endif + +#ifdef CONFIG_ICONV +#include <iconv.h> +#endif + +#include "charset_conv.h" + +// Split the string on ':' into components. +// out_arr is at least max entries long. +// Return number of out_arr entries filled. 
+static int split_colon(const char *user_cp, int max, bstr *out_arr) +{ + if (!user_cp || max < 1) + return 0; + + int count = 0; + while (1) { + const char *next = strchr(user_cp, ':'); + if (next && max - count > 1) { + out_arr[count++] = (bstr){(char *)user_cp, next - user_cp}; + user_cp = next + 1; + } else { + out_arr[count++] = (bstr){(char *)user_cp, strlen(user_cp)}; + break; + } + } + return count; +} + +// Returns true if user_cp implies that calling mp_charset_guess() on the +// input data is required to determine the real codepage. This is the case +// if user_cp is not a real iconv codepage, but a magic value that requests +// for example ENCA charset auto-detection. +bool mp_charset_requires_guess(const char *user_cp) +{ + bstr res[2] = {{0}}; + split_colon(user_cp, 2, res); + return bstrcasecmp0(res[0], "enca") == 0; +} + +#ifdef CONFIG_ENCA +static const char *enca_guess(bstr buf, const char *language) +{ + if (!language || !language[0]) + language = "__"; // neutral language + + const char *detected_cp = NULL; + + EncaAnalyser analyser = enca_analyser_alloc(language); + if (analyser) { + enca_set_termination_strictness(analyser, 0); + EncaEncoding enc = enca_analyse_const(analyser, buf.start, buf.len); + const char *tmp = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ICONV); + if (tmp && enc.charset != ENCA_CS_UNKNOWN) + detected_cp = tmp; + enca_analyser_free(analyser); + } else { + mp_msg(MSGT_SUBREADER, MSGL_ERR, "ENCA doesn't know language '%s'\n", + language); + size_t langcnt; + const char **languages = enca_get_languages(&langcnt); + mp_msg(MSGT_SUBREADER, MSGL_ERR, "ENCA supported languages:"); + for (int i = 0; i < langcnt; i++) + mp_msg(MSGT_SUBREADER, MSGL_ERR, " %s", languages[i]); + mp_msg(MSGT_SUBREADER, MSGL_ERR, "\n"); + free(languages); + } + + return detected_cp; +} +#endif + +// Runs charset auto-detection on the input buffer, and returns the result. +// If auto-detection fails, NULL is returned. 
+// If user_cp doesn't refer to any known auto-detection (for example because +// it's a real iconv codepage), user_cp is returned without even looking at +// the buf data. +const char *mp_charset_guess(bstr buf, const char *user_cp) +{ + if (!mp_charset_requires_guess(user_cp)) + return user_cp; + + bstr params[3] = {{0}}; + split_colon(user_cp, 3, params); + + bstr type = params[0]; + char lang[100]; + snprintf(lang, sizeof(lang), "%.*s", BSTR_P(params[1])); + const char *fallback = params[2].start; // last item, already 0-terminated + + const char *res = NULL; + +#ifdef CONFIG_ENCA + if (bstrcasecmp0(type, "enca") == 0) + res = enca_guess(buf, lang); +#endif + + if (res) { + mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n", + BSTR_P(type), res); + } else { + res = fallback; + mp_msg(MSGT_SUBREADER, MSGL_DBG2, + "Detection with %.*s failed: fallback to %s\n", + BSTR_P(type), res && res[0] ? res : "no conversion"); + } + + return res; +} + +// Convert the data in buf to UTF-8. The charset argument can be an iconv +// codepage, a value returned by mp_charset_conv_guess(), or a special value +// that triggers autodetection of the charset (e.g. using ENCA). +// The auto-detection is the only difference to mp_iconv_to_utf8(). +// buf: same as mp_iconv_to_utf8() +// user_cp: iconv codepage, special value, NULL +// flags: same as mp_iconv_to_utf8() +// returns: same as mp_iconv_to_utf8() +bstr mp_charset_guess_and_conv_to_utf8(bstr buf, const char *user_cp, int flags) +{ + return mp_iconv_to_utf8(buf, mp_charset_guess(buf, user_cp), flags); +} + +// Use iconv to convert buf to UTF-8. +// Returns buf.start==NULL on error. Returns buf if cp is NULL, or if there is +// obviously no conversion required (e.g. if cp is "UTF-8"). +// Returns a newly allocated buffer if conversion is done and succeeds. The +// buffer will be terminated with 0 for convenience (the terminating 0 is not +// included in the returned length). 
+// Free the returned buffer with talloc_free(). +// buf: input data +// cp: iconv codepage (or NULL) +// flags: combination of MP_ICONV_* flags +// returns: buf (no conversion), .start==NULL (error), or allocated buffer +bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags) +{ +#ifdef CONFIG_ICONV + const char *tocp = "UTF-8"; + + if (!cp || !cp[0] || strcasecmp(cp, tocp) == 0) + return buf; + + if (strcasecmp(cp, "ASCII") == 0) + return buf; + + iconv_t icdsc; + if ((icdsc = iconv_open(tocp, cp)) == (iconv_t) (-1)) { + if (flags & MP_ICONV_VERBOSE) + mp_msg(MSGT_SUBREADER, MSGL_ERR, + "Error opening iconv with codepage '%s'\n", cp); + goto failure; + } + + size_t size = buf.len; + size_t osize = size; + size_t ileft = size; + size_t oleft = size - 1; + + char *outbuf = talloc_size(NULL, osize); + char *ip = buf.start; + char *op = outbuf; + + while (1) { + int clear = 0; + size_t rc; + if (ileft) + rc = iconv(icdsc, &ip, &ileft, &op, &oleft); + else { + clear = 1; // clear the conversion state and leave + rc = iconv(icdsc, NULL, NULL, &op, &oleft); + } + if (rc == (size_t) (-1)) { + if (errno == E2BIG) { + size_t offset = op - outbuf; + outbuf = talloc_realloc_size(NULL, outbuf, osize + size); + op = outbuf + offset; + osize += size; + oleft += size; + } else { + if (errno == EINVAL && (flags & MP_ICONV_ALLOW_CUTOFF)) { + // This is intended for cases where the input buffer is cut + // at a random byte position. If this happens in the middle + // of the buffer, it should still be an error. We say it's + // fine if the error is within 10 bytes of the end. 
+ if (ileft <= 10) + break; + } + if (flags & MP_ICONV_VERBOSE) { + mp_msg(MSGT_SUBREADER, MSGL_ERR, + "Error recoding text with codepage '%s'\n", cp); + } + talloc_free(outbuf); + iconv_close(icdsc); + goto failure; + } + } else if (clear) + break; + } + + iconv_close(icdsc); + + outbuf[osize - oleft - 1] = 0; + return (bstr){outbuf, osize - oleft - 1}; +#endif + +failure: + return (bstr){0}; +} diff --git a/core/charset_conv.h b/core/charset_conv.h new file mode 100644 index 0000000000..00a2658da3 --- /dev/null +++ b/core/charset_conv.h @@ -0,0 +1,17 @@ +#ifndef MP_CHARSET_CONV_H +#define MP_CHARSET_CONV_H + +#include <stdbool.h> +#include "core/bstr.h" + +enum { + MP_ICONV_VERBOSE = 1, // print errors instead of failing silently + MP_ICONV_ALLOW_CUTOFF = 2, // allow partial input data +}; + +bool mp_charset_requires_guess(const char *user_cp); +const char *mp_charset_guess(bstr buf, const char *user_cp); +bstr mp_charset_guess_and_conv_to_utf8(bstr buf, const char *user_cp, int flags); +bstr mp_iconv_to_utf8(bstr buf, const char *cp, int flags); + +#endif -- cgit v1.2.3 From f48829b546095bd33e243332c965ab58d6481160 Mon Sep 17 00:00:00 2001 From: wm4 Date: Mon, 24 Jun 2013 23:06:34 +0200 Subject: sub: libguess support for -subcp Actually this is rather disappointing. 
--- core/charset_conv.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'core') diff --git a/core/charset_conv.c b/core/charset_conv.c index 15209b30ea..680c8f83f9 100644 --- a/core/charset_conv.c +++ b/core/charset_conv.c @@ -31,6 +31,10 @@ #include <enca.h> #endif +#ifdef CONFIG_LIBGUESS +#include <libguess.h> +#endif + #ifdef CONFIG_ICONV #include <iconv.h> #endif @@ -67,7 +71,8 @@ bool mp_charset_requires_guess(const char *user_cp) { bstr res[2] = {{0}}; split_colon(user_cp, 2, res); - return bstrcasecmp0(res[0], "enca") == 0; + return bstrcasecmp0(res[0], "enca") == 0 || + bstrcasecmp0(res[0], "guess") == 0; } #ifdef CONFIG_ENCA @@ -102,6 +107,23 @@ static const char *enca_guess(bstr buf, const char *language) } #endif +#ifdef CONFIG_LIBGUESS +static const char *libguess_guess(bstr buf, const char *language) +{ + if (libguess_validate_utf8(buf.start, buf.len)) + return "UTF-8"; + + if (!language || !language[0] || strcmp(language, "help") == 0) { + mp_msg(MSGT_SUBREADER, MSGL_ERR, "libguess needs a language: " + "japanese taiwanese chinese korean russian arabic turkish " + "greek hebrew polish baltic\n"); + return NULL; + } + + return libguess_determine_encoding(buf.start, buf.len, language); +} +#endif + // Runs charset auto-detection on the input buffer, and returns the result. // If auto-detection fails, NULL is returned. 
// If user_cp doesn't refer to any known auto-detection (for example because @@ -126,6 +148,10 @@ const char *mp_charset_guess(bstr buf, const char *user_cp) if (bstrcasecmp0(type, "enca") == 0) res = enca_guess(buf, lang); #endif +#ifdef CONFIG_LIBGUESS + if (bstrcasecmp0(type, "guess") == 0) + res = libguess_guess(buf, lang); +#endif if (res) { mp_msg(MSGT_SUBREADER, MSGL_DBG2, "%.*s detected charset: '%s'\n", -- cgit v1.2.3 From 00de44eec90e45f4801e45d636b6759e1fdb9d2f Mon Sep 17 00:00:00 2001 From: wm4 Date: Tue, 25 Jun 2013 00:03:37 +0200 Subject: options: add -sub-speed option Should we actually get into trouble for improper handling of frame-based subtitle formats, this might be the simplest way to work around this. It is also a bit more intuitive than -subfps, which might use an unknown, misdetected, or nonsensical video FPS. Still pretty silly, though. --- core/options.c | 2 ++ core/options.h | 1 + 2 files changed, 3 insertions(+) (limited to 'core') diff --git a/core/options.c b/core/options.c index 3ce4315572..40c8527394 100644 --- a/core/options.c +++ b/core/options.c @@ -492,6 +492,7 @@ const m_option_t mp_opts[] = { OPT_STRING("subcp", sub_cp, 0), OPT_FLOAT("sub-delay", sub_delay, 0), OPT_FLOAT("subfps", sub_fps, 0), + OPT_FLOAT("sub-speed", sub_speed, 0), OPT_FLAG("autosub", sub_auto, 0), OPT_FLAG("sub-visibility", sub_visibility, 0), OPT_FLAG("sub-forced-only", forced_subs_only, 0), @@ -786,6 +787,7 @@ const struct MPOpts mp_default_opts = { .audio_display = 1, .sub_visibility = 1, .sub_pos = 100, + .sub_speed = 1.0, .extension_parsing = 1, .audio_output_channels = MP_CHMAP_INIT_STEREO, .audio_output_format = -1, // AF_FORMAT_UNKNOWN diff --git a/core/options.h b/core/options.h index 6ec051ddca..0c6f6c7271 100644 --- a/core/options.h +++ b/core/options.h @@ -141,6 +141,7 @@ typedef struct MPOpts { int sub_pos; float sub_delay; float sub_fps; + float sub_speed; int forced_subs_only; char *quvi_format; -- cgit v1.2.3