diff options
Diffstat (limited to 'sub/dec_sub.c')
-rw-r--r-- | sub/dec_sub.c | 71 |
1 files changed, 69 insertions, 2 deletions
diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 54f3c1ebfe..2b4bfc2e8d 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -18,6 +18,7 @@
 #include <stdlib.h>
 #include <stdbool.h>
+#include <string.h>
 #include <assert.h>
 #include "config.h"
@@ -27,6 +28,7 @@
 #include "dec_sub.h"
 #include "core/options.h"
 #include "core/mp_msg.h"
+#include "core/charset_conv.h"
 extern const struct sd_functions sd_ass;
 extern const struct sd_functions sd_lavc;
@@ -56,6 +58,7 @@ struct dec_sub {
     struct sd init_sd;
     double video_fps;
+    const char *charset;
     struct sd *sd[MAX_NUM_SD];
     int num_sd;
@@ -196,6 +199,37 @@ void sub_init_from_sh(struct dec_sub *sub, struct sh_sub *sh)
            sh->gsh->codec ? sh->gsh->codec : "<unknown>");
 }
+static const char *guess_sub_cp(struct packet_list *subs, const char *usercp)
+{
+    if (!mp_charset_requires_guess(usercp))
+        return usercp;
+
+    // Concat all subs into a buffer. We can't probably do much better without
+    // having the original data (which we don't, not anymore).
+    int max_size = 2 * 1024 * 1024;
+    const char *sep = "\n\n"; // In utf-16: U+0A0A GURMUKHI LETTER UU
+    int sep_len = strlen(sep);
+    int num_pkt = 0;
+    int size = 0;
+    for (int n = 0; n < subs->num_packets; n++) {
+        struct demux_packet *pkt = subs->packets[n];
+        if (size + pkt->len > max_size)
+            break;
+        size += pkt->len + sep_len;
+        num_pkt++;
+    }
+    bstr text = {talloc_size(NULL, size), 0};
+    for (int n = 0; n < num_pkt; n++) {
+        struct demux_packet *pkt = subs->packets[n];
+        memcpy(text.start + text.len, pkt->buffer, pkt->len);
+        memcpy(text.start + text.len + pkt->len, sep, sep_len);
+        text.len += pkt->len + sep_len;
+    }
+    const char *guess = mp_charset_guess(text, usercp);
+    talloc_free(text.start);
+    return guess;
+}
+
 static void multiply_timings(struct packet_list *subs, double factor)
 {
     for (int n = 0; n < subs->num_packets; n++) {
@@ -262,6 +296,7 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
     if (!sub_accept_packets_in_advance(sub) || sh->track)
         return false;
+    const char *codec = sh->gsh->codec ? sh->gsh->codec : "";
     void *tmp = talloc_new(NULL);
     struct packet_list subs = {0};
@@ -275,6 +310,14 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
         MP_TARRAY_APPEND(tmp, subs.packets, subs.num_packets, pkt);
     }
+    // Can't run auto-detection on movtext packets: it's the only codec that
+    // even though it decodes to text has binary input data.
+    if (opts->sub_cp && strcmp(codec, "movtext") != 0)
+        sub->charset = guess_sub_cp(&subs, opts->sub_cp);
+
+    if (sub->charset)
+        mp_msg(MSGT_OSD, MSGL_INFO, "Using subtitle charset: %s\n", sub->charset);
+
     // 23.976 FPS is used as default timebase for frame based formats
     if (sub->video_fps && sh->frame_based)
         multiply_timings(&subs, sub->video_fps / 23.976);
@@ -313,10 +356,34 @@ static void decode_next(struct dec_sub *sub, int n, struct demux_packet *packet)
     }
 }
+static struct demux_packet *recode_packet(struct demux_packet *in,
+                                          const char *charset)
+{
+    struct demux_packet *pkt = NULL;
+    bstr in_buf = {in->buffer, in->len};
+    bstr conv = mp_iconv_to_utf8(in_buf, charset, MP_ICONV_VERBOSE);
+    if (conv.start && conv.start != in_buf.start) {
+        pkt = talloc_ptrtype(NULL, pkt);
+        talloc_steal(pkt, conv.start);
+        *pkt = (struct demux_packet) {
+            .buffer = conv.start,
+            .len = conv.len,
+            .pts = in->pts,
+            .duration = in->duration,
+            .avpacket = in->avpacket, // questionable, but gives us sidedata
+        };
+    }
+    return pkt;
+}
+
 void sub_decode(struct dec_sub *sub, struct demux_packet *packet)
 {
-    if (sub->num_sd > 0)
-        decode_next(sub, 0, packet);
+    if (sub->num_sd > 0) {
+        struct demux_packet *recoded = NULL;
+        if (sub->charset)
+            recoded = recode_packet(packet, sub->charset);
+        decode_next(sub, 0, recoded ? recoded : packet);
+    }
 }
 void sub_get_bitmaps(struct dec_sub *sub, struct mp_osd_res dim, double pts, |