From a70d575291d48289669ee8989e0597a94189dd8d Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Tue, 11 Jun 2013 21:39:54 +0200
Subject: sub: preload external text subtitles

If a subtitle is external, read it completely and add all subtitle
events in advance when the subtitle track is selected. This is done
for text subtitles only. (Note that subreader.c and subtitles loaded
with libass are different and don't have anything to do with this
commit.)
---
 sub/dec_sub.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

(limited to 'sub/dec_sub.c')
diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index b72630470c..187ae5f22c 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -21,7 +21,7 @@
 #include <assert.h>
 
 #include "config.h"
-#include "demux/stheader.h"
+#include "demux/demux.h"
 #include "sd.h"
 #include "sub.h"
 #include "dec_sub.h"
@@ -56,10 +56,17 @@ struct dec_sub {
     struct MPOpts *opts;
     struct sd init_sd;
 
+    double video_fps;
+
     struct sd *sd[MAX_NUM_SD];
     int num_sd;
 };
 
+struct packet_list {
+    struct demux_packet **packets;
+    int num_packets;
+};
+
 struct dec_sub *sub_create(struct MPOpts *opts)
 {
     struct dec_sub *sub = talloc_zero(NULL, struct dec_sub);
@@ -102,6 +109,11 @@ void sub_set_video_res(struct dec_sub *sub, int w, int h)
     sub->init_sd.sub_video_h = h;
 }
 
+void sub_set_video_fps(struct dec_sub *sub, double fps)
+{
+    sub->video_fps = fps;
+}
+
 void sub_set_extradata(struct dec_sub *sub, void *data, int data_len)
 {
     sub->init_sd.extradata = data_len ? talloc_memdup(sub, data, data_len) : NULL;
@@ -249,6 +261,52 @@ void sub_init_from_sh(struct dec_sub *sub, struct sh_sub *sh)
            sh->gsh->codec ? sh->gsh->codec : "<unknown>");
 }
 
+static void add_sub_list(struct dec_sub *sub, struct packet_list *subs)
+{
+    struct sd *sd = sub_get_last_sd(sub);
+    assert(sd);
+
+    sd->no_remove_duplicates = true;
+
+    for (int n = 0; n < subs->num_packets; n++)
+        sub_decode(sub, subs->packets[n]);
+
+    // Hack for broken FFmpeg packet format: make sd_ass keep the subtitle
+    // events on reset(), even if broken FFmpeg ASS packets were received
+    // (from sd_lavc_conv.c). Normally, these events are removed on seek/reset,
+    // but this is obviously unwanted in this case.
+    if (sd->driver->fix_events)
+        sd->driver->fix_events(sd);
+
+    sd->no_remove_duplicates = false;
+}
+
+// Read all packets from the demuxer and decode/add them. Returns false if
+// there are circumstances which makes this not possible.
+bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
+{
+    if (!sub_accept_packets_in_advance(sub) || sh->track)
+        return false;
+
+    void *tmp = talloc_new(NULL);
+    struct packet_list subs = {0};
+
+    for (;;) {
+        ds_get_next_pts(sh->ds);
+        struct demux_packet *pkt = ds_get_packet_sub(sh->ds);
+        if (!pkt)
+            break;
+        pkt = demux_copy_packet(pkt);
+        talloc_steal(tmp, pkt);
+        MP_TARRAY_APPEND(tmp, subs.packets, subs.num_packets, pkt);
+    }
+
+    add_sub_list(sub, &subs);
+
+    talloc_free(tmp);
+    return true;
+}
+
 bool sub_accept_packets_in_advance(struct dec_sub *sub)
 {
     // Converters are assumed to always accept packets in advance
-- 
cgit v1.2.3


From 64b1374a4456435cc4486a8153703fa89af58e31 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Tue, 11 Jun 2013 21:41:50 +0200
Subject: sub: do some timing postprocessing on preloaded subs

This fixes the -subfps option (which unfortunately is still useful),
and fixes minor annoying timing errors (which unfortunately still
happen).

Note that none of these affect ASS or image subtitles. ASS is specially
handled: libass loads subtitles as ASS_Track. There are no actual
packets passed around, and sd_ass just uses the ASS_Track.

Disable the --sub-no-text-pp option. It's misleading now and always was
completely useless.
---
 sub/dec_sub.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 187ae5f22c..230dfa2f55 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -261,6 +261,43 @@ void sub_init_from_sh(struct dec_sub *sub, struct sh_sub *sh)
            sh->gsh->codec ? sh->gsh->codec : "<unknown>");
 }
 
+static void multiply_timings(struct packet_list *subs, double factor)
+{
+    for (int n = 0; n < subs->num_packets; n++) {
+        struct demux_packet *pkt = subs->packets[n];
+        if (pkt->pts != MP_NOPTS_VALUE)
+            pkt->pts *= factor;
+        if (pkt->duration > 0)
+            pkt->duration *= factor;
+    }
+}
+
+// Remove overlaps and fill gaps between adjacent subtitle packets. This is done
+// by adjusting the duration of the earlier packet. If the gaps or overlap are
+// larger than the threshold, or if the durations are close to the threshold,
+// don't change the events.
+// The algorithm is maximally naive and doesn't work if there are multiple
+// overlapping lines. (It's not worth the trouble.)
+static void fix_overlaps_and_gaps(struct packet_list *subs)
+{
+    double threshold = 0.2;     // up to 200 ms overlaps or gaps are removed
+    double keep = threshold * 2;// don't change timings if durations are smaller
+    for (int i = 0; i < subs->num_packets - 1; i++) {
+        struct demux_packet *cur = subs->packets[i];
+        struct demux_packet *next = subs->packets[i + 1];
+        if (cur->pts != MP_NOPTS_VALUE && cur->duration > 0 &&
+            next->pts != MP_NOPTS_VALUE && next->duration > 0)
+        {
+            double end = cur->pts + cur->duration;
+            if (fabs(next->pts - end) <= threshold && cur->duration >= keep &&
+                next->duration >= keep)
+            {
+                cur->duration = next->pts - cur->pts;
+            }
+        }
+    }
+}
+
 static void add_sub_list(struct dec_sub *sub, struct packet_list *subs)
 {
     struct sd *sd = sub_get_last_sd(sub);
@@ -285,6 +322,8 @@ static void add_sub_list(struct dec_sub *sub, struct packet_list *subs)
 // there are circumstances which makes this not possible.
 bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
 {
+    struct MPOpts *opts = sub->opts;
+
     if (!sub_accept_packets_in_advance(sub) || sh->track)
         return false;
 
@@ -301,6 +340,12 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
         MP_TARRAY_APPEND(tmp, subs.packets, subs.num_packets, pkt);
     }
 
+    if (opts->sub_fps && sub->video_fps)
+        multiply_timings(&subs, opts->sub_fps / sub->video_fps);
+
+    if (!opts->suboverlap_enabled)
+        fix_overlaps_and_gaps(&subs);
+
     add_sub_list(sub, &subs);
 
     talloc_free(tmp);
-- 
cgit v1.2.3


From db2e1ef4d210f5a8a4a2555d0a78b0a4dea103ec Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Fri, 21 Jun 2013 00:26:05 +0200
Subject: Move/rename subreader.c

---
 sub/dec_sub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 230dfa2f55..bfd6d90d03 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -25,7 +25,7 @@
 #include "sd.h"
 #include "sub.h"
 #include "dec_sub.h"
-#include "subreader.h"
+#include "demux/subreader.h"
 #include "core/options.h"
 #include "core/mp_msg.h"
 
-- 
cgit v1.2.3


From 98388c0c073906f4485420485e27a14e8d957a2d Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Fri, 21 Jun 2013 21:34:55 +0200
Subject: subreader: turn into actual demuxer

subreader.c (before this commit renamed to demux_subreader.c) was
special-cased for the -sub option. The plan is to use the normal demuxer
codepath for all subtitle formats (so we can prefer libavformat demuxers
for most formats).

There are some subtle changes. The probe size is restricted to 32 KB
(instead of unlimited + giving up after 100 lines of input). For
formats like MicroDVD, the video FPS isn't used anymore, because it's
not available on the subtitle demuxer level. Instead, hardcode it to
23.976 FPS (libavformat seems to do the same). The user can probably
still use -sub-fps to fix the timing. Checking the file extension for
".utf"/".utf8"/".utf-8" is simply removed (seems worthless, was in the
way, and I've never seen this anywhere).
---
 sub/dec_sub.c | 65 -----------------------------------------------------------
 1 file changed, 65 deletions(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index bfd6d90d03..dd1168d0ad 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -25,7 +25,6 @@
 #include "sd.h"
 #include "sub.h"
 #include "dec_sub.h"
-#include "demux/subreader.h"
 #include "core/options.h"
 #include "core/mp_msg.h"
 
@@ -138,68 +137,6 @@ static void print_chain(struct dec_sub *sub)
     mp_msg(MSGT_OSD, MSGL_V, "\n");
 }
 
-// Subtitles read with subreader.c
-static void read_sub_data(struct dec_sub *sub, struct sub_data *subdata)
-{
-    assert(sub_accept_packets_in_advance(sub));
-    char *temp = NULL;
-
-    struct sd *sd = sub_get_last_sd(sub);
-
-    sd->no_remove_duplicates = true;
-
-    for (int i = 0; i < subdata->sub_num; i++) {
-        subtitle *st = &subdata->subtitles[i];
-        // subdata is in 10 ms ticks, pts is in seconds
-        double t = subdata->sub_uses_time ? 0.01 : (1 / subdata->fallback_fps);
-
-        int len = 0;
-        for (int j = 0; j < st->lines; j++)
-            len += st->text[j] ? strlen(st->text[j]) : 0;
-
-        len += 2 * st->lines;   // '\N', including the one after the last line
-        len += 6;               // {\anX}
-        len += 1;               // '\0'
-
-        if (talloc_get_size(temp) < len) {
-            talloc_free(temp);
-            temp = talloc_array(NULL, char, len);
-        }
-
-        char *p = temp;
-        char *end = p + len;
-
-        if (st->alignment)
-            p += snprintf(p, end - p, "{\\an%d}", st->alignment);
-
-        for (int j = 0; j < st->lines; j++)
-            p += snprintf(p, end - p, "%s\\N", st->text[j]);
-
-        if (st->lines > 0)
-            p -= 2;             // remove last "\N"
-        *p = 0;
-
-        struct demux_packet pkt = {0};
-        pkt.pts = st->start * t;
-        pkt.duration = (st->end - st->start) * t;
-        pkt.buffer = temp;
-        pkt.len = strlen(temp);
-
-        sub_decode(sub, &pkt);
-    }
-
-    // Hack for broken FFmpeg packet format: make sd_ass keep the subtitle
-    // events on reset(), even though broken FFmpeg ASS packets were received
-    // (from sd_lavc_conv.c). Normally, these events are removed on seek/reset,
-    // but this is obviously unwanted in this case.
-    if (sd && sd->driver->fix_events)
-        sd->driver->fix_events(sd);
-
-    sd->no_remove_duplicates = false;
-
-    talloc_free(temp);
-}
-
 static int sub_init_decoder(struct dec_sub *sub, struct sd *sd)
 {
     sd->driver = NULL;
@@ -242,8 +179,6 @@ void sub_init_from_sh(struct dec_sub *sub, struct sh_sub *sh)
         // Try adding new converters until a decoder is reached
         if (sd->driver->get_bitmaps || sd->driver->get_text) {
             print_chain(sub);
-            if (sh->sub_data)
-                read_sub_data(sub, sh->sub_data);
             return;
         }
         init_sd = (struct sd) {
-- 
cgit v1.2.3


From feb64c2717139f030974823756f51cbe215ef818 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Sun, 23 Jun 2013 22:14:43 +0200
Subject: sub: attempt to use video FPS for frame based subtitle formats

This only affects demux_subreader.c for now. Maybe there is some hope
this can be used for libavformat demuxers too, but I'm not sure yet.
---
 sub/dec_sub.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index dd1168d0ad..54f3c1ebfe 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -275,6 +275,10 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
         MP_TARRAY_APPEND(tmp, subs.packets, subs.num_packets, pkt);
     }
 
+    // 23.976 FPS is used as default timebase for frame based formats
+    if (sub->video_fps && sh->frame_based)
+        multiply_timings(&subs, sub->video_fps / 23.976);
+
     if (opts->sub_fps && sub->video_fps)
         multiply_timings(&subs, opts->sub_fps / sub->video_fps);
 
-- 
cgit v1.2.3


From f735a03346e8ec743bc89d5bdbaafd62dc0f084d Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Sun, 23 Jun 2013 22:15:04 +0200
Subject: sub: add subtitle charset conversion

This code was once part of subreader.c, then traveled to libass, and now
made its way back to the fork of the fork of the original code, MPlayer.

It works pretty much the same as subreader.c, except that we have to
concatenate some packets to do auto-detection. This is rather annoying,
but for all we know the actual source file could be a binary format.

Unlike subreader.c, the iconv context is reopened on each packet. This
is simpler, and with respect to multibyte encodings, more robust.
Reopening is probably not very fast, but I suspect subtitle charset
conversion is not an operation that happens often or has to be fast.

Also, this auto-detection is disabled for movtext - this is the only
format we know that has binary data in its packets, but is actually
decoded to text. FFmpeg doesn't really allow us to solve this properly,
because a) the input packets can be binary, and b) the output will be
checked whether it's UTF-8, and if it's not, the output is thrown away
and an error message is printed. We could just recode the decoded
subtitles before sd_ass if it weren't for that.
---
 sub/dec_sub.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 2 deletions(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 54f3c1ebfe..2b4bfc2e8d 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -18,6 +18,7 @@
 
 #include <stdlib.h>
 #include <stdbool.h>
+#include <string.h>
 #include <assert.h>
 
 #include "config.h"
@@ -27,6 +28,7 @@
 #include "dec_sub.h"
 #include "core/options.h"
 #include "core/mp_msg.h"
+#include "core/charset_conv.h"
 
 extern const struct sd_functions sd_ass;
 extern const struct sd_functions sd_lavc;
@@ -56,6 +58,7 @@ struct dec_sub {
     struct sd init_sd;
 
     double video_fps;
+    const char *charset;
 
     struct sd *sd[MAX_NUM_SD];
     int num_sd;
@@ -196,6 +199,37 @@ void sub_init_from_sh(struct dec_sub *sub, struct sh_sub *sh)
            sh->gsh->codec ? sh->gsh->codec : "<unknown>");
 }
 
+static const char *guess_sub_cp(struct packet_list *subs, const char *usercp)
+{
+    if (!mp_charset_requires_guess(usercp))
+        return usercp;
+
+    // Concat all subs into a buffer. We can't probably do much better without
+    // having the original data (which we don't, not anymore).
+    int max_size = 2 * 1024 * 1024;
+    const char *sep = "\n\n"; // In utf-16: U+0A0A GURMUKHI LETTER UU
+    int sep_len = strlen(sep);
+    int num_pkt = 0;
+    int size = 0;
+    for (int n = 0; n < subs->num_packets; n++) {
+        struct demux_packet *pkt = subs->packets[n];
+        if (size + pkt->len > max_size)
+            break;
+        size += pkt->len + sep_len;
+        num_pkt++;
+    }
+    bstr text = {talloc_size(NULL, size), 0};
+    for (int n = 0; n < num_pkt; n++) {
+        struct demux_packet *pkt = subs->packets[n];
+        memcpy(text.start + text.len, pkt->buffer, pkt->len);
+        memcpy(text.start + text.len + pkt->len, sep, sep_len);
+        text.len += pkt->len + sep_len;
+    }
+    const char *guess = mp_charset_guess(text, usercp);
+    talloc_free(text.start);
+    return guess;
+}
+
 static void multiply_timings(struct packet_list *subs, double factor)
 {
     for (int n = 0; n < subs->num_packets; n++) {
@@ -262,6 +296,7 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
     if (!sub_accept_packets_in_advance(sub) || sh->track)
         return false;
 
+    const char *codec = sh->gsh->codec ? sh->gsh->codec : "";
     void *tmp = talloc_new(NULL);
     struct packet_list subs = {0};
 
@@ -275,6 +310,14 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
         MP_TARRAY_APPEND(tmp, subs.packets, subs.num_packets, pkt);
     }
 
+    // Can't run auto-detection on movtext packets: it's the only codec that
+    // even though it decodes to text has binary input data.
+    if (opts->sub_cp && strcmp(codec, "movtext") != 0)
+        sub->charset = guess_sub_cp(&subs, opts->sub_cp);
+
+    if (sub->charset)
+        mp_msg(MSGT_OSD, MSGL_INFO, "Using subtitle charset: %s\n", sub->charset);
+
     // 23.976 FPS is used as default timebase for frame based formats
     if (sub->video_fps && sh->frame_based)
         multiply_timings(&subs, sub->video_fps / 23.976);
@@ -313,10 +356,34 @@ static void decode_next(struct dec_sub *sub, int n, struct demux_packet *packet)
     }
 }
 
+static struct demux_packet *recode_packet(struct demux_packet *in,
+                                          const char *charset)
+{
+    struct demux_packet *pkt = NULL;
+    bstr in_buf = {in->buffer, in->len};
+    bstr conv = mp_iconv_to_utf8(in_buf, charset, MP_ICONV_VERBOSE);
+    if (conv.start && conv.start != in_buf.start) {
+        pkt = talloc_ptrtype(NULL, pkt);
+        talloc_steal(pkt, conv.start);
+        *pkt = (struct demux_packet) {
+            .buffer = conv.start,
+            .len = conv.len,
+            .pts = in->pts,
+            .duration = in->duration,
+            .avpacket = in->avpacket, // questionable, but gives us sidedata
+        };
+    }
+    return pkt;
+}
+
 void sub_decode(struct dec_sub *sub, struct demux_packet *packet)
 {
-    if (sub->num_sd > 0)
-        decode_next(sub, 0, packet);
+    if (sub->num_sd > 0) {
+        struct demux_packet *recoded = NULL;
+        if (sub->charset)
+            recoded = recode_packet(packet, sub->charset);
+        decode_next(sub, 0, recoded ? recoded : packet);
+    }
 }
 
 void sub_get_bitmaps(struct dec_sub *sub, struct mp_osd_res dim, double pts,
-- 
cgit v1.2.3


From 29cec6f98ba523025acdd1aea42795985e32306f Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Sun, 23 Jun 2013 22:16:47 +0200
Subject: sub: prevent subtitle conversion if subs are known UTF-8

Currently this happens only in an obscure case (reading UTF-16 files
with the old subreader).
---
 sub/dec_sub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 2b4bfc2e8d..6129e48c38 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -312,7 +312,7 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
 
     // Can't run auto-detection on movtext packets: it's the only codec that
     // even though it decodes to text has binary input data.
-    if (opts->sub_cp && strcmp(codec, "movtext") != 0)
+    if (opts->sub_cp && !sh->is_utf8 && strcmp(codec, "movtext") != 0)
         sub->charset = guess_sub_cp(&subs, opts->sub_cp);
 
     if (sub->charset)
-- 
cgit v1.2.3


From 5dcae2481d25064da6c0245494a574abba3fc084 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Mon, 24 Jun 2013 00:47:08 +0200
Subject: dec_sub: move code around

---
 sub/dec_sub.c | 96 +++++++++++++++++++++++++++++++----------------------------
 1 file changed, 51 insertions(+), 45 deletions(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 6129e48c38..3d5f9bc088 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -199,6 +199,57 @@ void sub_init_from_sh(struct dec_sub *sub, struct sh_sub *sh)
            sh->gsh->codec ? sh->gsh->codec : "<unknown>");
 }
 
+static struct demux_packet *get_decoded_packet(struct sd *sd)
+{
+    return sd->driver->get_converted ? sd->driver->get_converted(sd) : NULL;
+}
+
+static void decode_chain(struct sd **sd, int num_sd, struct demux_packet *packet)
+{
+    if (num_sd == 0)
+        return;
+    struct sd *dec = sd[0];
+    dec->driver->decode(dec, packet);
+    if (num_sd > 1) {
+        while (1) {
+            struct demux_packet *next = get_decoded_packet(dec);
+            if (!next)
+                break;
+            decode_chain(sd + 1, num_sd - 1, next);
+        }
+    }
+}
+
+static struct demux_packet *recode_packet(struct demux_packet *in,
+                                          const char *charset)
+{
+    struct demux_packet *pkt = NULL;
+    bstr in_buf = {in->buffer, in->len};
+    bstr conv = mp_iconv_to_utf8(in_buf, charset, MP_ICONV_VERBOSE);
+    if (conv.start && conv.start != in_buf.start) {
+        pkt = talloc_ptrtype(NULL, pkt);
+        talloc_steal(pkt, conv.start);
+        *pkt = (struct demux_packet) {
+            .buffer = conv.start,
+            .len = conv.len,
+            .pts = in->pts,
+            .duration = in->duration,
+            .avpacket = in->avpacket, // questionable, but gives us sidedata
+        };
+    }
+    return pkt;
+}
+
+void sub_decode(struct dec_sub *sub, struct demux_packet *packet)
+{
+    if (sub->num_sd > 0) {
+        struct demux_packet *recoded = NULL;
+        if (sub->charset)
+            recoded = recode_packet(packet, sub->charset);
+        decode_chain(sub->sd, sub->num_sd, recoded ? recoded : packet);
+    }
+}
+
 static const char *guess_sub_cp(struct packet_list *subs, const char *usercp)
 {
     if (!mp_charset_requires_guess(usercp))
@@ -341,51 +392,6 @@ bool sub_accept_packets_in_advance(struct dec_sub *sub)
     return sd && sd->driver->accept_packets_in_advance;
 }
 
-static void decode_next(struct dec_sub *sub, int n, struct demux_packet *packet)
-{
-    struct sd *sd = sub->sd[n];
-    sd->driver->decode(sd, packet);
-    if (n + 1 >= sub->num_sd || !sd->driver->get_converted)
-        return;
-    while (1) {
-        struct demux_packet *next =
-            sd->driver->get_converted ? sd->driver->get_converted(sd) : NULL;
-        if (!next)
-            break;
-        decode_next(sub, n + 1, next);
-    }
-}
-
-static struct demux_packet *recode_packet(struct demux_packet *in,
-                                          const char *charset)
-{
-    struct demux_packet *pkt = NULL;
-    bstr in_buf = {in->buffer, in->len};
-    bstr conv = mp_iconv_to_utf8(in_buf, charset, MP_ICONV_VERBOSE);
-    if (conv.start && conv.start != in_buf.start) {
-        pkt = talloc_ptrtype(NULL, pkt);
-        talloc_steal(pkt, conv.start);
-        *pkt = (struct demux_packet) {
-            .buffer = conv.start,
-            .len = conv.len,
-            .pts = in->pts,
-            .duration = in->duration,
-            .avpacket = in->avpacket, // questionable, but gives us sidedata
-        };
-    }
-    return pkt;
-}
-
-void sub_decode(struct dec_sub *sub, struct demux_packet *packet)
-{
-    if (sub->num_sd > 0) {
-        struct demux_packet *recoded = NULL;
-        if (sub->charset)
-            recoded = recode_packet(packet, sub->charset);
-        decode_next(sub, 0, recoded ? recoded : packet);
-    }
-}
-
 void sub_get_bitmaps(struct dec_sub *sub, struct mp_osd_res dim, double pts,
                      struct sub_bitmaps *res)
 {
-- 
cgit v1.2.3


From 74c56309a33aa07fc79e193a619d8a47924da391 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Mon, 24 Jun 2013 00:58:10 +0200
Subject: dec_sub: change sublist memory allocation

---
 sub/dec_sub.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 3d5f9bc088..c7562fe014 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -348,8 +348,7 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
         return false;
 
     const char *codec = sh->gsh->codec ? sh->gsh->codec : "";
-    void *tmp = talloc_new(NULL);
-    struct packet_list subs = {0};
+    struct packet_list *subs = talloc_zero(NULL, struct packet_list);
 
     for (;;) {
         ds_get_next_pts(sh->ds);
@@ -357,31 +356,31 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
         if (!pkt)
             break;
         pkt = demux_copy_packet(pkt);
-        talloc_steal(tmp, pkt);
-        MP_TARRAY_APPEND(tmp, subs.packets, subs.num_packets, pkt);
+        talloc_steal(subs, pkt);
+        MP_TARRAY_APPEND(subs, subs->packets, subs->num_packets, pkt);
     }
 
     // Can't run auto-detection on movtext packets: it's the only codec that
     // even though it decodes to text has binary input data.
     if (opts->sub_cp && !sh->is_utf8 && strcmp(codec, "movtext") != 0)
-        sub->charset = guess_sub_cp(&subs, opts->sub_cp);
+        sub->charset = guess_sub_cp(subs, opts->sub_cp);
 
     if (sub->charset)
         mp_msg(MSGT_OSD, MSGL_INFO, "Using subtitle charset: %s\n", sub->charset);
 
     // 23.976 FPS is used as default timebase for frame based formats
     if (sub->video_fps && sh->frame_based)
-        multiply_timings(&subs, sub->video_fps / 23.976);
+        multiply_timings(subs, sub->video_fps / 23.976);
 
     if (opts->sub_fps && sub->video_fps)
-        multiply_timings(&subs, opts->sub_fps / sub->video_fps);
+        multiply_timings(subs, opts->sub_fps / sub->video_fps);
 
     if (!opts->suboverlap_enabled)
-        fix_overlaps_and_gaps(&subs);
+        fix_overlaps_and_gaps(subs);
 
-    add_sub_list(sub, &subs);
+    add_sub_list(sub, subs);
 
-    talloc_free(tmp);
+    talloc_free(subs);
     return true;
 }
 
-- 
cgit v1.2.3


From 0b2e073853beff170e8a9f8a5898e636cf636cf0 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Mon, 24 Jun 2013 01:04:17 +0200
Subject: dec_sub: allow postprocessing between decoders

Until now, timing and charset recoding postprocessing was applied on
packets as they were output by the demuxer, and then passed to the
decoders. Make it so that postprocessing can happen after some decoders
in special situations.
---
 sub/dec_sub.c | 55 +++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 14 deletions(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index c7562fe014..01ba109ad0 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -240,16 +240,22 @@ static struct demux_packet *recode_packet(struct demux_packet *in,
     return pkt;
 }
 
-void sub_decode(struct dec_sub *sub, struct demux_packet *packet)
+static void decode_chain_recode(struct dec_sub *sub, struct sd **sd, int num_sd,
+                                struct demux_packet *packet)
 {
-    if (sub->num_sd > 0) {
+    if (num_sd > 0) {
         struct demux_packet *recoded = NULL;
         if (sub->charset)
             recoded = recode_packet(packet, sub->charset);
-        decode_chain(sub->sd, sub->num_sd, recoded ? recoded : packet);
+        decode_chain(sd, num_sd, recoded ? recoded : packet);
     }
 }
 
+void sub_decode(struct dec_sub *sub, struct demux_packet *packet)
+{
+    decode_chain_recode(sub, sub->sd, sub->num_sd, packet);
+}
+
 static const char *guess_sub_cp(struct packet_list *subs, const char *usercp)
 {
     if (!mp_charset_requires_guess(usercp))
@@ -318,7 +324,7 @@ static void fix_overlaps_and_gaps(struct packet_list *subs)
     }
 }
 
-static void add_sub_list(struct dec_sub *sub, struct packet_list *subs)
+static void add_sub_list(struct dec_sub *sub, int at, struct packet_list *subs)
 {
     struct sd *sd = sub_get_last_sd(sub);
     assert(sd);
@@ -326,7 +332,7 @@ static void add_sub_list(struct dec_sub *sub, struct packet_list *subs)
     sd->no_remove_duplicates = true;
 
     for (int n = 0; n < subs->num_packets; n++)
-        sub_decode(sub, subs->packets[n]);
+        decode_chain_recode(sub, sub->sd + at, sub->num_sd - at, subs->packets[n]);
 
     // Hack for broken FFmpeg packet format: make sd_ass keep the subtitle
     // events on reset(), even if broken FFmpeg ASS packets were received
@@ -338,31 +344,52 @@ static void add_sub_list(struct dec_sub *sub, struct packet_list *subs)
     sd->no_remove_duplicates = false;
 }
 
+static void add_packet(struct packet_list *subs, struct demux_packet *pkt)
+{
+    pkt = demux_copy_packet(pkt);
+    talloc_steal(subs, pkt);
+    MP_TARRAY_APPEND(subs, subs->packets, subs->num_packets, pkt);
+}
+
 // Read all packets from the demuxer and decode/add them. Returns false if
 // there are circumstances which makes this not possible.
 bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
 {
     struct MPOpts *opts = sub->opts;
 
-    if (!sub_accept_packets_in_advance(sub) || sh->track)
+    if (!sub_accept_packets_in_advance(sub) || sh->track || sub->num_sd < 1)
         return false;
 
-    const char *codec = sh->gsh->codec ? sh->gsh->codec : "";
     struct packet_list *subs = talloc_zero(NULL, struct packet_list);
 
+    // In some cases, we want to put the packets through a decoder first.
+    // Preprocess until sub->sd[preprocess].
+    int preprocess = 0;
+
+    // movtext is currently the only subtitle format that has text output,
+    // but binary input. Do charset conversion after converting to text.
+    if (sub->sd[0]->driver == &sd_movtext)
+        preprocess = 1;
+
     for (;;) {
         ds_get_next_pts(sh->ds);
         struct demux_packet *pkt = ds_get_packet_sub(sh->ds);
         if (!pkt)
             break;
-        pkt = demux_copy_packet(pkt);
-        talloc_steal(subs, pkt);
-        MP_TARRAY_APPEND(subs, subs->packets, subs->num_packets, pkt);
+        if (preprocess) {
+            decode_chain(sub->sd, preprocess, pkt);
+            while (1) {
+                pkt = get_decoded_packet(sub->sd[preprocess - 1]);
+                if (!pkt)
+                    break;
+                add_packet(subs, pkt);
+            }
+        } else {
+            add_packet(subs, pkt);
+        }
     }
 
-    // Can't run auto-detection on movtext packets: it's the only codec that
-    // even though it decodes to text has binary input data.
-    if (opts->sub_cp && !sh->is_utf8 && strcmp(codec, "movtext") != 0)
+    if (opts->sub_cp && !sh->is_utf8)
         sub->charset = guess_sub_cp(subs, opts->sub_cp);
 
     if (sub->charset)
@@ -378,7 +405,7 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
     if (!opts->suboverlap_enabled)
         fix_overlaps_and_gaps(subs);
 
-    add_sub_list(sub, subs);
+    add_sub_list(sub, preprocess, subs);
 
     talloc_free(subs);
     return true;
-- 
cgit v1.2.3


From 709389ce653d5ab11abf8de067cfb5932e642898 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Mon, 24 Jun 2013 02:06:55 +0200
Subject: sub: add hack for Libav SRT demuxer

Before this commit, SRT demuxing and display actually happened to work
on Libav. But it was using the libavcodec srt converter (which is
essentially unmaintained in Libav), and timing postprocessing didn't
work. For some background explanations see sd_lavf_srt.c.
---
 sub/dec_sub.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 01ba109ad0..56ed2a3d6f 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -36,6 +36,7 @@ extern const struct sd_functions sd_spu;
 extern const struct sd_functions sd_movtext;
 extern const struct sd_functions sd_srt;
 extern const struct sd_functions sd_microdvd;
+extern const struct sd_functions sd_lavf_srt;
 extern const struct sd_functions sd_lavc_conv;
 
 static const struct sd_functions *sd_list[] = {
@@ -47,6 +48,7 @@ static const struct sd_functions *sd_list[] = {
     &sd_movtext,
     &sd_srt,
     &sd_microdvd,
+    &sd_lavf_srt,
     &sd_lavc_conv,
     NULL
 };
@@ -134,7 +136,7 @@ static void print_chain(struct dec_sub *sub)
     mp_msg(MSGT_OSD, MSGL_V, "Subtitle filter chain: ");
     for (int n = 0; n < sub->num_sd; n++) {
         struct sd *sd = sub->sd[n];
-        mp_msg(MSGT_OSD, MSGL_V, "%s%s (%s)", n > 0 ? " -> " : "", 
+        mp_msg(MSGT_OSD, MSGL_V, "%s%s (%s)", n > 0 ? " -> " : "",
                sd->driver->name, sd->codec);
     }
     mp_msg(MSGT_OSD, MSGL_V, "\n");
@@ -371,6 +373,10 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
     if (sub->sd[0]->driver == &sd_movtext)
         preprocess = 1;
 
+    // Broken Libav libavformat srt packet format (fix timestamps first).
+    if (sub->sd[0]->driver == &sd_lavf_srt)
+        preprocess = 1;
+
     for (;;) {
         ds_get_next_pts(sh->ds);
         struct demux_packet *pkt = ds_get_packet_sub(sh->ds);
-- 
cgit v1.2.3


From 125c20bd081a7d99681f3ac25174c1360b885436 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Mon, 24 Jun 2013 23:52:53 +0200
Subject: dec_sub: add hack to display last MicroDVD subtitle event

The old subreader.c infrastructure handled this in a similar way.
---
 sub/dec_sub.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 56ed2a3d6f..968ca3e39f 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -411,6 +411,17 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
     if (!opts->suboverlap_enabled)
         fix_overlaps_and_gaps(subs);
 
+    if (sh->gsh->codec && strcmp(sh->gsh->codec, "microdvd") == 0) {
+        // The last subtitle event in MicroDVD subs can have duration unset,
+        // which means show the subtitle until end of video.
+        // See FFmpeg FATE MicroDVD_capability_tester.sub
+        if (subs->num_packets) {
+            struct demux_packet *last = subs->packets[subs->num_packets - 1];
+            if (last->duration <= 0)
+                last->duration = 10; // arbitrary
+        }
+    }
+
     add_sub_list(sub, preprocess, subs);
 
     talloc_free(subs);
-- 
cgit v1.2.3


From 00de44eec90e45f4801e45d636b6759e1fdb9d2f Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Tue, 25 Jun 2013 00:03:37 +0200
Subject: options: add -sub-speed option

Should we actually get into trouble for improper handling of
frame-based subtitle formats, this might be the simplest way to
work around it. It is also a bit more intuitive than -subfps, which
might use an unknown, misdetected, or nonsensical video FPS.
Still pretty silly, though.
---
 sub/dec_sub.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'sub/dec_sub.c')

diff --git a/sub/dec_sub.c b/sub/dec_sub.c
index 968ca3e39f..a1392017a2 100644
--- a/sub/dec_sub.c
+++ b/sub/dec_sub.c
@@ -401,12 +401,19 @@ bool sub_read_all_packets(struct dec_sub *sub, struct sh_sub *sh)
     if (sub->charset)
         mp_msg(MSGT_OSD, MSGL_INFO, "Using subtitle charset: %s\n", sub->charset);
 
+    double sub_speed = 1.0;
+
     // 23.976 FPS is used as default timebase for frame based formats
     if (sub->video_fps && sh->frame_based)
-        multiply_timings(subs, sub->video_fps / 23.976);
+        sub_speed *= sub->video_fps / 23.976;
 
     if (opts->sub_fps && sub->video_fps)
-        multiply_timings(subs, opts->sub_fps / sub->video_fps);
+        sub_speed *= opts->sub_fps / sub->video_fps;
+
+    sub_speed *= opts->sub_speed;
+
+    if (sub_speed != 1.0)
+        multiply_timings(subs, sub_speed);
 
     if (!opts->suboverlap_enabled)
         fix_overlaps_and_gaps(subs);
-- 
cgit v1.2.3