swresample: limit output size of audio frames

Similar to the previous commit, and for the same reasons. Unlike with af_scaletempo, resampling does not have a natural frame size, so we set an arbitrary size limit on output frames. We add a new option to control this size, although I'm not sure whether anyone will use it, so mark it for testing only. Note that we go through some effort to avoid buffering data in libswresample itself. One reason is that we might have to reinitialize the resampler completely when changing speed, which drops the buffered data. Another is that I'm not sure whether the resampler will do the right thing when applying dynamic speed changes.
author: wm4 <wm4@nowhere> 2018-02-01 08:39:07 +0100
committer: Kevin Mitchell <kevmitch@gmail.com> 2018-02-03 05:01:29 -0800
commit: 7019e0dcfe9f6edddfe8b20f062f8bb94110b3a0 (patch)
tree: 899fa5381652a5b3ace5478a57935bff7772dfdd
parent: 171ec0a7e40030f599b7b385b3699835e8c1eb2d (diff)
download: mpv-7019e0dcfe9f6edddfe8b20f062f8bb94110b3a0.tar.bz2
mpv-7019e0dcfe9f6edddfe8b20f062f8bb94110b3a0.tar.xz
3 files changed, 67 insertions, 35 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index e44a69235b..b0e4f8a9a9 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -3496,6 +3496,15 @@ It also sets the defaults for the ``lavrresample`` audio filter.
     (decoder downmixing), or in the audio output (system mixer), this has no
     effect.
 
+``--audio-resample-max-output-size=<length>``
+    Limit maximum size of audio frames filtered at once, in ms (default: 40).
+    The output size size is limited in order to make resample speed changes
+    react faster. This is necessary especially if decoders or filters output
+    very large frame sizes (like some lossless codecs or some DRC filters).
+    This option does not affect the resampling algorithm in any way.
+
+    For testing/debugging only. Can be removed or changed any time.
+
 ``--audio-swresample-o=<string>``
     Set AVOptions on the SwrContext or AVAudioResampleContext. These should
     be documented by FFmpeg or Libav.
diff --git a/filters/f_swresample.c b/filters/f_swresample.c
index 48bd08d847..593a052a76 100644
--- a/filters/f_swresample.c
+++ b/filters/f_swresample.c
@@ -80,6 +80,7 @@ struct priv {
     struct mp_chmap out_channels;
 
     double current_pts;
+    struct mp_aframe *input;
 
     double cmd_speed;
     double speed;
@@ -96,6 +97,7 @@ const struct m_sub_options resample_conf = {
         OPT_DOUBLE("audio-resample-cutoff", cutoff, M_OPT_RANGE,
                    .min = 0, .max = 1),
         OPT_FLAG("audio-normalize-downmix", normalize, 0),
+        OPT_DOUBLE("audio-resample-max-output-size", max_output_frame_size, 0),
         OPT_KEYVALUELIST("audio-swresample-o", avopts, 0),
         {0}
     },
@@ -357,6 +359,7 @@ static void reset(struct mp_filter *f)
     struct priv *p = f->priv;
 
     p->current_pts = MP_NOPTS_VALUE;
+    TA_FREEP(&p->input);
 
     if (!p->avrctx)
         return;
@@ -426,7 +429,8 @@ static bool reorder_planes(struct mp_aframe *mpa, int *reorder,
 }
 
 static int resample_frame(struct AVAudioResampleContext *r,
-                          struct mp_aframe *out, struct mp_aframe *in)
+                          struct mp_aframe *out, struct mp_aframe *in,
+                          int consume_in)
 {
     // Be aware that the channel layout and count can be different for in and
     // out frames. In some situations the caller will fix up the frames before
@@ -439,7 +443,7 @@ static int resample_frame(struct AVAudioResampleContext *r,
         av_o ? av_o->nb_samples : 0,
         av_i ? av_i->extended_data : NULL,
         av_i ? av_i->linesize[0] : 0,
-        av_i ? av_i->nb_samples : 0);
+        av_i ? MPMIN(av_i->nb_samples, consume_in) : 0);
 }
 
 static struct mp_frame filter_resample_output(struct priv *p,
@@ -450,7 +454,15 @@ static struct mp_frame filter_resample_output(struct priv *p,
     if (!p->avrctx)
         goto error;
 
-    int samples = get_out_samples(p, in ? mp_aframe_get_size(in) : 0);
+    // Limit the filtered data size for better latency when changing speed.
+    // Avoid buffering data within the resampler => restrict input size.
+    // p->in_rate already includes the speed factor.
+    double s = p->opts->max_output_frame_size / 1000 * p->in_rate;
+    int max_in = lrint(MPCLAMP(s, 128, INT_MAX));
+    int consume_in = in ? mp_aframe_get_size(in) : 0;
+    consume_in = MPMIN(consume_in, max_in);
+
+    int samples = get_out_samples(p, consume_in);
     out = mp_aframe_create();
     mp_aframe_config_copy(out, p->pool_fmt);
     if (mp_aframe_pool_allocate(p->out_pool, out, samples) < 0)
@@ -458,7 +470,7 @@ static struct mp_frame filter_resample_output(struct priv *p,
 
     int out_samples = 0;
     if (samples) {
-        out_samples = resample_frame(p->avrctx, out, in);
+        out_samples = resample_frame(p->avrctx, out, in, consume_in);
         if (out_samples < 0 || out_samples > samples)
             goto error;
         mp_aframe_set_size(out, out_samples);
@@ -479,7 +491,7 @@ static struct mp_frame filter_resample_output(struct priv *p,
         }
         int got = 0;
         if (out_samples)
-            got = resample_frame(p->avrctx_out, new, out);
+            got = resample_frame(p->avrctx_out, new, out, out_samples);
         talloc_free(out);
         out = new;
         if (got != out_samples)
@@ -491,12 +503,14 @@ static struct mp_frame filter_resample_output(struct priv *p,
     if (in) {
         mp_aframe_copy_attributes(out, in);
         p->current_pts = mp_aframe_end_pts(in);
+        mp_aframe_skip_samples(in, consume_in);
     }
 
     if (out_samples) {
         if (p->current_pts != MP_NOPTS_VALUE) {
             double delay = get_delay(p) * mp_aframe_get_speed(out) +
-                           mp_aframe_duration(out);
+                           mp_aframe_duration(out) +
+                           (p->input ? mp_aframe_duration(p->input) : 0);
             mp_aframe_set_pts(out, p->current_pts - delay);
             mp_aframe_mul_speed(out, p->speed);
         }
@@ -516,30 +530,36 @@ static void process(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
-    if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0]))
+    if (!mp_pin_in_needs_data(f->ppins[1]))
         return;
 
     p->speed = p->cmd_speed * p->public.speed;
 
-    struct mp_frame frame = mp_pin_out_read(f->ppins[0]);
-
     struct mp_aframe *input = NULL;
-    if (frame.type == MP_FRAME_AUDIO) {
-        input = frame.data;
-    } else if (frame.type != MP_FRAME_EOF) {
-        MP_ERR(p, "Unsupported frame type.\n");
-        mp_frame_unref(&frame);
-        mp_filter_internal_mark_failed(f);
-        return;
-    }
+    if (!p->input) {
+        struct mp_frame frame = mp_pin_out_read(f->ppins[0]);
+
+        if (frame.type == MP_FRAME_AUDIO) {
+            input = frame.data;
+        } else if (!frame.type) {
+            return; // no new data
+        } else if (frame.type != MP_FRAME_EOF) {
+            MP_ERR(p, "Unsupported frame type.\n");
+            mp_frame_unref(&frame);
+            mp_filter_internal_mark_failed(f);
+            return;
+        }
 
-    if (!input && !p->avrctx) {
-        // Obviously no draining needed.
-        mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
-        return;
+        if (!input && !p->avrctx) {
+            // Obviously no draining needed.
+            mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+            return;
+        }
     }
 
     if (input) {
+        assert(!p->input);
+
         struct mp_swresample *s = &p->public;
 
         int in_rate = mp_aframe_get_rate(input);
@@ -549,7 +569,7 @@ static void process(struct mp_filter *f)
 
         if (!in_rate || !in_format || !in_channels.num) {
             MP_ERR(p, "Frame with invalid format unsupported\n");
-            mp_frame_unref(&frame);
+            talloc_free(input);
             mp_filter_internal_mark_failed(f);
             return;
         }
@@ -573,7 +593,8 @@ static void process(struct mp_filter *f)
                 if (out.type) {
                     mp_pin_in_write(f->ppins[1], out);
                     // continue filtering next time.
-                    mp_pin_out_unread(f->ppins[0], frame);
+                    mp_pin_out_unread(f->ppins[0],
+                                      MAKE_FRAME(MP_FRAME_AUDIO, input));
                     input = NULL;
                 }
             }
@@ -598,6 +619,8 @@ static void process(struct mp_filter *f)
                 return;
             }
         }
+
+        p->input = input;
     }
 
     int new_rate = rate_from_speed(p->in_rate_user, p->speed);
@@ -624,32 +647,29 @@ static void process(struct mp_filter *f)
         // in the resampler.
         struct mp_frame out = filter_resample_output(p, NULL);
         bool need_drain = !!out.type;
-        if (need_drain) {
+        if (need_drain)
             mp_pin_in_write(f->ppins[1], out);
-            // Drain; continue filtering next time.
-            mp_pin_out_unread(f->ppins[0], frame);
-        }
         // Reinitialize resampler.
         configure_lavrr(p, false);
-        if (need_drain) {
-            mp_filter_internal_mark_progress(f);
+        // If we've written output, we must continue filtering next time.
+        if (need_drain)
             return;
-        }
     }
 
-    struct mp_frame out = filter_resample_output(p, input);
+    struct mp_frame out = filter_resample_output(p, p->input);
 
-    if (input && out.type) {
+    if (p->input && out.type) {
         mp_pin_in_write(f->ppins[1], out);
         mp_pin_out_request_data(f->ppins[0]);
-    } else if (!input && out.type) {
+    } else if (!p->input && out.type) {
         mp_pin_in_write(f->ppins[1], out);
         mp_pin_out_repeat_eof(f->ppins[0]);
-    } else if (!input) {
+    } else if (!p->input) {
         mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
     }
 
-    talloc_free(input);
+    if (p->input && !mp_aframe_get_size(p->input))
+        TA_FREEP(&p->input);
 }
 
 double mp_swresample_get_delay(struct mp_swresample *s)
@@ -676,6 +696,7 @@ static void destroy(struct mp_filter *f)
     struct priv *p = f->priv;
 
     close_lavrr(p);
+    TA_FREEP(&p->input);
 }
 
 static const struct mp_filter_info swresample_filter = {
diff --git a/filters/f_swresample.h b/filters/f_swresample.h
index 44b2e35d08..10d6176a56 100644
--- a/filters/f_swresample.h
+++ b/filters/f_swresample.h
@@ -23,6 +23,7 @@ struct mp_resample_opts {
     double cutoff;
     int normalize;
     int allow_passthrough;
+    double max_output_frame_size;
     char **avopts;
 };
 
@@ -31,6 +32,7 @@ struct mp_resample_opts {
     .cutoff      = 0.0,         \
     .phase_shift = 10,          \
     .normalize   = 0,           \
+    .max_output_frame_size = 40,\
     }
 
 // Create the filter. If opts==NULL, use the global options as defaults.
author	wm4 <wm4@nowhere>	2018-02-01 08:39:07 +0100
committer	Kevin Mitchell <kevmitch@gmail.com>	2018-02-03 05:01:29 -0800
commit	7019e0dcfe9f6edddfe8b20f062f8bb94110b3a0 (patch)
tree	899fa5381652a5b3ace5478a57935bff7772dfdd
parent	171ec0a7e40030f599b7b385b3699835e8c1eb2d (diff)
download	mpv-7019e0dcfe9f6edddfe8b20f062f8bb94110b3a0.tar.bz2 mpv-7019e0dcfe9f6edddfe8b20f062f8bb94110b3a0.tar.xz