af_scaletempo: output minimally sized audio frame

This helps the filter to adapt much faster to speed changes. Before this commit, the filter just converted and output the full input frame, which could cause problems with large input frames. This was made worse by certain filters like dynaudnorm or loudnorm outputting pretty large frames. This commit changes the filter from trying to convert all input at once to only outputting a single internally filtered frame. Internally, this filter already output data in units of 60ms by default (controlled by the "stride" sub-option), and concatenated as many output frames as necessary to consume all input. Behavior is still kind of bad when inserting the filter. This is because the large frames can be buffered up after the insertion point, so the speed change will be performed with a larger latency. The scaletempo filter can't do anything against this, although it can be fixed by inserting scaletempo as user filter as part of --af.
author: wm4 <wm4@nowhere> 2018-02-01 07:08:06 +0100
committer: Kevin Mitchell <kevmitch@gmail.com> 2018-02-03 05:01:29 -0800
commit: 171ec0a7e40030f599b7b385b3699835e8c1eb2d (patch)
tree: 2e139743fce9c826091690d18ff8cd5179f493c1
parent: debc17663d677526bd67a7faea5233e1a49078e4 (diff)
download: mpv-171ec0a7e40030f599b7b385b3699835e8c1eb2d.tar.bz2
mpv-171ec0a7e40030f599b7b385b3699835e8c1eb2d.tar.xz
1 files changed, 76 insertions, 57 deletions
diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c
index 4e48e6168b..ed1df5725e 100644
--- a/audio/filter/af_scaletempo.c
+++ b/audio/filter/af_scaletempo.c
@@ -60,6 +60,7 @@ struct priv {
     struct mp_aframe *cur_format;
     struct mp_aframe_pool *out_pool;
     double current_pts;
+    struct mp_aframe *in;
 
     // stride
     float scale;
@@ -90,12 +91,13 @@ struct priv {
     int (*best_overlap_offset)(struct priv *s);
 };
 
-static bool reinit(struct mp_filter *f, struct mp_aframe *in);
+static bool reinit(struct mp_filter *f);
 
-static int fill_queue(struct priv *s, struct mp_aframe *in, int offset)
+// Return whether it got enough data for filtering.
+static bool fill_queue(struct priv *s)
 {
-    int bytes_in = in ? mp_aframe_get_size(in) * s->bytes_per_frame - offset : 0;
-    int offset_unchanged = offset;
+    int bytes_in = s->in ? mp_aframe_get_size(s->in) * s->bytes_per_frame : 0;
+    int offset = 0;
 
     if (s->bytes_to_slide > 0) {
         if (s->bytes_to_slide < s->bytes_queued) {
@@ -114,16 +116,22 @@ static int fill_queue(struct priv *s, struct mp_aframe *in, int offset)
         }
     }
 
-    if (bytes_in > 0) {
-        int bytes_copy = MPMIN(s->bytes_queue - s->bytes_queued, bytes_in);
-        assert(bytes_copy >= 0);
-        uint8_t **planes = mp_aframe_get_data_ro(in);
+    int bytes_needed = s->bytes_queue - s->bytes_queued;
+    assert(bytes_needed >= 0);
+
+    int bytes_copy = MPMIN(bytes_needed, bytes_in);
+    if (bytes_copy > 0) {
+        uint8_t **planes = mp_aframe_get_data_ro(s->in);
         memcpy(s->buf_queue + s->bytes_queued, planes[0] + offset, bytes_copy);
         s->bytes_queued += bytes_copy;
         offset += bytes_copy;
+        bytes_needed -= bytes_copy;
     }
 
-    return offset - offset_unchanged;
+    if (s->in)
+        mp_aframe_skip_samples(s->in, offset / s->bytes_per_frame);
+
+    return bytes_needed == 0;
 }
 
 #define UNROLL_PADDING (4 * 4)
@@ -224,62 +232,73 @@ static void process(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
-    if (!mp_pin_can_transfer_data(f->ppins[1], s->in_pin))
+    if (!mp_pin_in_needs_data(f->ppins[1]))
         return;
 
-    struct mp_aframe *in = NULL, *out = NULL;
-
-    struct mp_frame frame = mp_pin_out_read(s->in_pin);
-    if (frame.type != MP_FRAME_AUDIO && frame.type != MP_FRAME_EOF) {
-        MP_ERR(f, "unexpected frame type\n");
-        goto error;
-    }
+    struct mp_aframe *out = NULL;
+
+    bool drain = false;
+    bool is_eof = false;
+    if (!s->in) {
+        struct mp_frame frame = mp_pin_out_read(s->in_pin);
+        if (!frame.type)
+            return; // no input yet
+        if (frame.type != MP_FRAME_AUDIO && frame.type != MP_FRAME_EOF) {
+            MP_ERR(f, "unexpected frame type\n");
+            goto error;
+        }
 
-    in = frame.type == MP_FRAME_AUDIO ? frame.data : NULL;
-    bool is_eof = !in;
+        s->in = frame.type == MP_FRAME_AUDIO ? frame.data : NULL;
+        is_eof = drain = !s->in;
 
-    // EOF before it was even initialized once.
-    if (is_eof && !mp_aframe_config_is_valid(s->cur_format)) {
-        mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
-        return;
-    }
+        // EOF before it was even initialized once.
+        if (is_eof && !mp_aframe_config_is_valid(s->cur_format)) {
+            mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+            return;
+        }
 
-    if (in && !mp_aframe_config_equals(in, s->cur_format)) {
-        if (s->bytes_queued) {
-            // Drain remaining data before executing the format change.
-            MP_VERBOSE(f, "draining\n");
-            mp_pin_out_unread(s->in_pin, frame);
-            in = NULL;
-        } else {
-            if (!reinit(f, in)) {
-                MP_ERR(f, "initialization failed\n");
-                goto error;
+        if (s->in && !mp_aframe_config_equals(s->in, s->cur_format)) {
+            if (s->bytes_queued) {
+                // Drain remaining data before executing the format change.
+                MP_VERBOSE(f, "draining\n");
+                mp_pin_out_unread(s->in_pin, frame);
+                s->in = NULL;
+                drain = true;
+            } else {
+                if (!reinit(f)) {
+                    MP_ERR(f, "initialization failed\n");
+                    goto error;
+                }
             }
         }
+
+        if (s->in)
+            s->current_pts = mp_aframe_end_pts(s->in);
     }
 
-    int in_samples = in ? mp_aframe_get_size(in) : 0;
+    if (!fill_queue(s) && !drain) {
+        TA_FREEP(&s->in);
+        mp_pin_out_request_data_next(s->in_pin);
+        return;
+    }
 
-    int max_out_samples =
-        ((int)(in_samples / s->frames_stride_scaled) + 1) * s->frames_stride;
-    if (!in)
+    int max_out_samples = s->bytes_stride / s->bytes_per_frame;
+    if (drain)
         max_out_samples += s->bytes_queued;
+
     out = mp_aframe_new_ref(s->cur_format);
     if (mp_aframe_pool_allocate(s->out_pool, out, max_out_samples) < 0)
         goto error;
 
-    if (in) {
-        mp_aframe_copy_attributes(out, in);
-        s->current_pts = mp_aframe_end_pts(in);
-    }
+    if (s->in)
+        mp_aframe_copy_attributes(out, s->in);
 
-    int offset_in = fill_queue(s, in, 0);
     uint8_t **out_planes = mp_aframe_get_data_rw(out);
     if (!out_planes)
         goto error;
     int8_t *pout = out_planes[0];
     int out_offset = 0;
-    while (s->bytes_queued >= s->bytes_queue) {
+    if (s->bytes_queued >= s->bytes_queue) {
         int ti;
         float tf;
         int bytes_off = 0;
@@ -303,11 +322,9 @@ static void process(struct mp_filter *f)
         ti = (int)tf;
         s->frames_stride_error = tf - ti;
         s->bytes_to_slide = ti * s->bytes_per_frame;
-
-        offset_in += fill_queue(s, in, offset_in);
     }
     // Drain remaining buffered data.
-    if (!in && s->bytes_queued) {
+    if (drain && s->bytes_queued) {
         memcpy(pout + out_offset, s->buf_queue, s->bytes_queued);
         out_offset += s->bytes_queued;
         s->bytes_queued = 0;
@@ -317,8 +334,9 @@ static void process(struct mp_filter *f)
     // This filter can have a negative delay when scale > 1:
     // output corresponding to some length of input can be decided and written
     // after receiving only a part of that input.
-    double delay = (out_offset * s->speed + s->bytes_queued - s->bytes_to_slide) /
-                    s->bytes_per_frame / mp_aframe_get_effective_rate(out);
+    float delay = (out_offset * s->speed + s->bytes_queued - s->bytes_to_slide) /
+                    s->bytes_per_frame / mp_aframe_get_effective_rate(out)
+                  + (s->in ? mp_aframe_duration(s->in) : 0);
 
     if (s->current_pts != MP_NOPTS_VALUE)
         mp_aframe_set_pts(out, s->current_pts - delay);
@@ -333,17 +351,16 @@ static void process(struct mp_filter *f)
     } else if (is_eof && !out) {
         mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
     } else if (!is_eof && !out) {
-        mp_pin_out_request_data(s->in_pin);
+        mp_pin_out_request_data_next(s->in_pin);
     }
 
     if (out)
         mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
 
-    talloc_free(in);
     return;
 
 error:
-    talloc_free(in);
+    TA_FREEP(&s->in);
     talloc_free(out);
     mp_filter_internal_mark_failed(f);
 }
@@ -359,15 +376,15 @@ static void update_speed(struct priv *s, float speed)
     s->frames_stride_error = MPMIN(s->frames_stride_error, s->frames_stride_scaled);
 }
 
-static bool reinit(struct mp_filter *f, struct mp_aframe *in)
+static bool reinit(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
     mp_aframe_reset(s->cur_format);
 
-    float srate  = mp_aframe_get_rate(in) / 1000.0;
-    int nch = mp_aframe_get_channels(in);
-    int format = mp_aframe_get_format(in);
+    float srate  = mp_aframe_get_rate(s->in) / 1000.0;
+    int nch = mp_aframe_get_channels(s->in);
+    int format = mp_aframe_get_format(s->in);
 
     int use_int = 0;
     if (format == AF_FORMAT_S16) {
@@ -488,7 +505,7 @@ static bool reinit(struct mp_filter *f, struct mp_aframe *in)
            (int)(s->bytes_queue / nch / bps),
            (use_int ? "s16" : "float"));
 
-    mp_aframe_config_copy(s->cur_format, in);
+    mp_aframe_config_copy(s->cur_format, s->in);
 
     return true;
 }
@@ -521,6 +538,7 @@ static void reset(struct mp_filter *f)
     s->bytes_to_slide = 0;
     s->frames_stride_error = 0;
     memset(s->buf_overlap, 0, s->bytes_overlap);
+    TA_FREEP(&s->in);
 }
 
 static void destroy(struct mp_filter *f)
@@ -531,6 +549,7 @@ static void destroy(struct mp_filter *f)
     free(s->buf_pre_corr);
     free(s->table_blend);
     free(s->table_window);
+    TA_FREEP(&s->in);
     mp_filter_free_children(f);
 }
author	wm4 <wm4@nowhere>	2018-02-01 07:08:06 +0100
committer	Kevin Mitchell <kevmitch@gmail.com>	2018-02-03 05:01:29 -0800
commit	171ec0a7e40030f599b7b385b3699835e8c1eb2d (patch)
tree	2e139743fce9c826091690d18ff8cd5179f493c1
parent	debc17663d677526bd67a7faea5233e1a49078e4 (diff)
download	mpv-171ec0a7e40030f599b7b385b3699835e8c1eb2d.tar.bz2 mpv-171ec0a7e40030f599b7b385b3699835e8c1eb2d.tar.xz