audio: change format negotiation, remove channel remix fudging

The audio format neogitation code was pretty complicated, although the idea was simple: when the format changes (or on the first audio frame), filter only the new frame through the entire filter chain, discard the resulting frame, but use the format to initialize the AO. This was useful for "fudging" the channel remix behavior (upmix or downmix), and moving it before other filters. Apparently this was useful for things like DRC filters, which might work better in stereo, and which also can only achieve the desired volume levels by doing it before a downmix, which would modify the volume. This mechanism was introduced in commit 60048b7eb957b (which the commit message also describes as "idiotic heuristic"). Knowing the output format is inherently necessary for this, because otherwise we can't know what the hell the user defined filters will do. There were problems with robustness. Some filters needed more than one frame. Resampling in particular would discard initial audio at high resampling ratios. Some filters might drop audio intentionally (like clipping data on timestamp ranges). There were also allegations that some decoders output 0 length frames (although that is invalid in libavcodec). The state machine was excessively complex and hard to understand too. There are 3 things that could have been done: 1. Fix robustness problems by doing more heuristics, like repeating audio frames or simply decoding several frames. Since filters can behave differently, this would have added lots of complexity. 2. Make use of libavfilter's format negotiation, and add the same to mpv builtin filters. This is sort of annoying, because the format negotiation in libavfilter changes the state of the filters. It also reports only some parameters (mostly all for audio, but a lot of holes for video). It would remove some of the state machine, but not all. 3. Drop the channel remix fudging, and do the same as the video chain. This would not require format negotiation, but instead you can just filter the audio frames, and look what comes out of it. If nothing comes out, simply never create an AO. This commit selects option 3. It removes the remix fudging, which means the loss of a feature. Users can instead add "--af=format=channels=2" before their DRC filter, or something. I'm also considering changing the default for --audio-channels back to stereo, and downmix in the decoder or at the start of the filter chain, which would give the same results, except requiring more configuration. Implementation-wise, this is still a bit different from the video path. The VO always remains the same instance, while the AO might have to be recreated on configuration changes. This still requires explicit format change handling + draining old data, but by putting it into f_autoconvert, not much new code is needed.
author: wm4 <wm4@nowhere> 2018-04-07 14:38:40 +0200
committer: Jan Ekström <jeebjp@gmail.com> 2018-04-15 23:11:33 +0300
commit: 3c123281a7eb3181a9706a52d0148f70aa65cff5 (patch)
tree: 86fe71febdffb4bead600b47371f28c7838ff3d2
parent: 4b48966d87d7759346f989c44e8ab3cb405d0037 (diff)
download: mpv-3c123281a7eb3181a9706a52d0148f70aa65cff5.tar.bz2
mpv-3c123281a7eb3181a9706a52d0148f70aa65cff5.tar.xz
3 files changed, 89 insertions, 229 deletions
diff --git a/filters/f_autoconvert.c b/filters/f_autoconvert.c
index ee72474f09..78b716623e 100644
--- a/filters/f_autoconvert.c
+++ b/filters/f_autoconvert.c
@@ -45,6 +45,9 @@ struct priv {
     double audio_speed;
     bool resampling_forced;
 
+    bool format_change_blocked;
+    bool format_change_cont;
+
     struct mp_autoconvert public;
 };
 
@@ -283,17 +286,30 @@ static void handle_audio_frame(struct mp_filter *f)
     struct mp_chmap chmap = {0};
     mp_aframe_get_chmap(aframe, &chmap);
 
-    if (afmt == p->in_afmt && srate == p->in_srate &&
-        mp_chmap_equals(&chmap, &p->in_chmap) &&
-        (!p->resampling_forced || p->sub.filter) &&
-        !p->force_update)
-    {
+    bool format_change = afmt != p->in_afmt ||
+                         srate != p->in_srate ||
+                         !mp_chmap_equals(&chmap, &p->in_chmap) ||
+                         p->force_update;
+
+    if (!format_change && (!p->resampling_forced || p->sub.filter))
         goto cont;
-    }
 
     if (!mp_subfilter_drain_destroy(&p->sub))
         return;
 
+    if (format_change && p->public.on_audio_format_change) {
+        if (p->format_change_blocked)
+            return;
+
+        if (!p->format_change_cont) {
+            p->format_change_blocked = true;
+            p->public.
+                on_audio_format_change(p->public.on_audio_format_change_opaque);
+            return;
+        }
+        p->format_change_cont = false;
+    }
+
     p->in_afmt = afmt;
     p->in_srate = srate;
     p->in_chmap = chmap;
@@ -373,6 +389,17 @@ static void process(struct mp_filter *f)
     mp_subfilter_continue(&p->sub);
 }
 
+void mp_autoconvert_format_change_continue(struct mp_autoconvert *c)
+{
+    struct priv *p = c->f->priv;
+
+    if (p->format_change_blocked) {
+        p->format_change_cont = true;
+        p->format_change_blocked = false;
+        mp_filter_wakeup(c->f);
+    }
+}
+
 static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
 {
     struct priv *p = f->priv;
@@ -394,6 +421,9 @@ static void reset(struct mp_filter *f)
     struct priv *p = f->priv;
 
     mp_subfilter_reset(&p->sub);
+
+    p->format_change_cont = false;
+    p->format_change_blocked = false;
 }
 
 static void destroy(struct mp_filter *f)
diff --git a/filters/f_autoconvert.h b/filters/f_autoconvert.h
index 77e07aecf1..4cd0cb3d6d 100644
--- a/filters/f_autoconvert.h
+++ b/filters/f_autoconvert.h
@@ -8,6 +8,13 @@
 struct mp_autoconvert {
     // f->pins[0] is input, f->pins[1] is output
     struct mp_filter *f;
+
+    // If this is set, the callback is invoked (from the process function), and
+    // further data flow is blocked until mp_autoconvert_format_change_continue()
+    // is called. The idea is that you can reselect the output parameters on
+    // format changes and continue filtering when ready.
+    void (*on_audio_format_change)(void *opaque);
+    void *on_audio_format_change_opaque;
 };
 
 // (to free this, free the filter itself, mp_autoconvert.f)
@@ -46,5 +53,8 @@ void mp_autoconvert_add_srate(struct mp_autoconvert *c, int rate);
 // filter.)
 void mp_autoconvert_clear(struct mp_autoconvert *c);
 
+// See mp_autoconvert.on_audio_format_change.
+void mp_autoconvert_format_change_continue(struct mp_autoconvert *c);
+
 // vf_d3d11vpp.c
 struct mp_filter *vf_d3d11_create_outconv(struct mp_filter *parent);
diff --git a/filters/f_output_chain.c b/filters/f_output_chain.c
index 1f74a01661..e986614e9d 100644
--- a/filters/f_output_chain.c
+++ b/filters/f_output_chain.c
@@ -45,21 +45,6 @@ struct chain {
     struct vo *vo;
     struct ao *ao;
 
-    struct mp_frame pending_input;
-
-    // Some chain types (MP_OUTPUT_CHAIN_AUDIO) require draining the entire
-    // filter chain on format changes and further complex actions:
-    //  0: normal filtering
-    //  1: input changed, flushing out remaining frames from current filters
-    //  2: flushing finished
-    //  3: sent new frame through chain for format probing
-    //  4: sent EOF through chain for format probing
-    //  5: received format probing frame; now waiting for API user to call
-    //     mp_output_chain_set_ao().
-    int format_change_phase;
-    // True if it's a second run trying to see if downmix can be moved up.
-    bool format_change_second_try;
-
     struct mp_output_chain public;
 };
 
@@ -78,7 +63,6 @@ struct mp_user_filter {
     char *name;
     bool is_output_converter;
     bool is_input;
-    bool is_channelremix;
 
     struct mp_image_params last_out_params;
     struct mp_aframe *last_out_aformat;
@@ -89,9 +73,6 @@ struct mp_user_filter {
     bool error_eof_sent;
 };
 
-static void recheck_channelremix_filter(struct chain *p);
-static void remove_channelremix_filter(struct chain *p);
-
 static void update_output_caps(struct chain *p)
 {
     if (p->type != MP_OUTPUT_CHAIN_VIDEO)
@@ -181,15 +162,12 @@ static void process_user(struct mp_filter *f)
             MP_FATAL(p, "Cannot convert decoder/filter output to any format "
                      "supported by the output.\n");
             p->public.failed_output_conversion = true;
-            p->format_change_phase = 0;
             mp_filter_wakeup(p->f);
         } else {
             MP_ERR(p, "Disabling filter %s because it has failed.\n", name);
             mp_filter_reset(u->f); // clear out staled buffered data
         }
         u->failed = true;
-        if (p->format_change_phase)
-            p->format_change_phase = 2; // redo without it
     }
 
     if (u->failed) {
@@ -219,18 +197,7 @@ static void process_user(struct mp_filter *f)
     if (mp_pin_can_transfer_data(f->ppins[1], u->f->pins[1])) {
         struct mp_frame frame = mp_pin_out_read(u->f->pins[1]);
 
-        bool changed = check_out_format_change(u, frame);
-        if (p->type == MP_OUTPUT_CHAIN_AUDIO && (!p->ao || changed) &&
-            u->is_input && !p->format_change_phase)
-        {
-            // Format changed -> block filtering, start draining current filters.
-            MP_VERBOSE(p, "format changed, draining filter chain\n");
-            mp_frame_unref(&p->pending_input);
-            p->pending_input = frame;
-            p->format_change_phase = 1;
-            mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
-            return;
-        }
+        check_out_format_change(u, frame);
 
         double pts = mp_frame_get_pts(frame);
         if (pts != MP_NOPTS_VALUE)
@@ -309,103 +276,10 @@ static void relink_filter_list(struct chain *p)
     }
 }
 
-// Special logic for draining on format changes (for audio). Never used or
-// initiated video.
-static void process_format_change(struct mp_filter *f)
-{
-    struct chain *p = f->priv;
-
-    if (mp_pin_in_needs_data(p->filters_in)) {
-        if (p->format_change_phase == 2) {
-            MP_VERBOSE(p, "probing new format\n");
-            // Clear any old state.
-            if (!p->format_change_second_try) {
-                mp_autoconvert_clear(p->convert);
-                remove_channelremix_filter(p);
-            }
-            for (int n = 0; n < p->num_all_filters; n++)
-                mp_filter_reset(p->all_filters[n]->f);
-            // Filter a copy of the new input frame to see what comes out.
-            struct mp_frame frame = mp_frame_ref(p->pending_input);
-            if (!frame.type)
-                abort();
-            mp_pin_in_write(p->filters_in, frame);
-            mp_pin_out_request_data(p->filters_out);
-            p->format_change_phase = 3;
-        } else if (p->format_change_phase == 3) {
-            MP_VERBOSE(p, "probing new format (drain)\n");
-            mp_pin_in_write(p->filters_in, MP_EOF_FRAME);
-            p->format_change_phase = 4;
-        }
-    }
-
-    if (mp_pin_can_transfer_data(f->ppins[1], p->filters_out)) {
-        struct mp_frame frame = mp_pin_out_read(p->filters_out);
-
-        if (frame.type == MP_FRAME_EOF) {
-            // We're apparently draining for a format change, and we got EOF
-            // from the chain, which means we're done draining.
-            if (p->format_change_phase == 1) {
-                MP_VERBOSE(p, "done format change draining\n");
-                // Then we need to start probing the new format.
-                p->format_change_phase = 2;
-                mp_pin_out_request_data(p->filters_out);
-            } else if (!p->public.failed_output_conversion) {
-                MP_ERR(p, "No output format - empty file or broken filter?\n");
-                p->ao = NULL;
-                p->public.ao_needs_update = true;
-                p->format_change_phase = 5;
-            }
-            mp_filter_internal_mark_progress(f);
-            return;
-        }
-
-        if (p->format_change_phase >= 2) {
-            // We were filtering a "test" frame to probe the format. Now
-            // that we have it (apparently), just discard it, and make the
-            // user aware of the previously grabbed format.
-            MP_VERBOSE(p, "got output format from probing\n");
-            mp_frame_unref(&frame);
-            for (int n = 0; n < p->num_all_filters; n++)
-                mp_filter_reset(p->all_filters[n]->f);
-            if (p->format_change_second_try) {
-                p->format_change_second_try = false;
-                p->format_change_phase = 0;
-                recheck_channelremix_filter(p);
-            } else {
-                p->ao = NULL;
-                p->public.ao_needs_update = true;
-                p->format_change_phase = 5;
-            }
-            // Do something silly to ensure the f_output_chain user gets
-            // notified properly.
-            mp_filter_wakeup(f);
-            return;
-        }
-
-        // Draining remaining data.
-        mp_pin_in_write(f->ppins[1], frame);
-    }
-}
-
-
 static void process(struct mp_filter *f)
 {
     struct chain *p = f->priv;
 
-    if (p->format_change_phase) {
-        process_format_change(f);
-        return;
-    }
-
-    // Send remaining input from previous format change.
-    if (p->pending_input.type) {
-        if (mp_pin_in_needs_data(p->filters_in)) {
-            mp_pin_in_write(p->filters_in, p->pending_input);
-            p->pending_input = MP_NO_FRAME;
-        }
-    }
-
     if (mp_pin_can_transfer_data(p->filters_in, f->ppins[0])) {
         struct mp_frame frame = mp_pin_out_read(f->ppins[0]);
 
@@ -435,12 +309,6 @@ static void reset(struct mp_filter *f)
 {
     struct chain *p = f->priv;
 
-    // (if format initialization was in progress, this can be repeated next time)
-    mp_frame_unref(&p->pending_input);
-    p->format_change_phase = 0;
-    if (p->format_change_second_try)
-        remove_channelremix_filter(p);
-    p->format_change_second_try = false;
     p->public.ao_needs_update = false;
 
     p->public.got_output_eof = false;
@@ -460,6 +328,11 @@ void mp_output_chain_reset_harder(struct mp_output_chain *c)
         u->last_out_params = (struct mp_image_params){0};
         mp_aframe_reset(u->last_out_aformat);
     }
+
+    if (p->type == MP_OUTPUT_CHAIN_AUDIO) {
+        p->ao = NULL;
+        mp_autoconvert_clear(p->convert);
+    }
 }
 
 static void destroy(struct mp_filter *f)
@@ -502,102 +375,14 @@ void mp_output_chain_set_vo(struct mp_output_chain *c, struct vo *vo)
     update_output_caps(p);
 }
 
-// If there are any user filters, and they don't affect the channel config,
-// then move upmix/downmix to the start of the chain.
-static void maybe_move_up_channelremix(struct chain *p, struct mp_chmap *final)
-{
-    assert(p->num_all_filters >= 2); // at least in/convert filters
-    struct mp_user_filter *first = p->all_filters[0]; // "in" pseudo filter
-    struct mp_chmap in = {0};
-    mp_aframe_get_chmap(first->last_out_aformat, &in);
-    if (mp_chmap_is_unknown(&in))
-        return;
-    mp_chmap_reorder_to_lavc(&in);
-    if (!mp_chmap_is_valid(&in) || mp_chmap_equals_reordered(&in, final))
-        return;
-    for (int n = 0; n < p->num_all_filters; n++) {
-        struct mp_user_filter *u = p->all_filters[n];
-        struct mp_chmap chmap = {0};
-        mp_aframe_get_chmap(u->last_out_aformat, &chmap);
-        if (!mp_chmap_equals_reordered(&in, &chmap))
-            return; // some remix going in
-    }
-
-    if (!p->num_user_filters)
-        return; // would be a NOP
-
-    MP_VERBOSE(p, "trying with channel remixing moved to start of chain\n");
-
-    struct mp_user_filter *remix = create_wrapper_filter(p);
-    struct mp_autoconvert *convert = mp_autoconvert_create(remix->wrapper);
-    if (!convert)
-        abort();
-    mp_autoconvert_add_chmap(convert, final);
-    remix->name = "channelremix";
-    remix->f = convert->f;
-    remix->is_channelremix = true;
-    MP_TARRAY_APPEND(p, p->pre_filters, p->num_pre_filters, remix);
-    relink_filter_list(p);
-
-    // now run the scary state machine again in order to see what filters do
-    // with the remixed channel data and if it was a good idea
-    p->format_change_phase = 2;
-    p->format_change_second_try = true;
-}
-
-static void remove_channelremix_filter(struct chain *p)
-{
-    for (int n = 0; n < p->num_pre_filters; n++) {
-        struct mp_user_filter *u = p->pre_filters[n];
-        if (u->is_channelremix) {
-            MP_TARRAY_REMOVE_AT(p->pre_filters, p->num_pre_filters, n);
-            talloc_free(u->wrapper);
-            relink_filter_list(p);
-            break;
-        }
-    }
-}
-
-static void recheck_channelremix_filter(struct chain *p)
-{
-    struct mp_chmap in = {0};
-    int start = -1;
-    for (int n = 0; n < p->num_all_filters; n++) {
-        struct mp_user_filter *u = p->all_filters[n];
-        if (u->is_channelremix) {
-            mp_aframe_get_chmap(u->last_out_aformat, &in);
-            start = n;
-            break;
-        }
-    }
-
-    if (start < 0 || !mp_chmap_is_valid(&in))
-        goto remove;
-
-    for (int n = start; n < p->num_all_filters; n++) {
-        struct mp_user_filter *u = p->all_filters[n];
-        struct mp_chmap chmap = {0};
-        mp_aframe_get_chmap(u->last_out_aformat, &chmap);
-        if (!mp_chmap_equals_reordered(&in, &chmap))
-            goto remove;
-    }
-
-    return;
-remove:
-    MP_VERBOSE(p, "reverting moved up channel remixing\n");
-    remove_channelremix_filter(p);
-}
-
 void mp_output_chain_set_ao(struct mp_output_chain *c, struct ao *ao)
 {
     struct chain *p = c->f->priv;
 
     assert(p->public.ao_needs_update); // can't just call it any time
-    assert(p->format_change_phase == 5);
-    assert(!p->format_change_second_try);
+    assert(!p->ao);
 
     p->public.ao_needs_update = false;
-    p->format_change_phase = 0;
 
     p->ao = ao;
 
@@ -611,8 +396,38 @@ void mp_output_chain_set_ao(struct mp_output_chain *c, struct ao *ao)
     mp_autoconvert_add_srate(p->convert, out_rate);
     mp_autoconvert_add_chmap(p->convert, &out_channels);
 
-    maybe_move_up_channelremix(p, &out_channels);
+    mp_autoconvert_format_change_continue(p->convert);
+
+    // Just to get the format change logged again.
+    mp_aframe_reset(p->public.output_aformat);
+}
+
+static void on_audio_format_change(void *opaque)
+{
+    struct chain *p = opaque;
 
+    // Find the filter before p->convert, to get p->convert's input format.
+    struct mp_user_filter *prev = NULL;
+    for (int n = 0; n < p->num_all_filters; n++) {
+        struct mp_user_filter *u = p->all_filters[n];
+        if (u->is_output_converter)
+            break;
+        prev = u;
+    }
+
+    assert(prev); // there must have been one
+
+    // Let the f_output_chain user know what format to use. (Not quite proper,
+    // since we overwrite what some other code normally automatically sets.)
+    mp_aframe_config_copy(p->public.output_aformat, prev->last_out_aformat);
+
+    // Ask for calling mp_output_chain_set_ao().
+    p->public.ao_needs_update = true;
+    p->ao = NULL;
+
+    // Do something silly to notify the f_output_chain user. (Not quite proper,
+    // it's merely that this will cause the core to run again, and check the
+    // flag set above.)
     mp_filter_wakeup(p->f);
 }
 
@@ -917,6 +732,11 @@ struct mp_output_chain *mp_output_chain_create(struct mp_filter *parent,
     p->output->f = p->convert->f;
     MP_TARRAY_APPEND(p, p->post_filters, p->num_post_filters, p->output);
 
+    if (type == MP_OUTPUT_CHAIN_AUDIO) {
+        p->convert->on_audio_format_change = on_audio_format_change;
+        p->convert->on_audio_format_change_opaque = p;
+    }
+
     relink_filter_list(p);
 
     reset(f);
author	wm4 <wm4@nowhere>	2018-04-07 14:38:40 +0200
committer	Jan Ekström <jeebjp@gmail.com>	2018-04-15 23:11:33 +0300
commit	3c123281a7eb3181a9706a52d0148f70aa65cff5 (patch)
tree	86fe71febdffb4bead600b47371f28c7838ff3d2
parent	4b48966d87d7759346f989c44e8ab3cb405d0037 (diff)
download	mpv-3c123281a7eb3181a9706a52d0148f70aa65cff5.tar.bz2 mpv-3c123281a7eb3181a9706a52d0148f70aa65cff5.tar.xz