17 files changed, 1119 insertions, 3805 deletions
diff --git a/audio/aconverter.c b/audio/aconverter.c
deleted file mode 100644
index 2475df878d..0000000000
--- a/audio/aconverter.c
+++ /dev/null
@@ -1,653 +0,0 @@
-/*
- * This file is part of mpv.
- *
- * mpv is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * mpv is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <libavutil/opt.h>
-#include <libavutil/common.h>
-#include <libavutil/samplefmt.h>
-#include <libavutil/channel_layout.h>
-#include <libavutil/mathematics.h>
-
-#include "config.h"
-
-#include "common/common.h"
-#include "common/av_common.h"
-#include "common/msg.h"
-#include "options/m_config.h"
-#include "options/m_option.h"
-#include "aconverter.h"
-#include "aframe.h"
-#include "fmt-conversion.h"
-#include "format.h"
-
-#define HAVE_LIBSWRESAMPLE (!HAVE_LIBAV)
-#define HAVE_LIBAVRESAMPLE HAVE_LIBAV
-
-#if HAVE_LIBAVRESAMPLE
-#include <libavresample/avresample.h>
-#elif HAVE_LIBSWRESAMPLE
-#include <libswresample/swresample.h>
-#define AVAudioResampleContext SwrContext
-#define avresample_alloc_context swr_alloc
-#define avresample_open swr_init
-#define avresample_close(x) do { } while(0)
-#define avresample_free swr_free
-#define avresample_available(x) 0
-#define avresample_convert(ctx, out, out_planesize, out_samples, in, in_planesize, in_samples) \
-    swr_convert(ctx, out, out_samples, (const uint8_t**)(in), in_samples)
-#define avresample_set_channel_mapping swr_set_channel_mapping
-#define avresample_set_compensation swr_set_compensation
-#else
-#error "config.h broken or no resampler found"
-#endif
-
-struct mp_aconverter {
-    struct mp_log *log;
-    struct mpv_global *global;
-    double playback_speed;
-    bool is_resampling;
-    bool passthrough_mode;
-    struct AVAudioResampleContext *avrctx;
-    struct mp_aframe *avrctx_fmt; // output format of avrctx
-    struct mp_aframe *pool_fmt; // format used to allocate frames for avrctx output
-    struct mp_aframe *pre_out_fmt; // format before final conversion
-    struct AVAudioResampleContext *avrctx_out; // for output channel reordering
-    const struct mp_resample_opts *opts; // opts requested by the user
-    // At least libswresample keeps a pointer around for this:
-    int reorder_in[MP_NUM_CHANNELS];
-    int reorder_out[MP_NUM_CHANNELS];
-    struct mp_aframe_pool *reorder_buffer;
-    struct mp_aframe_pool *out_pool;
-
-    int in_rate_user; // user input sample rate
-    int in_rate;      // actual rate (used by lavr), adjusted for playback speed
-    int in_format;
-    struct mp_chmap in_channels;
-    int out_rate;
-    int out_format;
-    struct mp_chmap out_channels;
-
-    struct mp_aframe *input;    // queued input frame
-    bool input_eof;             // queued input EOF
-    struct mp_aframe *output;   // queued output frame
-    bool output_eof;            // queued output EOF
-};
-
-#define OPT_BASE_STRUCT struct mp_resample_opts
-const struct m_sub_options resample_config = {
-    .opts = (const m_option_t[]) {
-        OPT_INTRANGE("audio-resample-filter-size", filter_size, 0, 0, 32),
-        OPT_INTRANGE("audio-resample-phase-shift", phase_shift, 0, 0, 30),
-        OPT_FLAG("audio-resample-linear", linear, 0),
-        OPT_DOUBLE("audio-resample-cutoff", cutoff, M_OPT_RANGE,
-                   .min = 0, .max = 1),
-        OPT_FLAG("audio-normalize-downmix", normalize, 0),
-        OPT_KEYVALUELIST("audio-swresample-o", avopts, 0),
-        {0}
-    },
-    .size = sizeof(struct mp_resample_opts),
-    .defaults = &(const struct mp_resample_opts)MP_RESAMPLE_OPTS_DEF,
-    .change_flags = UPDATE_AUDIO,
-};
-
-#if HAVE_LIBAVRESAMPLE
-static double get_delay(struct mp_aconverter *p)
-{
-    return avresample_get_delay(p->avrctx) / (double)p->in_rate +
-           avresample_available(p->avrctx) / (double)p->out_rate;
-}
-static int get_out_samples(struct mp_aconverter *p, int in_samples)
-{
-    return avresample_get_out_samples(p->avrctx, in_samples);
-}
-#else
-static double get_delay(struct mp_aconverter *p)
-{
-    int64_t base = p->in_rate * (int64_t)p->out_rate;
-    return swr_get_delay(p->avrctx, base) / (double)base;
-}
-static int get_out_samples(struct mp_aconverter *p, int in_samples)
-{
-    return swr_get_out_samples(p->avrctx, in_samples);
-}
-#endif
-
-static void close_lavrr(struct mp_aconverter *p)
-{
-    if (p->avrctx)
-        avresample_close(p->avrctx);
-    avresample_free(&p->avrctx);
-    if (p->avrctx_out)
-        avresample_close(p->avrctx_out);
-    avresample_free(&p->avrctx_out);
-
-    TA_FREEP(&p->pre_out_fmt);
-    TA_FREEP(&p->avrctx_fmt);
-    TA_FREEP(&p->pool_fmt);
-}
-
-static int rate_from_speed(int rate, double speed)
-{
-    return lrint(rate * speed);
-}
-
-static struct mp_chmap fudge_pairs[][2] = {
-    {MP_CHMAP2(BL,  BR),  MP_CHMAP2(SL,  SR)},
-    {MP_CHMAP2(SL,  SR),  MP_CHMAP2(BL,  BR)},
-    {MP_CHMAP2(SDL, SDR), MP_CHMAP2(SL,  SR)},
-    {MP_CHMAP2(SL,  SR),  MP_CHMAP2(SDL, SDR)},
-};
-
-// Modify out_layout and return the new value. The intention is reducing the
-// loss libswresample's rematrixing will cause by exchanging similar, but
-// strictly speaking incompatible channel pairs. For example, 7.1 should be
-// changed to 7.1(wide) without dropping the SL/SR channels. (We still leave
-// it to libswresample to create the remix matrix.)
-static uint64_t fudge_layout_conversion(struct mp_aconverter *p,
-                                        uint64_t in, uint64_t out)
-{
-    for (int n = 0; n < MP_ARRAY_SIZE(fudge_pairs); n++) {
-        uint64_t a = mp_chmap_to_lavc(&fudge_pairs[n][0]);
-        uint64_t b = mp_chmap_to_lavc(&fudge_pairs[n][1]);
-        if ((in & a) == a && (in & b) == 0 &&
-            (out & a) == 0 && (out & b) == b)
-        {
-            out = (out & ~b) | a;
-
-            MP_VERBOSE(p, "Fudge: %s -> %s\n",
-                       mp_chmap_to_str(&fudge_pairs[n][0]),
-                       mp_chmap_to_str(&fudge_pairs[n][1]));
-        }
-    }
-    return out;
-}
-
-// mp_chmap_get_reorder() performs:
-//  to->speaker[n] = from->speaker[src[n]]
-// but libavresample does:
-//  to->speaker[dst[n]] = from->speaker[n]
-static void transpose_order(int *map, int num)
-{
-    int nmap[MP_NUM_CHANNELS] = {0};
-    for (int n = 0; n < num; n++) {
-        for (int i = 0; i < num; i++) {
-            if (map[n] == i)
-                nmap[i] = n;
-        }
-    }
-    memcpy(map, nmap, sizeof(nmap));
-}
-
-static bool configure_lavrr(struct mp_aconverter *p, bool verbose)
-{
-    close_lavrr(p);
-
-    p->in_rate = rate_from_speed(p->in_rate_user, p->playback_speed);
-
-    p->passthrough_mode = p->opts->allow_passthrough &&
-                          p->in_rate == p->out_rate &&
-                          p->in_format == p->out_format &&
-                          mp_chmap_equals(&p->in_channels, &p->out_channels);
-
-    if (p->passthrough_mode)
-        return true;
-
-    p->avrctx = avresample_alloc_context();
-    p->avrctx_out = avresample_alloc_context();
-    if (!p->avrctx || !p->avrctx_out)
-        goto error;
-
-    enum AVSampleFormat in_samplefmt = af_to_avformat(p->in_format);
-    enum AVSampleFormat out_samplefmt = af_to_avformat(p->out_format);
-    enum AVSampleFormat out_samplefmtp = av_get_planar_sample_fmt(out_samplefmt);
-
-    if (in_samplefmt == AV_SAMPLE_FMT_NONE ||
-        out_samplefmt == AV_SAMPLE_FMT_NONE ||
-        out_samplefmtp == AV_SAMPLE_FMT_NONE)
-        goto error;
-
-    av_opt_set_int(p->avrctx, "filter_size",        p->opts->filter_size, 0);
-    av_opt_set_int(p->avrctx, "phase_shift",        p->opts->phase_shift, 0);
-    av_opt_set_int(p->avrctx, "linear_interp",      p->opts->linear, 0);
-
-    double cutoff = p->opts->cutoff;
-    if (cutoff <= 0.0)
-        cutoff = MPMAX(1.0 - 6.5 / (p->opts->filter_size + 8), 0.80);
-    av_opt_set_double(p->avrctx, "cutoff",          cutoff, 0);
-
-    int normalize = p->opts->normalize;
-#if HAVE_LIBSWRESAMPLE
-    av_opt_set_double(p->avrctx, "rematrix_maxval", normalize ? 1 : 1000, 0);
-#else
-    av_opt_set_int(p->avrctx, "normalize_mix_level", !!normalize, 0);
-#endif
-
-    if (mp_set_avopts(p->log, p->avrctx, p->opts->avopts) < 0)
-        goto error;
-
-    struct mp_chmap map_in = p->in_channels;
-    struct mp_chmap map_out = p->out_channels;
-
-    // Try not to do any remixing if at least one is "unknown". Some corner
-    // cases also benefit from disabling all channel handling logic if the
-    // src/dst layouts are the same (like fl-fr-na -> fl-fr-na).
-    if (mp_chmap_is_unknown(&map_in) || mp_chmap_is_unknown(&map_out) ||
-        mp_chmap_equals(&map_in, &map_out))
-    {
-        mp_chmap_set_unknown(&map_in, map_in.num);
-        mp_chmap_set_unknown(&map_out, map_out.num);
-    }
-
-    // unchecked: don't take any channel reordering into account
-    uint64_t in_ch_layout = mp_chmap_to_lavc_unchecked(&map_in);
-    uint64_t out_ch_layout = mp_chmap_to_lavc_unchecked(&map_out);
-
-    struct mp_chmap in_lavc, out_lavc;
-    mp_chmap_from_lavc(&in_lavc, in_ch_layout);
-    mp_chmap_from_lavc(&out_lavc, out_ch_layout);
-
-    if (verbose && !mp_chmap_equals(&in_lavc, &out_lavc)) {
-        MP_VERBOSE(p, "Remix: %s -> %s\n", mp_chmap_to_str(&in_lavc),
-                                            mp_chmap_to_str(&out_lavc));
-    }
-
-    if (in_lavc.num != map_in.num) {
-        // For handling NA channels, we would have to add a planarization step.
-        MP_FATAL(p, "Unsupported input channel layout %s.\n",
-                 mp_chmap_to_str(&map_in));
-        goto error;
-    }
-
-    mp_chmap_get_reorder(p->reorder_in, &map_in, &in_lavc);
-    transpose_order(p->reorder_in, map_in.num);
-
-    if (mp_chmap_equals(&out_lavc, &map_out)) {
-        // No intermediate step required - output new format directly.
-        out_samplefmtp = out_samplefmt;
-    } else {
-        // Verify that we really just reorder and/or insert NA channels.
-        struct mp_chmap withna = out_lavc;
-        mp_chmap_fill_na(&withna, map_out.num);
-        if (withna.num != map_out.num)
-            goto error;
-    }
-    mp_chmap_get_reorder(p->reorder_out, &out_lavc, &map_out);
-
-    p->pre_out_fmt = mp_aframe_create();
-    mp_aframe_set_rate(p->pre_out_fmt, p->out_rate);
-    mp_aframe_set_chmap(p->pre_out_fmt, &p->out_channels);
-    mp_aframe_set_format(p->pre_out_fmt, p->out_format);
-
-    p->avrctx_fmt = mp_aframe_create();
-    mp_aframe_config_copy(p->avrctx_fmt, p->pre_out_fmt);
-    mp_aframe_set_chmap(p->avrctx_fmt, &out_lavc);
-    mp_aframe_set_format(p->avrctx_fmt, af_from_avformat(out_samplefmtp));
-
-    // If there are NA channels, the final output will have more channels than
-    // the avrctx output. Also, avrctx will output planar (out_samplefmtp was
-    // not overwritten). Allocate the output frame with more channels, so the
-    // NA channels can be trivially added.
-    p->pool_fmt = mp_aframe_create();
-    mp_aframe_config_copy(p->pool_fmt, p->avrctx_fmt);
-    if (map_out.num > out_lavc.num)
-        mp_aframe_set_chmap(p->pool_fmt, &map_out);
-
-    out_ch_layout = fudge_layout_conversion(p, in_ch_layout, out_ch_layout);
-
-    // Real conversion; output is input to avrctx_out.
-    av_opt_set_int(p->avrctx, "in_channel_layout",  in_ch_layout, 0);
-    av_opt_set_int(p->avrctx, "out_channel_layout", out_ch_layout, 0);
-    av_opt_set_int(p->avrctx, "in_sample_rate",     p->in_rate, 0);
-    av_opt_set_int(p->avrctx, "out_sample_rate",    p->out_rate, 0);
-    av_opt_set_int(p->avrctx, "in_sample_fmt",      in_samplefmt, 0);
-    av_opt_set_int(p->avrctx, "out_sample_fmt",     out_samplefmtp, 0);
-
-    // Just needs the correct number of channels for deplanarization.
-    struct mp_chmap fake_chmap;
-    mp_chmap_set_unknown(&fake_chmap, map_out.num);
-    uint64_t fake_out_ch_layout = mp_chmap_to_lavc_unchecked(&fake_chmap);
-    if (!fake_out_ch_layout)
-        goto error;
-    av_opt_set_int(p->avrctx_out, "in_channel_layout",  fake_out_ch_layout, 0);
-    av_opt_set_int(p->avrctx_out, "out_channel_layout", fake_out_ch_layout, 0);
-
-    av_opt_set_int(p->avrctx_out, "in_sample_fmt",      out_samplefmtp, 0);
-    av_opt_set_int(p->avrctx_out, "out_sample_fmt",     out_samplefmt, 0);
-    av_opt_set_int(p->avrctx_out, "in_sample_rate",     p->out_rate, 0);
-    av_opt_set_int(p->avrctx_out, "out_sample_rate",    p->out_rate, 0);
-
-    // API has weird requirements, quoting avresample.h:
-    //  * This function can only be called when the allocated context is not open.
-    //  * Also, the input channel layout must have already been set.
-    avresample_set_channel_mapping(p->avrctx, p->reorder_in);
-
-    p->is_resampling = false;
-
-    if (avresample_open(p->avrctx) < 0 || avresample_open(p->avrctx_out) < 0) {
-        MP_ERR(p, "Cannot open Libavresample context.\n");
-        goto error;
-    }
-    return true;
-
-error:
-    close_lavrr(p);
-    return false;
-}
-
-bool mp_aconverter_reconfig(struct mp_aconverter *p,
-                    int in_rate, int in_format, struct mp_chmap in_channels,
-                    int out_rate, int out_format, struct mp_chmap out_channels)
-{
-    close_lavrr(p);
-
-    TA_FREEP(&p->input);
-    TA_FREEP(&p->output);
-    p->input_eof = p->output_eof = false;
-
-    p->playback_speed = 1.0;
-
-    p->in_rate_user = in_rate;
-    p->in_format    = in_format;
-    p->in_channels  = in_channels;
-    p->out_rate     = out_rate;
-    p->out_format   = out_format;
-    p->out_channels = out_channels;
-
-    return configure_lavrr(p, true);
-}
-
-void mp_aconverter_flush(struct mp_aconverter *p)
-{
-    if (!p->avrctx)
-        return;
-#if HAVE_LIBSWRESAMPLE
-    swr_close(p->avrctx);
-    if (swr_init(p->avrctx) < 0)
-        close_lavrr(p);
-#else
-    while (avresample_read(p->avrctx, NULL, 1000) > 0) {}
-#endif
-}
-
-void mp_aconverter_set_speed(struct mp_aconverter *p, double speed)
-{
-    p->playback_speed = speed;
-}
-
-static void extra_output_conversion(struct mp_aframe *mpa)
-{
-    int format = af_fmt_from_planar(mp_aframe_get_format(mpa));
-    int num_planes = mp_aframe_get_planes(mpa);
-    uint8_t **planes = mp_aframe_get_data_rw(mpa);
-    if (!planes)
-        return;
-    for (int p = 0; p < num_planes; p++) {
-        void *ptr = planes[p];
-        int total = mp_aframe_get_total_plane_samples(mpa);
-        if (format == AF_FORMAT_FLOAT) {
-            for (int s = 0; s < total; s++)
-                ((float *)ptr)[s] = av_clipf(((float *)ptr)[s], -1.0f, 1.0f);
-        } else if (format == AF_FORMAT_DOUBLE) {
-            for (int s = 0; s < total; s++)
-                ((double *)ptr)[s] = MPCLAMP(((double *)ptr)[s], -1.0, 1.0);
-        }
-    }
-}
-
-// This relies on the tricky way mpa was allocated.
-static bool reorder_planes(struct mp_aframe *mpa, int *reorder,
-                           struct mp_chmap *newmap)
-{
-    if (!mp_aframe_set_chmap(mpa, newmap))
-        return false;
-
-    int num_planes = newmap->num;
-    uint8_t **planes = mp_aframe_get_data_rw(mpa);
-    uint8_t *old_planes[MP_NUM_CHANNELS];
-    assert(num_planes <= MP_NUM_CHANNELS);
-    for (int n = 0; n < num_planes; n++)
-        old_planes[n] = planes[n];
-
-    int next_na = 0;
-    for (int n = 0; n < num_planes; n++)
-        next_na += newmap->speaker[n] != MP_SPEAKER_ID_NA;
-
-    for (int n = 0; n < num_planes; n++) {
-        int src = reorder[n];
-        assert(src >= -1 && src < num_planes);
-        if (src >= 0) {
-            planes[n] = old_planes[src];
-        } else {
-            assert(next_na < num_planes);
-            planes[n] = old_planes[next_na++];
-            // The NA planes were never written by avrctx, so clear them.
-            af_fill_silence(planes[n],
-                            mp_aframe_get_sstride(mpa) * mp_aframe_get_size(mpa),
-                            mp_aframe_get_format(mpa));
-        }
-    }
-
-    return true;
-}
-
-static int resample_frame(struct AVAudioResampleContext *r,
-                          struct mp_aframe *out, struct mp_aframe *in)
-{
-    // Be aware that the channel layout and count can be different for in and
-    // out frames. In some situations the caller will fix up the frames before
-    // or after conversion. The sample rates can also be different.
-    AVFrame *av_i = in ? mp_aframe_get_raw_avframe(in) : NULL;
-    AVFrame *av_o = out ? mp_aframe_get_raw_avframe(out) : NULL;
-    return avresample_convert(r,
-        av_o ? av_o->extended_data : NULL,
-        av_o ? av_o->linesize[0] : 0,
-        av_o ? av_o->nb_samples : 0,
-        av_i ? av_i->extended_data : NULL,
-        av_i ? av_i->linesize[0] : 0,
-        av_i ? av_i->nb_samples : 0);
-}
-
-static void filter_resample(struct mp_aconverter *p, struct mp_aframe *in)
-{
-    struct mp_aframe *out = NULL;
-
-    if (!p->avrctx)
-        goto error;
-
-    int samples = get_out_samples(p, in ? mp_aframe_get_size(in) : 0);
-    out = mp_aframe_create();
-    mp_aframe_config_copy(out, p->pool_fmt);
-    if (mp_aframe_pool_allocate(p->out_pool, out, samples) < 0)
-        goto error;
-
-    int out_samples = 0;
-    if (samples) {
-        out_samples = resample_frame(p->avrctx, out, in);
-        if (out_samples < 0 || out_samples > samples)
-            goto error;
-        mp_aframe_set_size(out, out_samples);
-    }
-
-    struct mp_chmap out_chmap;
-    if (!mp_aframe_get_chmap(p->pool_fmt, &out_chmap))
-        goto error;
-    if (!reorder_planes(out, p->reorder_out, &out_chmap))
-        goto error;
-
-    if (!mp_aframe_config_equals(out, p->pre_out_fmt)) {
-        struct mp_aframe *new = mp_aframe_create();
-        mp_aframe_config_copy(new, p->pre_out_fmt);
-        if (mp_aframe_pool_allocate(p->reorder_buffer, new, out_samples) < 0) {
-            talloc_free(new);
-            goto error;
-        }
-        int got = 0;
-        if (out_samples)
-            got = resample_frame(p->avrctx_out, new, out);
-        talloc_free(out);
-        out = new;
-        if (got != out_samples)
-            goto error;
-    }
-
-    extra_output_conversion(out);
-
-    if (in)
-        mp_aframe_copy_attributes(out, in);
-
-    if (out_samples) {
-        p->output = out;
-    } else {
-        talloc_free(out);
-    }
-    p->output_eof = !in; // we've read everything
-
-    return;
-error:
-    talloc_free(out);
-    MP_ERR(p, "Error on resampling.\n");
-}
-
-static void filter(struct mp_aconverter *p)
-{
-    if (p->output || p->output_eof || !(p->input || p->input_eof))
-        return;
-
-    int new_rate = rate_from_speed(p->in_rate_user, p->playback_speed);
-
-    if (p->passthrough_mode && new_rate != p->in_rate)
-        configure_lavrr(p, false);
-
-    if (p->passthrough_mode) {
-        p->output = p->input;
-        p->input = NULL;
-        p->output_eof = p->input_eof;
-        p->input_eof = false;
-        return;
-    }
-
-    if (p->avrctx && !(!p->is_resampling && new_rate == p->in_rate)) {
-        AVRational r = av_d2q(p->playback_speed * p->in_rate_user / p->in_rate,
-                              INT_MAX / 2);
-        // Essentially, swr/avresample_set_compensation() does 2 things:
-        // - adjust output sample rate by sample_delta/compensation_distance
-        // - reset the adjustment after compensation_distance output samples
-        // Increase the compensation_distance to avoid undesired reset
-        // semantics - we want to keep the ratio for the whole frame we're
-        // feeding it, until the next filter() call.
-        int mult = INT_MAX / 2 / MPMAX(MPMAX(abs(r.num), abs(r.den)), 1);
-        r = (AVRational){ r.num * mult, r.den * mult };
-        if (avresample_set_compensation(p->avrctx, r.den - r.num, r.den) >= 0) {
-            new_rate = p->in_rate;
-            p->is_resampling = true;
-        }
-    }
-
-    bool need_reinit = fabs(new_rate / (double)p->in_rate - 1) > 0.01;
-    if (need_reinit && new_rate != p->in_rate) {
-        // Before reconfiguring, drain the audio that is still buffered
-        // in the resampler.
-        filter_resample(p, NULL);
-        // Reinitialize resampler.
-        configure_lavrr(p, false);
-        p->output_eof = false;
-        if (p->output)
-            return; // need to read output before continuing filtering
-    }
-
-    filter_resample(p, p->input);
-    TA_FREEP(&p->input);
-    p->input_eof = false;
-}
-
-// Queue input. If true, ownership of in passes to mp_aconverted and the input
-// was accepted. Otherwise, return false and reject in.
-// in==NULL means trigger EOF.
-bool mp_aconverter_write_input(struct mp_aconverter *p, struct mp_aframe *in)
-{
-    if (p->input || p->input_eof)
-        return false;
-
-    p->input = in;
-    p->input_eof = !in;
-    return true;
-}
-
-// Return output frame, or NULL if nothing available.
-// *eof is set to true if NULL is returned, and it was due to EOF.
-struct mp_aframe *mp_aconverter_read_output(struct mp_aconverter *p, bool *eof)
-{
-    *eof = false;
-
-    filter(p);
-
-    if (p->output) {
-        struct mp_aframe *out = p->output;
-        p->output = NULL;
-        return out;
-    }
-
-    *eof = p->output_eof;
-    p->output_eof = false;
-    return NULL;
-}
-
-double mp_aconverter_get_latency(struct mp_aconverter *p)
-{
-    double delay = get_delay(p);
-
-    if (p->input)
-        delay += mp_aframe_duration(p->input);
-
-    // In theory this is influenced by playback speed, but other parts of the
-    // player get it wrong anyway.
-    if (p->output)
-        delay += mp_aframe_duration(p->output);
-
-    return delay;
-}
-
-static void destroy_aconverter(void *ptr)
-{
-    struct mp_aconverter *p = ptr;
-
-    close_lavrr(p);
-
-    talloc_free(p->input);
-    talloc_free(p->output);
-}
-
-// If opts is not NULL, the pointer must be valid for the lifetime of the
-// mp_aconverter.
-struct mp_aconverter *mp_aconverter_create(struct mpv_global *global,
-                                           struct mp_log *log,
-                                           const struct mp_resample_opts *opts)
-{
-    struct mp_aconverter *p = talloc_zero(NULL, struct mp_aconverter);
-    p->log = log;
-    p->global = global;
-
-    p->opts = opts;
-    if (!p->opts)
-        p->opts = mp_get_config_group(p, global, &resample_config);
-
-    p->reorder_buffer = mp_aframe_pool_create(p);
-    p->out_pool = mp_aframe_pool_create(p);
-
-    talloc_set_destructor(p, destroy_aconverter);
-
-    return p;
-}
diff --git a/audio/aconverter.h b/audio/aconverter.h
deleted file mode 100644
index 22ca93e4c1..0000000000
--- a/audio/aconverter.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#pragma once
-
-#include <stdbool.h>
-
-#include "chmap.h"
-
-struct mp_aconverter;
-struct mp_aframe;
-struct mpv_global;
-struct mp_log;
-
-struct mp_resample_opts {
-    int filter_size;
-    int phase_shift;
-    int linear;
-    double cutoff;
-    int normalize;
-    int allow_passthrough;
-    char **avopts;
-};
-
-#define MP_RESAMPLE_OPTS_DEF {  \
-    .filter_size = 16,          \
-    .cutoff      = 0.0,         \
-    .phase_shift = 10,          \
-    .normalize   = 0,           \
-    }
-
-extern const struct m_sub_options resample_config;
-
-struct mp_aconverter *mp_aconverter_create(struct mpv_global *global,
-                                           struct mp_log *log,
-                                           const struct mp_resample_opts *opts);
-bool mp_aconverter_reconfig(struct mp_aconverter *p,
-                    int in_rate, int in_format, struct mp_chmap in_channels,
-                    int out_rate, int out_format, struct mp_chmap out_channels);
-void mp_aconverter_flush(struct mp_aconverter *p);
-void mp_aconverter_set_speed(struct mp_aconverter *p, double speed);
-bool mp_aconverter_write_input(struct mp_aconverter *p, struct mp_aframe *in);
-struct mp_aframe *mp_aconverter_read_output(struct mp_aconverter *p, bool *eof);
-double mp_aconverter_get_latency(struct mp_aconverter *p);
diff --git a/audio/aframe.c b/audio/aframe.c
index 1f053a6715..9115cf67fd 100644
--- a/audio/aframe.c
+++ b/audio/aframe.c
@@ -32,6 +32,11 @@ struct mp_aframe {
     // We support spdif formats, which are allocated as AV_SAMPLE_FMT_S16.
     int format;
     double pts;
+    double speed;
+};
+
+struct avframe_opaque {
+    double speed;
 };
 
 static void free_frame(void *ptr)
@@ -43,11 +48,11 @@ static void free_frame(void *ptr)
 struct mp_aframe *mp_aframe_create(void)
 {
     struct mp_aframe *frame = talloc_zero(NULL, struct mp_aframe);
-    frame->pts = MP_NOPTS_VALUE;
     frame->av_frame = av_frame_alloc();
     if (!frame->av_frame)
         abort();
     talloc_set_destructor(frame, free_frame);
+    mp_aframe_reset(frame);
     return frame;
 }
 
@@ -61,6 +66,7 @@ struct mp_aframe *mp_aframe_new_ref(struct mp_aframe *frame)
     dst->chmap = frame->chmap;
     dst->format = frame->format;
     dst->pts = frame->pts;
+    dst->speed = frame->speed;
 
     if (mp_aframe_is_allocated(frame)) {
         if (av_frame_ref(dst->av_frame, frame->av_frame) < 0)
@@ -80,6 +86,7 @@ void mp_aframe_reset(struct mp_aframe *frame)
     frame->chmap.num = 0;
     frame->format = 0;
     frame->pts = MP_NOPTS_VALUE;
+    frame->speed = 1.0;
 }
 
 // Remove all actual audio data and leave only the metadata.
@@ -120,6 +127,11 @@ struct mp_aframe *mp_aframe_from_avframe(struct AVFrame *av_frame)
         mp_chmap_from_channels(&frame->chmap, av_frame->channels);
 #endif
 
+    if (av_frame->opaque_ref) {
+        struct avframe_opaque *op = (void *)av_frame->opaque_ref->data;
+        frame->speed = op->speed;
+    }
+
     return frame;
 }
 
@@ -137,6 +149,16 @@ struct AVFrame *mp_aframe_to_avframe(struct mp_aframe *frame)
     if (!mp_chmap_is_lavc(&frame->chmap))
         return NULL;
 
+    if (!frame->av_frame->opaque_ref && frame->speed != 1.0) {
+        frame->av_frame->opaque_ref =
+            av_buffer_alloc(sizeof(struct avframe_opaque));
+        if (!frame->av_frame->opaque_ref)
+            return NULL;
+
+        struct avframe_opaque *op = (void *)frame->av_frame->opaque_ref->data;
+        op->speed = frame->speed;
+    }
+
     return av_frame_clone(frame->av_frame);
 }
 
@@ -183,6 +205,7 @@ void mp_aframe_config_copy(struct mp_aframe *dst, struct mp_aframe *src)
 void mp_aframe_copy_attributes(struct mp_aframe *dst, struct mp_aframe *src)
 {
     dst->pts = src->pts;
+    dst->speed = src->speed;
 
     int rate = dst->av_frame->sample_rate;
 
@@ -316,6 +339,37 @@ void mp_aframe_set_pts(struct mp_aframe *frame, double pts)
     frame->pts = pts;
 }
 
+// Set a speed factor. The sample rate is divided by this factor to get the
+// "effective" samplerate (mp_aframe_get_effective_rate()), which will be used
+// to do PTS calculations. If speed!=1.0, the PTS values always refer to the
+// original PTS (before changing speed), and if you want reasonably continuous
+// PTS between frames, you need to use the effective samplerate.
+void mp_aframe_set_speed(struct mp_aframe *frame, double factor)
+{
+    frame->speed = factor;
+}
+
+// Multiply the current speed factor by the given factor.
+void mp_aframe_mul_speed(struct mp_aframe *frame, double factor)
+{
+    frame->speed *= factor;
+}
+
+double mp_aframe_get_speed(struct mp_aframe *frame)
+{
+    return frame->speed;
+}
+
+// Matters for speed changed frames (such as a frame which has been resampled
+// to play at a different speed).
+// Return the sample rate at which the frame would have to be played to result
+// in the same duration as the original frame before the speed change.
+// This is used for A/V sync.
+double mp_aframe_get_effective_rate(struct mp_aframe *frame)
+{
+    return mp_aframe_get_rate(frame) / frame->speed;
+}
+
 // Return number of data pointers.
 int mp_aframe_get_planes(struct mp_aframe *frame)
 {
@@ -339,6 +393,18 @@ int mp_aframe_get_total_plane_samples(struct mp_aframe *frame)
             ? 1 : mp_aframe_get_channels(frame));
 }
 
+char *mp_aframe_format_str_buf(char *buf, size_t buf_size, struct mp_aframe *fmt)
+{
+    char ch[128];
+    mp_chmap_to_str_buf(ch, sizeof(ch), &fmt->chmap);
+    char *hr_ch = mp_chmap_to_str_hr(&fmt->chmap);
+    if (strcmp(hr_ch, ch) != 0)
+        mp_snprintf_cat(ch, sizeof(ch), " (%s)", hr_ch);
+    snprintf(buf, buf_size, "%dHz %s %dch %s", fmt->av_frame->sample_rate,
+             ch, fmt->chmap.num, af_fmt_to_str(fmt->format));
+    return buf;
+}
+
 // Set data to the audio after the given number of samples (i.e. slice it).
 void mp_aframe_skip_samples(struct mp_aframe *f, int samples)
 {
@@ -352,25 +418,25 @@ void mp_aframe_skip_samples(struct mp_aframe *f, int samples)
     f->av_frame->nb_samples -= samples;
 
     if (f->pts != MP_NOPTS_VALUE)
-        f->pts += samples / (double)mp_aframe_get_rate(f);
+        f->pts += samples / mp_aframe_get_effective_rate(f);
 }
 
 // Return the timestamp of the sample just after the end of this frame.
 double mp_aframe_end_pts(struct mp_aframe *f)
 {
-    int rate = mp_aframe_get_rate(f);
-    if (f->pts == MP_NOPTS_VALUE || rate < 1)
+    double rate = mp_aframe_get_effective_rate(f);
+    if (f->pts == MP_NOPTS_VALUE || rate <= 0)
         return MP_NOPTS_VALUE;
-    return f->pts + f->av_frame->nb_samples / (double)rate;
+    return f->pts + f->av_frame->nb_samples / rate;
 }
 
 // Return the duration in seconds of the frame (0 if invalid).
 double mp_aframe_duration(struct mp_aframe *f)
 {
-    int rate = mp_aframe_get_rate(f);
-    if (rate < 1)
+    double rate = mp_aframe_get_effective_rate(f);
+    if (rate <= 0)
         return 0;
-    return f->av_frame->nb_samples / (double)rate;
+    return f->av_frame->nb_samples / rate;
 }
 
 // Clip the given frame to the given timestamp range. Adjusts the frame size
@@ -378,7 +444,7 @@ double mp_aframe_duration(struct mp_aframe *f)
 void mp_aframe_clip_timestamps(struct mp_aframe *f, double start, double end)
 {
     double f_end = mp_aframe_end_pts(f);
-    int rate = mp_aframe_get_rate(f);
+    double rate = mp_aframe_get_effective_rate(f);
     if (f_end == MP_NOPTS_VALUE)
         return;
     if (end != MP_NOPTS_VALUE) {
@@ -405,6 +471,52 @@ void mp_aframe_clip_timestamps(struct mp_aframe *f, double start, double end)
     }
 }
 
+bool mp_aframe_copy_samples(struct mp_aframe *dst, int dst_offset,
+                            struct mp_aframe *src, int src_offset,
+                            int samples)
+{