8 files changed, 1685 insertions, 223 deletions
diff --git a/audio/filter/af_drop.c b/audio/filter/af_drop.c
new file mode 100644
index 0000000000..499389dd2b
--- /dev/null
+++ b/audio/filter/af_drop.c
@@ -0,0 +1,114 @@
+#include "audio/aframe.h"
+#include "audio/format.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+
+struct priv {
+    double speed; // set to 1.0 on creation, replaced by SET_SPEED commands
+    double diff; // running balance: + duration of frames output, - of frames read
+    struct mp_aframe *last; // reference to the last output frame, for repeating
+};
+
+static void af_drop_process(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+    // One pass: repeat the previous frame, or read one input frame (maybe drop it).
+    if (!mp_pin_in_needs_data(f->ppins[1]))
+        return;
+
+    struct mp_frame frame = {0};
+    // Repeat once the balance is short by more than half the last frame's duration.
+    double last_dur = p->last ? mp_aframe_duration(p->last) : 0;
+    if (p->last && p->diff < 0 && -p->diff > last_dur / 2) {
+        MP_VERBOSE(f, "repeat\n");
+        frame = MAKE_FRAME(MP_FRAME_AUDIO, p->last);
+        p->last = NULL; // the reference now lives in frame
+    } else {
+        frame = mp_pin_out_read(f->ppins[0]);
+        // Charge the input to the balance; drop it if still over by half a frame.
+        if (frame.type == MP_FRAME_AUDIO) {
+            last_dur = mp_aframe_duration(frame.data);
+            p->diff -= last_dur;
+            if (p->diff > last_dur / 2) {
+                MP_VERBOSE(f, "drop\n");
+                mp_frame_unref(&frame); // presumably leaves frame typeless - confirm
+                mp_filter_internal_mark_progress(f);
+            }
+        }
+    }
+    // Keep a reference for a later repeat and credit the output to the balance.
+    if (frame.type == MP_FRAME_AUDIO) {
+        struct mp_aframe *fr = frame.data;
+        talloc_free(p->last);
+        p->last = mp_aframe_new_ref(fr);
+        mp_aframe_mul_speed(fr, p->speed);
+        p->diff += mp_aframe_duration(fr);
+        mp_aframe_set_pts(p->last, mp_aframe_end_pts(fr)); // a repeat follows fr
+    } else if (frame.type == MP_FRAME_EOF) {
+        TA_FREEP(&p->last);
+    }
+    mp_pin_in_write(f->ppins[1], frame);
+}
+
+static bool af_drop_command(struct mp_filter *f, struct mp_filter_command *cmd)
+{
+    struct priv *p = f->priv;
+    // Only speed changes are handled; returns true if cmd was consumed here.
+    switch (cmd->type) {
+    case MP_FILTER_COMMAND_SET_SPEED:
+        p->speed = cmd->speed; // read by af_drop_process for later frames
+        return true;
+    }
+
+    return false;
+}
+
+static void af_drop_reset(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+    // Forget the repeat candidate and zero the balance; p->speed is kept.
+    TA_FREEP(&p->last);
+    p->diff = 0;
+}
+
+static void af_drop_destroy(struct mp_filter *f)
+{
+    af_drop_reset(f); // releases p->last, the only pointer held in struct priv
+}
+
+static const struct mp_filter_info af_drop_filter = { // callbacks of the filter
+    .name = "drop",
+    .priv_size = sizeof(struct priv), // f->priv is used as a struct priv
+    .process = af_drop_process,
+    .command = af_drop_command,
+    .reset = af_drop_reset,
+    .destroy = af_drop_destroy,
+};
+
+// Create a "drop" filter instance under parent; returns NULL on failure.
+static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &af_drop_filter);
+    talloc_free(options); // never read or stored here, so free it on every path
+    if (!f)
+        return NULL;
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *p = f->priv;
+    p->speed = 1.0; // neutral speed until a SET_SPEED command changes it
+
+    return f;
+}
+
+const struct mp_user_filter_entry af_drop = { // user-visible filter entry
+    .desc = {
+        .description = "Change audio speed by dropping/repeating frames",
+        .name = "drop",
+        .priv_size = sizeof(struct priv), // no .options table is declared
+    },
+    .create = af_drop_create,
+};
diff --git a/audio/filter/af_format.c b/audio/filter/af_format.c
index 3e1eef664c..eddce6422f 100644
--- a/audio/filter/af_format.c
+++ b/audio/filter/af_format.c
@@ -30,7 +30,7 @@ struct f_opts {
     int out_srate;
     struct m_channels out_channels;
 
-    int fail;
+    bool fail;
 };
 
 struct priv {
@@ -38,7 +38,7 @@ struct priv {
     struct mp_pin *in_pin;
 };
 
-static void process(struct mp_filter *f)
+static void af_format_process(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -85,7 +85,7 @@ error:
 static const struct mp_filter_info af_format_filter = {
     .name = "format",
     .priv_size = sizeof(struct priv),
-    .process = process,
+    .process = af_format_process,
 };
 
 static struct mp_filter *af_format_create(struct mp_filter *parent,
@@ -128,12 +128,14 @@ const struct mp_user_filter_entry af_format = {
         .description = "Force audio format",
         .priv_size = sizeof(struct f_opts),
         .options = (const struct m_option[]) {
-            OPT_AUDIOFORMAT("format", in_format, 0),
-            OPT_INTRANGE("srate", in_srate, 0, 1000, 8*48000),
-            OPT_CHANNELS("channels", in_channels, 0, .min = 1),
-            OPT_INTRANGE("out-srate", out_srate, 0, 1000, 8*48000),
-            OPT_CHANNELS("out-channels", out_channels, 0, .min = 1),
-            OPT_FLAG("fail", fail, 0),
+            {"format", OPT_AUDIOFORMAT(in_format)},
+            {"srate", OPT_INT(in_srate), M_RANGE(1000, 8*48000)},
+            {"channels", OPT_CHANNELS(in_channels),
+                .flags = M_OPT_CHANNELS_LIMITED},
+            {"out-srate", OPT_INT(out_srate), M_RANGE(1000, 8*48000)},
+            {"out-channels", OPT_CHANNELS(out_channels),
+                .flags = M_OPT_CHANNELS_LIMITED},
+            {"fail", OPT_BOOL(fail)},
             {0}
         },
     },
diff --git a/audio/filter/af_lavcac3enc.c b/audio/filter/af_lavcac3enc.c
index c7582cf52b..def9700d18 100644
--- a/audio/filter/af_lavcac3enc.c
+++ b/audio/filter/af_lavcac3enc.c
@@ -31,7 +31,10 @@
 #include <libavutil/bswap.h>
 #include <libavutil/mem.h>
 
+#include "config.h"
+
 #include "audio/aframe.h"
+#include "audio/chmap_avchannel.h"
 #include "audio/chmap_sel.h"
 #include "audio/fmt-conversion.h"
 #include "audio/format.h"
@@ -47,13 +50,13 @@
 #define AC3_MAX_CHANNELS 6
 #define AC3_MAX_CODED_FRAME_SIZE 3840
 #define AC3_FRAME_SIZE (6  * 256)
-const uint16_t ac3_bitrate_tab[19] = {
+static const uint16_t ac3_bitrate_tab[19] = {
     32, 40, 48, 56, 64, 80, 96, 112, 128,
     160, 192, 224, 256, 320, 384, 448, 512, 576, 640
 };
 
 struct f_opts {
-    int add_iec61937_header;
+    bool add_iec61937_header;
     int bit_rate;
     int min_channel_num;
     char *encoder;
@@ -68,8 +71,9 @@ struct priv {
     struct mp_aframe *in_frame;
     struct mp_aframe_pool *out_pool;
 
-    struct AVCodec        *lavc_acodec;
+    const struct AVCodec  *lavc_acodec;
     struct AVCodecContext *lavc_actx;
+    AVPacket              *lavc_pkt;
     int bit_rate;
     int out_samples;    // upper bound on encoded output per AC3 frame
 };
@@ -99,12 +103,25 @@ static bool reinit(struct mp_filter *f)
     if (!bit_rate && chmap.num < AC3_MAX_CHANNELS + 1)
         bit_rate = default_bit_rate[chmap.num];
 
-    avcodec_close(s->lavc_actx);
+    avcodec_free_context(&s->lavc_actx);
+    s->lavc_actx = avcodec_alloc_context3(s->lavc_acodec);
+    if (!s->lavc_actx) {
+        MP_ERR(f, "Audio LAVC, couldn't reallocate context!\n");
+        return false;
+    }
+
+    if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0)
+        return false;
 
     // Put sample parameters
     s->lavc_actx->sample_fmt = af_to_avformat(format);
+
+#if !HAVE_AV_CHANNEL_LAYOUT
     s->lavc_actx->channels = chmap.num;
     s->lavc_actx->channel_layout = mp_chmap_to_lavc(&chmap);
+#else
+    mp_chmap_to_av_layout(&s->lavc_actx->ch_layout, &chmap);
+#endif
     s->lavc_actx->sample_rate = rate;
     s->lavc_actx->bit_rate = bit_rate;
 
@@ -122,18 +139,19 @@ static bool reinit(struct mp_filter *f)
     return true;
 }
 
-static void reset(struct mp_filter *f)
+static void af_lavcac3enc_reset(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
     TA_FREEP(&s->in_frame);
 }
 
-static void destroy(struct mp_filter *f)
+static void af_lavcac3enc_destroy(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
-    reset(f);
+    af_lavcac3enc_reset(f);
+    av_packet_free(&s->lavc_pkt);
     avcodec_free_context(&s->lavc_actx);
 }
 
@@ -143,7 +161,7 @@ static void swap_16(uint16_t *ptr, size_t size)
         ptr[n] = av_bswap16(ptr[n]);
 }
 
-static void process(struct mp_filter *f)
+static void af_lavcac3enc_process(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
@@ -152,57 +170,57 @@ static void process(struct mp_filter *f)
 
     bool err = true;
     struct mp_aframe *out = NULL;
-    AVPacket pkt = {0};
-    av_init_packet(&pkt);
+    AVPacket *pkt = s->lavc_pkt;
 
     // Send input as long as it wants.
     while (1) {
         if (avcodec_is_open(s->lavc_actx)) {
-            int lavc_ret = avcodec_receive_packet(s->lavc_actx, &pkt);
+            int lavc_ret = avcodec_receive_packet(s->lavc_actx, pkt);
             if (lavc_ret >= 0)
                 break;
             if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) {
                 MP_FATAL(f, "Encode failed (receive).\n");
-                goto done;
+                goto error;
             }
         }
         AVFrame *frame = NULL;
         struct mp_frame input = mp_pin_out_read(s->in_pin);
         // The following code assumes no sample data buffering in the encoder.
-        if (input.type == MP_FRAME_EOF) {
+        switch (input.type) {
+        case MP_FRAME_NONE:
+            goto done; // no data yet
+        case MP_FRAME_EOF:
             mp_pin_in_write(f->ppins[1], input);
-            return;
-        } else if (input.type == MP_FRAME_AUDIO) {
+            goto done;
+        case MP_FRAME_AUDIO:
             TA_FREEP(&s->in_frame);
             s->in_frame = input.data;
-            frame = mp_frame_to_av(input, NULL);
-            if (!frame)
-                goto done;
             if (mp_aframe_get_channels(s->in_frame) < s->opts->min_channel_num) {
                 // Just pass it through.
                 s->in_frame = NULL;
                 mp_pin_in_write(f->ppins[1], input);
-                return;
+                goto done;
             }
             if (!mp_aframe_config_equals(s->in_frame, s->cur_format)) {
                 if (!reinit(f))
-                    goto done;
+                    goto error;
             }
-        } else if (input.type) {
-            goto done;
-        } else {
-            return; // no data yet
+            frame = mp_frame_to_av(input, NULL);
+            if (!frame)
+                goto error;
+            break;
+        default: goto error; // unexpected packet type
         }
         int lavc_ret = avcodec_send_frame(s->lavc_actx, frame);
         av_frame_free(&frame);
         if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) {
             MP_FATAL(f, "Encode failed (send).\n");
-            goto done;
+            goto error;
         }
     }
 
     if (!s->in_frame)
-        goto done;
+        goto error;
 
     out = mp_aframe_create();
     mp_aframe_set_format(out, AF_FORMAT_S_AC3);
@@ -210,18 +228,18 @@ static void process(struct mp_filter *f)
     mp_aframe_set_rate(out, 48000);
 
     if (mp_aframe_pool_allocate(s->out_pool, out, s->out_samples) < 0)
-        goto done;
+        goto error;
 
     int sstride = mp_aframe_get_sstride(out);
 
     mp_aframe_copy_attributes(out, s->in_frame);
 
-    int frame_size = pkt.size;
+    int frame_size = pkt->size;
     int header_len = 0;
     char hdr[8];
 
-    if (s->opts->add_iec61937_header && pkt.size > 5) {
-        int bsmod = pkt.data[5] & 0x7;
+    if (s->opts->add_iec61937_header && pkt->size > 5) {
+        int bsmod = pkt->data[5] & 0x7;
         int len = frame_size;
 
         frame_size = AC3_FRAME_SIZE * 2 * 2;
@@ -239,20 +257,22 @@ static void process(struct mp_filter *f)
 
     uint8_t **planes = mp_aframe_get_data_rw(out);
     if (!planes)
-        goto done;
+        goto error;
     char *buf = planes[0];
     memcpy(buf, hdr, header_len);
-    memcpy(buf + header_len, pkt.data, pkt.size);
-    memset(buf + header_len + pkt.size, 0,
-           frame_size - (header_len + pkt.size));
-    swap_16((uint16_t *)(buf + header_len), pkt.size / 2);
+    memcpy(buf + header_len, pkt->data, pkt->size);
+    memset(buf + header_len + pkt->size, 0,
+           frame_size - (header_len + pkt->size));
+    swap_16((uint16_t *)(buf + header_len), pkt->size / 2);
     mp_aframe_set_size(out, frame_size / sstride);
     mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
     out = NULL;
 
-    err = 0;
 done:
-    av_packet_unref(&pkt);
+    err = false;
+    // fall through
+error:
+    av_packet_unref(pkt);
     talloc_free(out);
     if (err)
         mp_filter_internal_mark_failed(f);
@@ -261,11 +281,43 @@ done:
 static const struct mp_filter_info af_lavcac3enc_filter = {
     .name = "lavcac3enc",
     .priv_size = sizeof(struct priv),
-    .process = process,
-    .reset = reset,
-    .destroy = destroy,
+    .process = af_lavcac3enc_process,
+    .reset = af_lavcac3enc_reset,
+    .destroy = af_lavcac3enc_destroy,
 };
 
+// Register every channel layout the codec advertises with the autoconverter.
+static void add_chmaps_to_autoconv(struct mp_filter *f,
+                                   struct mp_autoconvert *conv,
+                                   const struct AVCodec *codec)
+{
+#if !HAVE_AV_CHANNEL_LAYOUT
+    const uint64_t *lch = codec->channel_layouts; // list ends with a 0 entry
+    for (int n = 0; lch && lch[n]; n++) {
+        struct mp_chmap chmap = {0};
+        mp_chmap_from_lavc(&chmap, lch[n]);
+        if (mp_chmap_is_valid(&chmap))
+            mp_autoconvert_add_chmap(conv, &chmap);
+    }
+#else
+    const AVChannelLayout *lch = codec->ch_layouts; // ends at nb_channels == 0
+    for (int n = 0; lch && lch[n].nb_channels; n++) {
+        struct mp_chmap chmap = {0};
+        if (!mp_chmap_from_av_layout(&chmap, &lch[n])) {
+            char layout[128] = {0};
+            MP_VERBOSE(f, "Skipping unsupported channel layout: %s\n",
+                       av_channel_layout_describe(&lch[n], layout,
+                                                  sizeof(layout)) < 0 ?
+                       "undefined" : layout);
+            continue;
+        }
+
+        if (mp_chmap_is_valid(&chmap))
+            mp_autoconvert_add_chmap(conv, &chmap);
+    }
+#endif
+}
+
 static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
                                               void *options)
 {
@@ -295,14 +347,23 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
         goto error;
     }
 
+    s->lavc_pkt = av_packet_alloc();
+    if (!s->lavc_pkt)
+        goto error;
+
     if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0)
         goto error;
 
-    // For this one, we require the decoder to expert lists of all supported
+    // For this one, we require the decoder to export lists of all supported
     // parameters. (Not all decoders do that, but the ones we're interested
     // in do.)
     if (!s->lavc_acodec->sample_fmts ||
-        !s->lavc_acodec->channel_layouts)
+#if !HAVE_AV_CHANNEL_LAYOUT
+        !s->lavc_acodec->channel_layouts
+#else
+        !s->lavc_acodec->ch_layouts
+#endif
+        )
     {
         MP_ERR(f, "Audio encoder doesn't list supported parameters.\n");
         goto error;
@@ -334,13 +395,7 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
             mp_autoconvert_add_afmt(conv, mpfmt);
     }
 
-    const uint64_t *lch = s->lavc_acodec->channel_layouts;
-    for (int n = 0; lch && lch[n]; n++) {
-        struct mp_chmap chmap = {0};
-        mp_chmap_from_lavc(&chmap, lch[n]);
-        if (mp_chmap_is_valid(&chmap))
-            mp_autoconvert_add_chmap(conv, &chmap);
-    }
+    add_chmaps_to_autoconv(f, conv, s->lavc_acodec);
 
     // At least currently, the AC3 encoder doesn't export sample rates.
     mp_autoconvert_add_srate(conv, 48000);
@@ -357,6 +412,8 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
     return f;
 
 error:
+    av_packet_free(&s->lavc_pkt);
+    avcodec_free_context(&s->lavc_actx);
     talloc_free(f);
     return NULL;
 }
@@ -369,18 +426,18 @@ const struct mp_user_filter_entry af_lavcac3enc = {
         .name = "lavcac3enc",
         .priv_size = sizeof(OPT_BASE_STRUCT),
         .priv_defaults = &(const OPT_BASE_STRUCT) {
-            .add_iec61937_header = 1,
+            .add_iec61937_header = true,
             .bit_rate = 640,
             .min_channel_num = 3,
             .encoder = "ac3",
         },
         .options = (const struct m_option[]) {
-            OPT_FLAG("tospdif", add_iec61937_header, 0),
-            OPT_CHOICE_OR_INT("bitrate", bit_rate, 0, 32, 640,
-                            ({"auto", 0}, {"default", 0})),
-            OPT_INTRANGE("minch", min_channel_num, 0, 2, 6),
-            OPT_STRING("encoder", encoder, 0),
-            OPT_KEYVALUELIST("o", avopts, 0),
+            {"tospdif", OPT_BOOL(add_iec61937_header)},
+            {"bitrate", OPT_CHOICE(bit_rate,
+                {"auto", 0}, {"default", 0}), M_RANGE(32, 640)},
+            {"minch", OPT_INT(min_channel_num), M_RANGE(2, 6)},
+            {"encoder", OPT_STRING(encoder)},
+            {"o", OPT_KEYVALUELIST(avopts)},
             {0}
         },
     },
diff --git a/audio/filter/af_rubberband.c b/audio/filter/af_rubberband.c
index c7b6317c13..e71937fcb2 100644
--- a/audio/filter/af_rubberband.c
+++ b/audio/filter/af_rubberband.c
@@ -20,6 +20,8 @@
 
 #include <rubberband/rubberband-c.h>
 
+#include "config.h"
+
 #include "audio/aframe.h"
 #include "audio/format.h"
 #include "common/common.h"
@@ -31,7 +33,7 @@
 // command line options
 struct f_opts {
     int transients, detector, phase, window,
-        smoothing, formant, pitch, channels;
+        smoothing, formant, pitch, channels, engine;
     double scale;
 };
 
@@ -78,7 +80,10 @@ static bool init_rubberband(struct mp_filter *f)
 
     int opts = p->opts->transients | p->opts->detector | p->opts->phase |
                p->opts->window | p->opts->smoothing | p->opts->formant |
-               p->opts->pitch | p-> opts->channels |
+               p->opts->pitch | p->opts->channels |
+#if HAVE_RUBBERBAND_3
+               p->opts->engine |
+#endif
                RubberBandOptionProcessRealTime;
 
     int rate = mp_aframe_get_rate(p->pending);
@@ -100,7 +105,7 @@ static bool init_rubberband(struct mp_filter *f)
     return true;
 }
 
-static void process(struct mp_filter *f)
+static void af_rubberband_process(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -228,7 +233,7 @@ error:
     mp_filter_internal_mark_failed(f);
 }
 
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_rubberband_command(struct mp_filter *f, struct mp_filter_command *cmd)
 {
     struct priv *p = f->priv;
 
@@ -258,7 +263,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
     return false;
 }
 
-static void reset(struct mp_filter *f)
+static void af_rubberband_reset(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -269,7 +274,7 @@ static void reset(struct mp_filter *f)
     TA_FREEP(&p->pending);
 }
 
-static void destroy(struct mp_filter *f)
+static void af_rubberband_destroy(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -281,10 +286,10 @@ static void destroy(struct mp_filter *f)
 static const struct mp_filter_info af_rubberband_filter = {
     .name = "rubberband",
     .priv_size = sizeof(struct priv),
-    .process = process,
-    .command = command,
-    .reset = reset,
-    .destroy = destroy,
+    .process = af_rubberband_process,
+    .command = af_rubberband_command,
+    .reset = af_rubberband_reset,
+    .destroy = af_rubberband_destroy,
 };
 
 static struct mp_filter *af_rubberband_create(struct mp_filter *parent,
@@ -331,37 +336,45 @@ const struct mp_user_filter_entry af_rubberband = {
             .transients = RubberBandOptionTransientsMixed,
             .formant = RubberBandOptionFormantPreserved,
             .channels = RubberBandOptionChannelsTogether,
+#if HAVE_RUBBERBAND_3
+            .engine = RubberBandOptionEngineFiner,
+#endif
         },
         .options = (const struct m_option[]) {
-            OPT_CHOICE("transients", transients, 0,
-                    ({"crisp", RubberBandOptionTransientsCrisp},
-                     {"mixed", RubberBandOptionTransientsMixed},
-                     {"smooth", RubberBandOptionTransientsSmooth})),
-            OPT_CHOICE("detector", detector, 0,
-                    ({"compound", RubberBandOptionDetectorCompound},
-                     {"percussive", RubberBandOptionDetectorPercussive},
-                     {"soft", RubberBandOptionDetectorSoft})),
-            OPT_CHOICE("phase", phase, 0,
-                    ({"laminar", RubberBandOptionPhaseLaminar},
-                     {"independent", RubberBandOptionPhaseIndependent})),
-            OPT_CHOICE("window", window, 0,
-                    ({"standard", RubberBandOptionWindowStandard},
-                     {"short", RubberBandOptionWindowShort},
-                     {"long", RubberBandOptionWindowLong})),
-            OPT_CHOICE("smoothing", smoothing, 0,
-                    ({"off", RubberBandOptionSmoothingOff},
-                     {"on", RubberBandOptionSmoothingOn})),
-            OPT_CHOICE("formant", formant, 0,
-                    ({"shifted", RubberBandOptionFormantShifted},
-                     {"preserved", RubberBandOptionFormantPreserved})),
-            OPT_CHOICE("pitch", pitch, 0,
-                    ({"quality", RubberBandOptionPitchHighQuality},
-                     {"speed", RubberBandOptionPitchHighSpeed},
-                     {"consistency", RubberBandOptionPitchHighConsistency})),
-            OPT_CHOICE("channels", channels, 0,
-                    ({"apart", RubberBandOptionChannelsApart},
-                     {"together", RubberBandOptionChannelsTogether})),
-            OPT_DOUBLE("pitch-scale", scale, M_OPT_RANGE, .min = 0.01, .max = 100),
+            {"transients", OPT_CHOICE(transients,
+                {"crisp", RubberBandOptionTransientsCrisp},
+                {"mixed", RubberBandOptionTransientsMixed},
+                {"smooth", RubberBandOptionTransientsSmooth})},
+            {"detector", OPT_CHOICE(detector,
+                {"compound", RubberBandOptionDetectorCompound},
+                {"percussive", RubberBandOptionDetectorPercussive},
+                {"soft", RubberBandOptionDetectorSoft})},
+            {"phase", OPT_CHOICE(phase,
+                {"laminar", RubberBandOptionPhaseLaminar},
+                {"independent", RubberBandOptionPhaseIndependent})},
+            {"window", OPT_CHOICE(window,
+                {"standard", RubberBandOptionWindowStandard},
+                {"short", RubberBandOptionWindowShort},
+                {"long", RubberBandOptionWindowLong})},
+            {"smoothing", OPT_CHOICE(smoothing,
+                {"off", RubberBandOptionSmoothingOff},
+                {"on", RubberBandOptionSmoothingOn})},
+            {"formant", OPT_CHOICE(formant,
+                {"shifted", RubberBandOptionFormantShifted},
+                {"preserved", RubberBandOptionFormantPreserved})},
+            {"pitch", OPT_CHOICE(pitch,
+                {"quality", RubberBandOptionPitchHighQuality},
+                {"speed", RubberBandOptionPitchHighSpeed},
+                {"consistency", RubberBandOptionPitchHighConsistency})},
+            {"channels", OPT_CHOICE(channels,
+                {"apart", RubberBandOptionChannelsApart},
+                {"together", RubberBandOptionChannelsTogether})},
+#if HAVE_RUBBERBAND_3
+            {"engine", OPT_CHOICE(engine,
+                {"finer", RubberBandOptionEngineFiner},
+                {"faster", RubberBandOptionEngineFaster})},
+#endif
+            {"pitch-scale", OPT_DOUBLE(scale), M_RANGE(0.01, 100)},
             {0}
         },
     },
diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c
index ed1df5725e..482b91209e 100644
--- a/audio/filter/af_scaletempo.c
+++ b/audio/filter/af_scaletempo.c
@@ -2,7 +2,7 @@
  * scaletempo audio filter
  *
  * scale tempo while maintaining pitch
- * (WSOLA technique with cross correlation)
+ * (WSOLA technique with taxicab distance)
  * inspired by SoundTouch library by Olli Parviainen
  *
  * basic algorithm
@@ -30,10 +30,12 @@
  * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <float.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
 #include <assert.h>
+#include <math.h>
 
 #include "audio/aframe.h"
 #include "audio/format.h"
@@ -47,7 +49,7 @@ struct f_opts {
     float scale_nominal;
     float ms_stride;
     float ms_search;
-    float percent_overlap;
+    float factor_overlap;
 #define SCALE_TEMPO 1
 #define SCALE_PITCH 2
     int speed_opt;
@@ -86,8 +88,6 @@ struct priv {
     // best overlap
     int frames_search;
     int num_channels;
-    void *buf_pre_corr;
-    void *table_window;
     int (*best_overlap_offset)(struct priv *s);
 };
 
@@ -134,72 +134,144 @@ static bool fill_queue(struct priv *s)
     return bytes_needed == 0;
 }
 
-#define UNROLL_PADDING (4 * 4)
+// Fit the parabola f(x) = a * x^2 + b * x + c through the three samples
+//   f(-1) = y_values[0]
+//   f(0) = y_values[1]
+//   f(1) = y_values[2]
+// and store the position of its extremum in *x and its value there in *value,
+// assuming y[0] <= y[1] >= y[2] || y[0] >= y[1] <= y[2]
+static void quadratic_interpolation_float(
+    const float* y_values, float* x, float* value)
+{
+    const float b = (y_values[2] - y_values[0]) * 0.5f;
+    const float c = y_values[1];
+    const float a = y_values[0] + b - c; // == (y[0] + y[2]) / 2 - y[1]
+
+    if (a == 0.f) {
+        // no curvature; with y[1] extreme that means a flat line
+        *x = 0;
+        *value = c;
+    } else {
+        const float pos = -b / (2.f * a); // root of f'(x) = 2 * a * x + b
+        *x = pos;
+        *value = a * pos * pos + b * pos + c;
+    }
+}
+
+static void quadratic_interpolation_s16(
+    const int32_t* y_values, float* x, int32_t* value)
+{
+    const float b = (y_values[2] - y_values[0]) * 0.5f; // math is done in float
+    const float c = y_values[1];
+    const float a = y_values[0] + b - c;
+    // f(x) = a * x^2 + b * x + c passes through the samples at x = -1, 0, 1
+    if (a == 0.f) {
+        // it's a flat line
+        *x = 0;
+        *value = c;
+    } else {
+        const float pos = -b / (2.f * a); // extremum position of f
+        *x = pos;
+        *value = a * pos * pos + b * pos + c; // converted to int32_t on store
+    }
+}
 
 static int best_overlap_offset_float(struct priv *s)
 {
-    float best_corr = INT_MIN;
-    int best_off = 0;
-
-    float *pw  = s->table_window;
-    float *po  = s->buf_overlap;
-    po += s->num_channels;
-    float *ppc = s->buf_pre_corr;
-    for (int i = s->num_channels; i < s->samples_overlap; i++)
-        *ppc++ = *pw++ **po++;
-
-    float *search_start = (float *)s->buf_queue + s->num_channels;
-    for (int off = 0; off < s->frames_search; off++) {
-        float corr = 0;
-        float *ps = search_start;
-        ppc = s->buf_pre_corr;
-        for (int i = s->num_channels; i < s->samples_overlap; i++)
-            corr += *ppc++ **ps++;
-        if (corr > best_corr) {
-            best_corr = corr;
-            best_off  = off;
+    int num_channels = s->num_channels, frames_search = s->frames_search;
+    float *source = (float *)s->buf_queue + num_channels;
+    float *target = (float *)s->buf_overlap + num_channels;
+    int num_samples = s->samples_overlap - num_channels;
+    int step_size = 3;
+    float history[3] = {0}; // distances at the last three coarse offsets
+    // Coarse pass: taxicab distance at every step_size-th frame offset.
+    float best_distance = FLT_MAX;
+    int best_offset_approx = 0;
+    for (int offset = 0; offset < frames_search; offset += step_size) {
+        float distance = 0;
+        for (int i = 0; i < num_samples; i++)
+            distance += fabsf(target[i] - source[offset * num_channels + i]);
+        // Slide the 3-point window; interpolate once all 3 slots are measured.
+        int offset_approx = offset;
+        history[0] = history[1];
+        history[1] = history[2];
+        history[2] = distance;
+        if (offset >= 2 * step_size &&
+            history[0] >= history[1] && history[1] <= history[2]) {
+            float extremum;
+            quadratic_interpolation_float(history, &extremum, &distance);
+            offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f);
+        }
+        if (distance < best_distance) {
+            best_distance = distance;
+            best_offset_approx = offset_approx;
+        }
+    }
+    // Fine pass: rescan every frame offset around the coarse winner.
+    best_distance = FLT_MAX;
+    int best_offset = 0;
+    int min_offset = MPMAX(0, best_offset_approx - step_size + 1);
+    int max_offset = MPMIN(frames_search, best_offset_approx + step_size);
+    for (int offset = min_offset; offset < max_offset; offset++) {
+        float distance = 0;
+        for (int i = 0; i < num_samples; i++)
+            distance += fabsf(target[i] - source[offset * num_channels + i]);
+        if (distance < best_distance) {
+            best_distance = distance;
+            best_offset = offset;
         }
-        search_start += s->num_channels;
     }
 
-    return best_off * 4 * s->num_channels;
+    return best_offset * 4 * num_channels; // byte offset (4 bytes per float)
 }
 
 static int best_overlap_offset_s16(struct priv *s)
 {
-    int64_t best_corr = INT64_MIN;
-    int best_off = 0;
-
-    int32_t *pw  = s->table_window;
-    int16_t *po  = s->buf_overlap;
-    po += s->num_channels;
-    int32_t *ppc = s->buf_pre_corr;
-    for (long i = s->num_channels; i < s->samples_overlap; i++)
-        *ppc++ = (*pw++ **po++) >> 15;
-
-    int16_t *search_start = (int16_t *)s->buf_queue + s->num_channels;
-    for (int off = 0; off < s->frames_search; off++) {
-        int64_t corr = 0;
-        int16_t *ps = search_start;
-        ppc = s->buf_pre_corr;
-        ppc += s->samples_overlap - s->num_channels;
-        ps  += s->samples_overlap - s->num_channels;
-        long i  = -(s->samples_overlap - s->num_channels);
-        do {
-            corr += ppc[i + 0] * ps[i + 0];
-            corr += ppc[i + 1] * ps[i + 1];
-            corr += ppc[i + 2] * ps[i + 2];
-            corr += ppc[i + 3] * ps[i + 3];
-            i += 4;
-        } while (i < 0);
-        if (corr > best_corr) {
-            best_corr = corr;
-            best_off  = off;
+    int num_channels = s->num_channels, frames_search = s->frames_search;
+    int16_t *source = (int16_t *)s->buf_queue + num_channels;
+    int16_t *target = (int16_t *)s->buf_overlap + num_channels;
+    int num_samples = s->samples_overlap - num_channels;
+    int step_size = 3;
+    int32_t history[3] = {0}; // distances at the last three coarse offsets
+    // Coarse pass: taxicab distance at every step_size-th frame offset.
+    int32_t best_distance = INT32_MAX;
+    int best_offset_approx = 0;
+    for (int offset = 0; offset < frames_search; offset += step_size) {
+        int32_t distance = 0; // NOTE(review): confirm the sum fits in int32_t
+        for (int i = 0; i < num_samples; i++)
+            distance += abs((int32_t)target[i] - source[offset * num_channels + i]);
+        // Slide the 3-point window; interpolate once all 3 slots are measured.
+        int offset_approx = offset;
+        history[0] = history[1];
+        history[1] = history[2];
+        history[2] = distance;
+        if (offset >= 2 * step_size &&
+            history[0] >= history[1] && history[1] <= history[2]) {
+            float extremum;
+            quadratic_interpolation_s16(history, &extremum, &distance);
+            offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f);
+        }
+        if (distance < best_distance) {
+            best_distance = distance;
+            best_offset_approx = offset_approx;
         }
-        search_start += s->num_channels;
     }
 
-    return best_off * 2 * s->num_channels;
+    best_distance = INT32_MAX; // fine pass: rescan around the coarse winner
+    int best_offset = 0;
+    int min_offset = MPMAX(0, best_offset_approx - step_size + 1);
+    int max_offset = MPMIN(frames_search, best_offset_approx + step_size);
+    for (int offset = min_offset; offset < max_offset; offset++) {
+        int32_t distance = 0;
+        for (int i = 0; i < num_samples; i++)
+            distance += abs((int32_t)target[i] - source[offset * num_channels + i]);
+        if (distance < best_distance) {
+            best_distance = distance;
+            best_offset = offset;
+        }
+    }
+
+    return best_offset * 2 * num_channels; // byte offset (2 bytes per int16_t)
 }
 
 static void output_overlap_float(struct priv *s, void *buf_out,
@@ -210,8 +282,9 @@ static void output_overlap_float(struct priv *s, void *buf_out,
     float *po   = s->buf_overlap;
     float *pin  = (float *)(s->buf_queue + bytes_off);
     for (int i = 0; i < s->samples_overlap; i++) {
-        *pout++ = *po - *pb++ *(*po - *pin++);
-        po++;
+        // the math is equal to *po * (1 - *pb) + *pin * *pb
+        float o = *po++;
+        *pout++ = o - *pb++ * (o - *pin++);
     }
 }
 
@@ -223,12 +296,13 @@ static void output_overlap_s16(struct priv *s, void *buf_out,
     int16_t *po   = s->buf_overlap;
     int16_t *pin  = (int16_t *)(s->buf_queue + bytes_off);
     for (int i = 0; i < s->samples_overlap; i++) {
-        *pout++ = *po - ((*pb++ *(*po - *pin++)) >> 16);
-        po++;
+        // the math is equal to *po * (1 - *pb) + *pin * *pb
+        int32_t o = *po++;
+        *pout++ = o - ((*pb++ *(o - *pin++)) >> 16);
     }
 }
 
-static void process(struct mp_filter *f)
+static void af_scaletempo_process(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
@@ -399,7 +473,7 @@ static bool reinit(struct mp_filter *f)
 
     update_speed(s, s->speed);
 
-    int frames_overlap = s->frames_stride * s->opts->percent_overlap;
+    int frames_overlap = s->frames_stride * s->opts->factor_overlap;
     if (frames_overlap <= 0) {
         s->bytes_standing   = s->bytes_stride;
         s->samples_standing = s->bytes_standing / bps;
@@ -419,18 +493,20 @@ static bool reinit(struct mp_filter *f)
         memset(s->buf_overlap, 0, s->bytes_