8 files changed, 453 insertions, 241 deletions
diff --git a/audio/filter/af_drop.c b/audio/filter/af_drop.c
index 724c482720..499389dd2b 100644
--- a/audio/filter/af_drop.c
+++ b/audio/filter/af_drop.c
@@ -11,7 +11,7 @@ struct priv {
     struct mp_aframe *last; // for repeating
 };
 
-static void process(struct mp_filter *f)
+static void af_drop_process(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -52,7 +52,7 @@ static void process(struct mp_filter *f)
     mp_pin_in_write(f->ppins[1], frame);
 }
 
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_drop_command(struct mp_filter *f, struct mp_filter_command *cmd)
 {
     struct priv *p = f->priv;
 
@@ -65,7 +65,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
     return false;
 }
 
-static void reset(struct mp_filter *f)
+static void af_drop_reset(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -73,18 +73,18 @@ static void reset(struct mp_filter *f)
     p->diff = 0;
 }
 
-static void destroy(struct mp_filter *f)
+static void af_drop_destroy(struct mp_filter *f)
 {
-    reset(f);
+    af_drop_reset(f);
 }
 
 static const struct mp_filter_info af_drop_filter = {
     .name = "drop",
     .priv_size = sizeof(struct priv),
-    .process = process,
-    .command = command,
-    .reset = reset,
-    .destroy = destroy,
+    .process = af_drop_process,
+    .command = af_drop_command,
+    .reset = af_drop_reset,
+    .destroy = af_drop_destroy,
 };
 
 static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options)
diff --git a/audio/filter/af_format.c b/audio/filter/af_format.c
index 88ae99ed56..eddce6422f 100644
--- a/audio/filter/af_format.c
+++ b/audio/filter/af_format.c
@@ -30,7 +30,7 @@ struct f_opts {
     int out_srate;
     struct m_channels out_channels;
 
-    int fail;
+    bool fail;
 };
 
 struct priv {
@@ -38,7 +38,7 @@ struct priv {
     struct mp_pin *in_pin;
 };
 
-static void process(struct mp_filter *f)
+static void af_format_process(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -85,7 +85,7 @@ error:
 static const struct mp_filter_info af_format_filter = {
     .name = "format",
     .priv_size = sizeof(struct priv),
-    .process = process,
+    .process = af_format_process,
 };
 
 static struct mp_filter *af_format_create(struct mp_filter *parent,
@@ -135,7 +135,7 @@ const struct mp_user_filter_entry af_format = {
             {"out-srate", OPT_INT(out_srate), M_RANGE(1000, 8*48000)},
             {"out-channels", OPT_CHANNELS(out_channels),
                 .flags = M_OPT_CHANNELS_LIMITED},
-            {"fail", OPT_FLAG(fail)},
+            {"fail", OPT_BOOL(fail)},
             {0}
         },
     },
diff --git a/audio/filter/af_lavcac3enc.c b/audio/filter/af_lavcac3enc.c
index 38f93a1c08..def9700d18 100644
--- a/audio/filter/af_lavcac3enc.c
+++ b/audio/filter/af_lavcac3enc.c
@@ -31,7 +31,10 @@
 #include <libavutil/bswap.h>
 #include <libavutil/mem.h>
 
+#include "config.h"
+
 #include "audio/aframe.h"
+#include "audio/chmap_avchannel.h"
 #include "audio/chmap_sel.h"
 #include "audio/fmt-conversion.h"
 #include "audio/format.h"
@@ -47,13 +50,13 @@
 #define AC3_MAX_CHANNELS 6
 #define AC3_MAX_CODED_FRAME_SIZE 3840
 #define AC3_FRAME_SIZE (6  * 256)
-const uint16_t ac3_bitrate_tab[19] = {
+static const uint16_t ac3_bitrate_tab[19] = {
     32, 40, 48, 56, 64, 80, 96, 112, 128,
     160, 192, 224, 256, 320, 384, 448, 512, 576, 640
 };
 
 struct f_opts {
-    int add_iec61937_header;
+    bool add_iec61937_header;
     int bit_rate;
     int min_channel_num;
     char *encoder;
@@ -68,8 +71,9 @@ struct priv {
     struct mp_aframe *in_frame;
     struct mp_aframe_pool *out_pool;
 
-    struct AVCodec        *lavc_acodec;
+    const struct AVCodec  *lavc_acodec;
     struct AVCodecContext *lavc_actx;
+    AVPacket              *lavc_pkt;
     int bit_rate;
     int out_samples;    // upper bound on encoded output per AC3 frame
 };
@@ -99,12 +103,25 @@ static bool reinit(struct mp_filter *f)
     if (!bit_rate && chmap.num < AC3_MAX_CHANNELS + 1)
         bit_rate = default_bit_rate[chmap.num];
 
-    avcodec_close(s->lavc_actx);
+    avcodec_free_context(&s->lavc_actx);
+    s->lavc_actx = avcodec_alloc_context3(s->lavc_acodec);
+    if (!s->lavc_actx) {
+        MP_ERR(f, "Audio LAVC, couldn't reallocate context!\n");
+        return false;
+    }
+
+    if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0)
+        return false;
 
     // Put sample parameters
     s->lavc_actx->sample_fmt = af_to_avformat(format);
+
+#if !HAVE_AV_CHANNEL_LAYOUT
     s->lavc_actx->channels = chmap.num;
     s->lavc_actx->channel_layout = mp_chmap_to_lavc(&chmap);
+#else
+    mp_chmap_to_av_layout(&s->lavc_actx->ch_layout, &chmap);
+#endif
     s->lavc_actx->sample_rate = rate;
     s->lavc_actx->bit_rate = bit_rate;
 
@@ -122,18 +139,19 @@ static bool reinit(struct mp_filter *f)
     return true;
 }
 
-static void reset(struct mp_filter *f)
+static void af_lavcac3enc_reset(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
     TA_FREEP(&s->in_frame);
 }
 
-static void destroy(struct mp_filter *f)
+static void af_lavcac3enc_destroy(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
-    reset(f);
+    af_lavcac3enc_reset(f);
+    av_packet_free(&s->lavc_pkt);
     avcodec_free_context(&s->lavc_actx);
 }
 
@@ -143,7 +161,7 @@ static void swap_16(uint16_t *ptr, size_t size)
         ptr[n] = av_bswap16(ptr[n]);
 }
 
-static void process(struct mp_filter *f)
+static void af_lavcac3enc_process(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
@@ -152,57 +170,57 @@ static void process(struct mp_filter *f)
 
     bool err = true;
     struct mp_aframe *out = NULL;
-    AVPacket pkt = {0};
-    av_init_packet(&pkt);
+    AVPacket *pkt = s->lavc_pkt;
 
     // Send input as long as it wants.
     while (1) {
         if (avcodec_is_open(s->lavc_actx)) {
-            int lavc_ret = avcodec_receive_packet(s->lavc_actx, &pkt);
+            int lavc_ret = avcodec_receive_packet(s->lavc_actx, pkt);
             if (lavc_ret >= 0)
                 break;
             if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) {
                 MP_FATAL(f, "Encode failed (receive).\n");
-                goto done;
+                goto error;
             }
         }
         AVFrame *frame = NULL;
         struct mp_frame input = mp_pin_out_read(s->in_pin);
         // The following code assumes no sample data buffering in the encoder.
-        if (input.type == MP_FRAME_EOF) {
+        switch (input.type) {
+        case MP_FRAME_NONE:
+            goto done; // no data yet
+        case MP_FRAME_EOF:
             mp_pin_in_write(f->ppins[1], input);
-            return;
-        } else if (input.type == MP_FRAME_AUDIO) {
+            goto done;
+        case MP_FRAME_AUDIO:
             TA_FREEP(&s->in_frame);
             s->in_frame = input.data;
-            frame = mp_frame_to_av(input, NULL);
-            if (!frame)
-                goto done;
             if (mp_aframe_get_channels(s->in_frame) < s->opts->min_channel_num) {
                 // Just pass it through.
                 s->in_frame = NULL;
                 mp_pin_in_write(f->ppins[1], input);
-                return;
+                goto done;
             }
             if (!mp_aframe_config_equals(s->in_frame, s->cur_format)) {
                 if (!reinit(f))
-                    goto done;
+                    goto error;
             }
-        } else if (input.type) {
-            goto done;
-        } else {
-            return; // no data yet
+            frame = mp_frame_to_av(input, NULL);
+            if (!frame)
+                goto error;
+            break;
+        default: goto error; // unexpected packet type
         }
         int lavc_ret = avcodec_send_frame(s->lavc_actx, frame);
         av_frame_free(&frame);
         if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) {
             MP_FATAL(f, "Encode failed (send).\n");
-            goto done;
+            goto error;
         }
     }
 
     if (!s->in_frame)
-        goto done;
+        goto error;
 
     out = mp_aframe_create();
     mp_aframe_set_format(out, AF_FORMAT_S_AC3);
@@ -210,18 +228,18 @@ static void process(struct mp_filter *f)
     mp_aframe_set_rate(out, 48000);
 
     if (mp_aframe_pool_allocate(s->out_pool, out, s->out_samples) < 0)
-        goto done;
+        goto error;
 
     int sstride = mp_aframe_get_sstride(out);
 
     mp_aframe_copy_attributes(out, s->in_frame);
 
-    int frame_size = pkt.size;
+    int frame_size = pkt->size;
     int header_len = 0;
     char hdr[8];
 
-    if (s->opts->add_iec61937_header && pkt.size > 5) {
-        int bsmod = pkt.data[5] & 0x7;
+    if (s->opts->add_iec61937_header && pkt->size > 5) {
+        int bsmod = pkt->data[5] & 0x7;
         int len = frame_size;
 
         frame_size = AC3_FRAME_SIZE * 2 * 2;
@@ -239,20 +257,22 @@ static void process(struct mp_filter *f)
 
     uint8_t **planes = mp_aframe_get_data_rw(out);
     if (!planes)
-        goto done;
+        goto error;
     char *buf = planes[0];
     memcpy(buf, hdr, header_len);
-    memcpy(buf + header_len, pkt.data, pkt.size);
-    memset(buf + header_len + pkt.size, 0,
-           frame_size - (header_len + pkt.size));
-    swap_16((uint16_t *)(buf + header_len), pkt.size / 2);
+    memcpy(buf + header_len, pkt->data, pkt->size);
+    memset(buf + header_len + pkt->size, 0,
+           frame_size - (header_len + pkt->size));
+    swap_16((uint16_t *)(buf + header_len), pkt->size / 2);
     mp_aframe_set_size(out, frame_size / sstride);
     mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
     out = NULL;
 
-    err = 0;
 done:
-    av_packet_unref(&pkt);
+    err = false;
+    // fall through
+error:
+    av_packet_unref(pkt);
     talloc_free(out);
     if (err)
         mp_filter_internal_mark_failed(f);
@@ -261,11 +281,43 @@ done:
 static const struct mp_filter_info af_lavcac3enc_filter = {
     .name = "lavcac3enc",
     .priv_size = sizeof(struct priv),
-    .process = process,
-    .reset = reset,
-    .destroy = destroy,
+    .process = af_lavcac3enc_process,
+    .reset = af_lavcac3enc_reset,
+    .destroy = af_lavcac3enc_destroy,
 };
 
+// Registers each channel layout listed by the codec with conv; layouts that
+// do not map to a valid mp_chmap are skipped.
+static void add_chmaps_to_autoconv(struct mp_filter *f,
+                                   struct mp_autoconvert *conv,
+                                   const struct AVCodec *codec)
+{
+#if !HAVE_AV_CHANNEL_LAYOUT
+    const uint64_t *lch = codec->channel_layouts;
+    for (int n = 0; lch && lch[n]; n++) {
+        struct mp_chmap chmap = {0};
+        mp_chmap_from_lavc(&chmap, lch[n]);
+        if (mp_chmap_is_valid(&chmap))
+            mp_autoconvert_add_chmap(conv, &chmap);
+    }
+#else
+    const AVChannelLayout *lch = codec->ch_layouts;
+    for (int n = 0; lch && lch[n].nb_channels; n++) {
+        struct mp_chmap chmap = {0};
+        if (!mp_chmap_from_av_layout(&chmap, &lch[n])) {
+            char layout[128] = {0};
+            MP_VERBOSE(f, "Skipping unsupported channel layout: %s\n",
+                       av_channel_layout_describe(&lch[n], layout,
+                                                  sizeof(layout)) < 0 ?
+                       "undefined" : layout);
+            continue;
+        }
+        if (mp_chmap_is_valid(&chmap))
+            mp_autoconvert_add_chmap(conv, &chmap);
+    }
+#endif
+}
+
 static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
                                               void *options)
 {
@@ -295,14 +347,23 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
         goto error;
     }
 
+    s->lavc_pkt = av_packet_alloc();
+    if (!s->lavc_pkt)
+        goto error;
+
     if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0)
         goto error;
 
-    // For this one, we require the decoder to expert lists of all supported
+    // For this one, we require the decoder to export lists of all supported
     // parameters. (Not all decoders do that, but the ones we're interested
     // in do.)
     if (!s->lavc_acodec->sample_fmts ||
-        !s->lavc_acodec->channel_layouts)
+#if !HAVE_AV_CHANNEL_LAYOUT
+        !s->lavc_acodec->channel_layouts
+#else
+        !s->lavc_acodec->ch_layouts
+#endif
+        )
     {
         MP_ERR(f, "Audio encoder doesn't list supported parameters.\n");
         goto error;
@@ -334,13 +395,7 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
             mp_autoconvert_add_afmt(conv, mpfmt);
     }
 
-    const uint64_t *lch = s->lavc_acodec->channel_layouts;
-    for (int n = 0; lch && lch[n]; n++) {
-        struct mp_chmap chmap = {0};
-        mp_chmap_from_lavc(&chmap, lch[n]);
-        if (mp_chmap_is_valid(&chmap))
-            mp_autoconvert_add_chmap(conv, &chmap);
-    }
+    add_chmaps_to_autoconv(f, conv, s->lavc_acodec);
 
     // At least currently, the AC3 encoder doesn't export sample rates.
     mp_autoconvert_add_srate(conv, 48000);
@@ -357,6 +412,8 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
     return f;
 
 error:
+    av_packet_free(&s->lavc_pkt);
+    avcodec_free_context(&s->lavc_actx);
     talloc_free(f);
     return NULL;
 }
@@ -369,13 +426,13 @@ const struct mp_user_filter_entry af_lavcac3enc = {
         .name = "lavcac3enc",
         .priv_size = sizeof(OPT_BASE_STRUCT),
         .priv_defaults = &(const OPT_BASE_STRUCT) {
-            .add_iec61937_header = 1,
+            .add_iec61937_header = true,
             .bit_rate = 640,
             .min_channel_num = 3,
             .encoder = "ac3",
         },
         .options = (const struct m_option[]) {
-            {"tospdif", OPT_FLAG(add_iec61937_header)},
+            {"tospdif", OPT_BOOL(add_iec61937_header)},
             {"bitrate", OPT_CHOICE(bit_rate,
                 {"auto", 0}, {"default", 0}), M_RANGE(32, 640)},
             {"minch", OPT_INT(min_channel_num), M_RANGE(2, 6)},
diff --git a/audio/filter/af_rubberband.c b/audio/filter/af_rubberband.c
index 4df2001c49..e71937fcb2 100644
--- a/audio/filter/af_rubberband.c
+++ b/audio/filter/af_rubberband.c
@@ -20,6 +20,8 @@
 
 #include <rubberband/rubberband-c.h>
 
+#include "config.h"
+
 #include "audio/aframe.h"
 #include "audio/format.h"
 #include "common/common.h"
@@ -31,7 +33,7 @@
 // command line options
 struct f_opts {
     int transients, detector, phase, window,
-        smoothing, formant, pitch, channels;
+        smoothing, formant, pitch, channels, engine;
     double scale;
 };
 
@@ -78,7 +80,10 @@ static bool init_rubberband(struct mp_filter *f)
 
     int opts = p->opts->transients | p->opts->detector | p->opts->phase |
                p->opts->window | p->opts->smoothing | p->opts->formant |
-               p->opts->pitch | p-> opts->channels |
+               p->opts->pitch | p->opts->channels |
+#if HAVE_RUBBERBAND_3
+               p->opts->engine |
+#endif
                RubberBandOptionProcessRealTime;
 
     int rate = mp_aframe_get_rate(p->pending);
@@ -100,7 +105,7 @@ static bool init_rubberband(struct mp_filter *f)
     return true;
 }
 
-static void process(struct mp_filter *f)
+static void af_rubberband_process(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -228,7 +233,7 @@ error:
     mp_filter_internal_mark_failed(f);
 }
 
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_rubberband_command(struct mp_filter *f, struct mp_filter_command *cmd)
 {
     struct priv *p = f->priv;
 
@@ -258,7 +263,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
     return false;
 }
 
-static void reset(struct mp_filter *f)
+static void af_rubberband_reset(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -269,7 +274,7 @@ static void reset(struct mp_filter *f)
     TA_FREEP(&p->pending);
 }
 
-static void destroy(struct mp_filter *f)
+static void af_rubberband_destroy(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -281,10 +286,10 @@ static void destroy(struct mp_filter *f)
 static const struct mp_filter_info af_rubberband_filter = {
     .name = "rubberband",
     .priv_size = sizeof(struct priv),
-    .process = process,
-    .command = command,
-    .reset = reset,
-    .destroy = destroy,
+    .process = af_rubberband_process,
+    .command = af_rubberband_command,
+    .reset = af_rubberband_reset,
+    .destroy = af_rubberband_destroy,
 };
 
 static struct mp_filter *af_rubberband_create(struct mp_filter *parent,
@@ -331,6 +336,9 @@ const struct mp_user_filter_entry af_rubberband = {
             .transients = RubberBandOptionTransientsMixed,
             .formant = RubberBandOptionFormantPreserved,
             .channels = RubberBandOptionChannelsTogether,
+#if HAVE_RUBBERBAND_3
+            .engine = RubberBandOptionEngineFiner,
+#endif
         },
         .options = (const struct m_option[]) {
             {"transients", OPT_CHOICE(transients,
@@ -361,6 +369,11 @@ const struct mp_user_filter_entry af_rubberband = {
             {"channels", OPT_CHOICE(channels,
                 {"apart", RubberBandOptionChannelsApart},
                 {"together", RubberBandOptionChannelsTogether})},
+#if HAVE_RUBBERBAND_3
+            {"engine", OPT_CHOICE(engine,
+                {"finer", RubberBandOptionEngineFiner},
+                {"faster", RubberBandOptionEngineFaster})},
+#endif
             {"pitch-scale", OPT_DOUBLE(scale), M_RANGE(0.01, 100)},
             {0}
         },
diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c
index 8675c9a50d..e7b101b260 100644
--- a/audio/filter/af_scaletempo.c
+++ b/audio/filter/af_scaletempo.c
@@ -48,7 +48,7 @@ struct f_opts {
     float scale_nominal;
     float ms_stride;
     float ms_search;
-    float percent_overlap;
+    float factor_overlap;
 #define SCALE_TEMPO 1
 #define SCALE_PITCH 2
     int speed_opt;
@@ -229,7 +229,7 @@ static void output_overlap_s16(struct priv *s, void *buf_out,
     }
 }
 
-static void process(struct mp_filter *f)
+static void af_scaletempo_process(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
@@ -400,7 +400,7 @@ static bool reinit(struct mp_filter *f)
 
     update_speed(s, s->speed);
 
-    int frames_overlap = s->frames_stride * s->opts->percent_overlap;
+    int frames_overlap = s->frames_stride * s->opts->factor_overlap;
     if (frames_overlap <= 0) {
         s->bytes_standing   = s->bytes_stride;
         s->samples_standing = s->bytes_standing / bps;
@@ -511,7 +511,7 @@ static bool reinit(struct mp_filter *f)
     return true;
 }
 
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_scaletempo_command(struct mp_filter *f, struct mp_filter_command *cmd)
 {
     struct priv *s = f->priv;
 
@@ -530,7 +530,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
     return false;
 }
 
-static void reset(struct mp_filter *f)
+static void af_scaletempo_reset(struct mp_filter *f)
 {
     struct priv *s = f->priv;
 
@@ -543,7 +543,7 @@ static void reset(struct mp_filter *f)
     TA_FREEP(&s->in);
 }
 
-static void destroy(struct mp_filter *f)
+static void af_scaletempo_destroy(struct mp_filter *f)
 {
     struct priv *s = f->priv;
     free(s->buf_queue);
@@ -558,10 +558,10 @@ static void destroy(struct mp_filter *f)
 static const struct mp_filter_info af_scaletempo_filter = {
     .name = "scaletempo",
     .priv_size = sizeof(struct priv),
-    .process = process,
-    .command = command,
-    .reset = reset,
-    .destroy = destroy,
+    .process = af_scaletempo_process,
+    .command = af_scaletempo_command,
+    .reset = af_scaletempo_reset,
+    .destroy = af_scaletempo_destroy,
 };
 
 static struct mp_filter *af_scaletempo_create(struct mp_filter *parent,
@@ -604,7 +604,7 @@ const struct mp_user_filter_entry af_scaletempo = {
         .priv_size = sizeof(OPT_BASE_STRUCT),
         .priv_defaults = &(const OPT_BASE_STRUCT) {
             .ms_stride = 60,
-            .percent_overlap = .20,
+            .factor_overlap = .20,
             .ms_search = 14,
             .speed_opt = SCALE_TEMPO,
             .scale_nominal = 1.0,
@@ -612,7 +612,7 @@ const struct mp_user_filter_entry af_scaletempo = {
         .options = (const struct m_option[]) {
             {"scale", OPT_FLOAT(scale_nominal), M_RANGE(0.01, DBL_MAX)},
             {"stride", OPT_FLOAT(ms_stride), M_RANGE(0.01, DBL_MAX)},
-            {"overlap", OPT_FLOAT(percent_overlap), M_RANGE(0, 1)},
+            {"overlap", OPT_FLOAT(factor_overlap), M_RANGE(0, 1)},
             {"search", OPT_FLOAT(ms_search), M_RANGE(0, DBL_MAX)},
             {"speed", OPT_CHOICE(speed_opt,
                 {"pitch", SCALE_PITCH},
diff --git a/audio/filter/af_scaletempo2.c b/audio/filter/af_scaletempo2.c
index ceac919d5d..749e219454 100644
--- a/audio/filter/af_scaletempo2.c
+++ b/audio/filter/af_scaletempo2.c
@@ -8,21 +8,20 @@
 #include "options/m_option.h"
 
 struct priv {
-    struct mp_scaletempo2 data;
+    struct mp_scaletempo2 *data;
     struct mp_pin *in_pin;
     struct mp_aframe *cur_format;
     struct mp_aframe_pool *out_pool;
     bool sent_final;
     struct mp_aframe *pending;
     bool initialized;
-    double frame_delay;
     float speed;
 };
 
 static bool init_scaletempo2(struct mp_filter *f);
-static void reset(struct mp_filter *f);
+static void af_scaletempo2_reset(struct mp_filter *f);
 
-static void process(struct mp_filter *f)
+static void af_scaletempo2_process(struct mp_filter *f)
 {
     struct priv *p = f->priv;
 
@@ -30,7 +29,7 @@ static void process(struct mp_filter *f)
         return;
 
     while (!p->initialized || !p->pending ||
-           !mp_scaletempo2_frames_available(&p->data))
+           !mp_scaletempo2_frames_available(p->data, p->speed))
     {
         bool eof = false;
         if (!p->pending || !mp_aframe_get_size(p->pending)) {
@@ -65,14 +64,16 @@ static void process(struct mp_filter *f)
         if (p->pending && !format_change && !p->sent_final) {
             int frame_size = mp_aframe_get_size(p->pending);
             uint8_t **planes = mp_aframe_get_data_ro(p->pending);
-            int read = mp_scaletempo2_fill_input_buffer(&p->data,
-                planes, frame_size, final);
-            p->frame_delay += read;
+            int read = mp_scaletempo2_fill_input_buffer(p->data,
+                planes, frame_size, p->speed);
             mp_aframe_skip_samples(p->pending, read);
         }
-        p->sent_final |= final;
+        if (final && p->pending && !p->sent_final) {
+            mp_scaletempo2_set_final(p->data);
+            p->sent_final = true;
+        }
 
-        if (mp_scaletempo2_frames_available(&p->data)) {
+        if (mp_scaletempo2_frames_available(p->data, p->speed)) {
             if (eof) {
                 mp_pin_out_repeat_eof(p->in_pin); // drain more next time
             }
@@ -82,18 +83,15 @@ static void process(struct mp_filter *f)
             if (eof) {
                 mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
                 return;
-            } else if (format_change) {
-                // go on with proper reinit on the next iteration
-                p->initialized = false;
-                p->sent_final = false;
             }
+            // for format change go on with proper reinit on the next iteration
         }
     }
 
     assert(p->pending);
-    if (mp_scaletempo2_frames_available(&p->data)) {
+    if (mp_scaletempo2_frames_available(p->data, p->speed)) {
         struct mp_aframe *out = mp_aframe_new_ref(p->cur_format);
-        int out_samples = p->data.ola_hop_size;
+        int out_samples = p->data->ola_hop_size;
         if (mp_aframe_pool_allocate(p->out_pool, out, out_samples) < 0) {
             talloc_free(out);
             goto error;
@@ -103,17 +101,30 @@ static void process(struct mp_filter *f)
 
         uint8_t **planes = mp_aframe_get_data_rw(out);
         assert(planes);
-        assert(mp_aframe_get_planes(out) == p->data.channels);
+        assert(mp_aframe_get_planes(out) == p->data->channels);
 
-        out_samples = mp_scaletempo2_fill_buffer(&p->data,
+        out_samples = mp_scaletempo2_fill_buffer(p->data,
             (float**)planes, out_samples, p->speed);
 
         double pts = mp_aframe_get_pts(p->pending);
-        p->frame_delay -= out_samples * p->speed;
-
         if (pts != MP_NOPTS_VALUE) {
-            double delay = p->frame_delay / mp_aframe_get_effective_rate(out);
-            mp_aframe_set_pts(out, pts - delay);
+            double frame_delay = mp_scaletempo2_get_latency(p->data, p->speed)
+                + out_samples * p->speed;
+            mp_aframe_set_pts(out, pts - frame_delay / mp_aframe_get_effective_rate(out));
+
+            if (p->sent_final) {
+                double remain_pts = pts - mp_aframe_get_pts(out);
+                double rate = mp_aframe_get_effective_rate(out) / p->speed;
+                int max_samples = MPMAX(0, (int) (remain_pts * rate));
+                // truncate final packet to expected length
+                if (out_samples >= max_samples) {
+                    out_samples = max_samples;
+
+                    // reset the filter to ensure it stops generating audio
+                    // and mp_scaletempo2_frames_available returns false
+                    mp_scaletempo2_reset(p->data);
+                }
+            }
         }
 
         mp_aframe_set_size(out, out_samples);
@@ -137,16 +148,15 @@ static bool init_scaletempo2(struct mp_filter *f)
     mp_aframe_reset(p->cur_format);
     p->initialized = true;
     p->sent_final = false;
-    p->frame_delay = 0;
     mp_aframe_config_copy(p->cur_format, p->pending);
 
-    mp_scaletempo2_init(&p->data, mp_aframe_get_channels(p->pending),
+    mp_scaletempo2_init(p->data, mp_aframe_get_channels(p->pending),
         mp_aframe_get_rate(p->pending));
 
     return true;
 }
 
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_scaletempo2_command(struct mp_filter *f, struct mp_filter_command *cmd)
 {
     struct priv *p = f->priv;
 
@@ -159,29 +169,28 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
     return false;
 }
 
-static void reset(struct mp_filter *f)
+static void af_scaletempo2_reset(struct mp_filter *f)
 {
     struct priv *p = f->priv;
-    mp_scaletempo2_reset(&p->data);
-    p->frame_delay = 0;
+    mp_scaletempo2_reset(p->data);
     p->initialized = false;
     TA_FREEP(&p->pending);
 }
 
-static void destroy(struct mp_filter *f)
+static void af_scaletempo2_destroy(struct mp_filter *f)
 {
     struct priv *p = f->priv;
-    mp_scaletempo2_destroy(&p->data);
-    talloc_free(p->pending);
+    TA_FREEP(&p->data);
+    TA_FREEP(&p->pending);
 }
 
 static const struct mp_filter_info af_scaletempo2_filter = {
     .name = "scaletempo2",
     .priv_size = sizeof(struct priv),
-    .process = process,
-    .command = command,
-    .reset = reset,
-    .destroy = destroy,
+    .process = af_scaletempo2_process,
+    .command = af_scaletempo2_command,
+    .reset = af_scaletempo2_reset,
+    .destroy = af_scaletempo2_destroy,
 };
 
 static struct mp_filter *af_scaletempo2_create(
@@ -197,7 +206,8 @@ static struct mp_filter *af_scaletempo2_create(
     mp_filter_add_pin(f, MP_PIN_OUT, "out");
 
     struct priv *p = f->priv;
-    p->data.opts = talloc_steal(p, options);
+    p->data = talloc_zero(p, struct mp_scaletempo2);
+    p->data->opts = talloc_steal(p, options);
     p->speed = 1.0;
     p->cur_format = talloc_steal(p, mp_aframe_create());
     p->out_pool = mp_aframe_pool_create(p);
@@ -225,12 +235,12 @@ const struct mp_user_filter_entry af_scaletempo2 = {
         .priv_size = sizeof(OPT_BASE_STRUCT),
         .priv_defaults = &(const OPT_BASE_STRUCT) {
             .min_playback_rate = 0.25,
-            .max_playback_rate = 4.0,
-            .ola_window_size_ms = 20,
-            .wsola_search_interval_ms = 30,
+            .max_playback_rate = 8.0,
+            .ola_window_size_ms = 12,
+            .wsola_search_interval_ms = 40,
         },
         .options = (const struct m_option[]) {
-            {"search-interval", 
+            {"search-interval",
                 OPT_FLOAT(wsola_search_interval_ms), M_RANGE(1, 1000)},
             {"window-size",
                 OPT_FLOAT(ola_window_size_ms), M_RANGE(1, 1000)},
diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c
index e348cb37a2..7f3a99638f 100644
--- a/audio/filter/af_scaletempo2_internals.c
+++ b/audio/filter/af_scaletempo2_internals.c
@@ -4,6 +4,8 @@
 #include "audio/chmap.h"
 #include "audio/filter/af_scaletempo2_internals.h"
 
+#include "config.h"
+
 // Algorithm overview (from chromium):
 // Waveform Similarity Overlap-and-add (WSOLA).
 //
@@ -39,19 +41,15 @@ static bool in_interval(int n, struct interval q)
     return n >= q.lo && n <= q.hi;
 }
 
-static float **realloc_2d(float **p, int x, int y)
+static void alloc_sample_buffer(struct mp_scaletempo2 *p, float ***ptr, size_t size)
 {
-    float **array = realloc(p, sizeof(float*) * x + sizeof(float) * x * y);
-    float* data = (float*) (array + x);
-    for (int i = 0; i < x; ++i) {
-        array[i] = data + i * y;
-    }
-    return array;
-}
+    talloc_free(*ptr);
 
-static void zero_2d(float **a, int x, int y)
-{
-    memset(a + x, 0, sizeof(float) * x * y);
+    // Per-channel arrays are allocated with the pointer table as talloc parent.
+    float **rows = talloc_array(p, float*, p->channels);
+    for (int ch = 0; ch < p->channels; ++ch)
+        rows[ch] = talloc_array(rows, float, size);
+    *ptr = rows;
 }
 
 static void zero_2d_partial(float **a, int x, int y)
@@ -91,19 +89,23 @@ static void multi_channel_moving_block_energies(
 }
 
 static float multi_channel_similarity_measure(
-    const float* dot_prod_a_b,
-    const float* energy_a, const float* energy_b,
+    const float* dot_prod,
+    const float* energy_target, const float* energy_candidate,
     int channels)
 {
     const float epsilon = 1e-12f;
     float similarity_measure = 0.0f;
     for (int n = 0; n < channels; ++n) {
-        similarity_measure += dot_prod_a_b[n]
-            / sqrtf(energy_a[n] * energy_b[n] + epsilon);
+        similarity_measure += dot_prod[n] * energy_target[n] // weight by target energy
+            / sqrtf(energy_target[n] * energy_candidate[n] + epsilon);
     }
     return similarity_measure;
 }
 
+#if HAVE_VECTOR
+
+typedef float v8sf __attribute__ ((vector_size (32), aligned (1)));
+
 // Dot-product of channels of two AudioBus. For each AudioBus an offset is
 // given. |dot_product[k]| is the dot-product of channel |k|. The caller should
 // allocate sufficient space for |dot_product|.
@@ -116,16 +118,79 @@ static void multi_channel_dot_product(
     assert(frame_offset_a >= 0);
     assert(frame_offset_b >= 0);
 
-    memset(dot_product, 0, sizeof(*dot_product) * channels);
     for (int k = 0; k < channels; ++k) {
         const float* ch_a = a[k] + frame_offset_a;
         const float* ch_b = b[k] + frame_offset_b;
-        for (int n = 0; n < num_frames; ++n) {
-            dot_product[k] += *ch_a++ * *ch_b++;
+        float sum = 0.0;
+        if (num_frames < 32)
+            goto rest;
+
+        const v8sf *va = (const v8sf *) ch_a;
+        const v8sf *vb = (const v8sf *) ch_b;
+        v8sf vsum[4] = {
+            // Initialize to product of first 32 floats
+            va[0] * vb[0],
+            va[1] * vb[1],
+            va[2] * vb[2],
+            va[3] * vb[3],
+        };
+        va += 4;
+        vb += 4;
+
+        // Process `va` and `vb` across four vertical stripes
+        for (int n = 1; n < num_frames / 32; n++) {
+            vsum[0] += va[0] * vb[0];
+            vsum[1] += va[1] * vb[1];
+            vsum[2] += va[2] * vb[2];
+            vsum[3] += va[3] * vb[3];
+            va += 4;
+            vb += 4;
         }
+
+        // Vertical sum across `vsum` entries
+        vsum[0] += vsum[1];
+        vsum[2] += vsum[3];
+        vsum[0] += vsum[2];
+
+        // Horizontal sum across `vsum[0]`, could probably be done better but
+        // this section is not super performance critical
+        float *vf = (float *) &vsum[0];
+        sum = vf[0] + vf[1] + vf[2] + vf[3] + vf[4] + vf[5] + vf[6] + vf[7];
+        ch_a = (const float *) va;
+        ch_b = (const float *) vb;
+
+rest:
+        // Process the remainder
+        for (int n = 0; n < num_frames % 32; n++)
+            sum += *ch_a++ * *ch_b++;
+
+        dot_product[k] = sum;
+    }
+}
+
+#else // !HAVE_VECTOR
+
+static void multi_channel_dot_product