summary refs log tree commit diff stats
path: root/audio/filter
diff options
context:
space:
mode:
Diffstat (limited to 'audio/filter')
-rw-r--r-- audio/filter/af_drop.c 114
-rw-r--r-- audio/filter/af_format.c 10
-rw-r--r-- audio/filter/af_lavcac3enc.c 145
-rw-r--r-- audio/filter/af_rubberband.c 17
-rw-r--r-- audio/filter/af_scaletempo.c 19
-rw-r--r-- audio/filter/af_scaletempo2.c 254
-rw-r--r-- audio/filter/af_scaletempo2_internals.c 874
-rw-r--r-- audio/filter/af_scaletempo2_internals.h 134
8 files changed, 1508 insertions, 59 deletions
diff --git a/audio/filter/af_drop.c b/audio/filter/af_drop.c
new file mode 100644
index 0000000000..724c482720
--- /dev/null
+++ b/audio/filter/af_drop.c
@@ -0,0 +1,114 @@
+#include "audio/aframe.h"
+#include "audio/format.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+
+struct priv { // per-instance state of the "drop" filter
+ double speed; // playback speed; 1.0 at creation, replaced by MP_FILTER_COMMAND_SET_SPEED
+ double diff; // running balance kept by process(): speed-adjusted output duration added, input duration subtracted
+ struct mp_aframe *last; // extra reference to the last frame passed on, kept for repeating
+};
+
+static void process(struct mp_filter *f) // passes on, drops, or repeats one frame per call, steered by p->diff
+{
+ struct priv *p = f->priv;
+
+ if (!mp_pin_in_needs_data(f->ppins[1])) // nothing to do unless the output side wants a frame
+ return;
+
+ struct mp_frame frame = {0};
+
+ double last_dur = p->last ? mp_aframe_duration(p->last) : 0;
+ if (p->last && p->diff < 0 && -p->diff > last_dur / 2) { // balance is negative by more than half a frame: repeat
+ MP_VERBOSE(f, "repeat\n");
+ frame = MAKE_FRAME(MP_FRAME_AUDIO, p->last); // the saved reference itself becomes the frame to send
+ p->last = NULL; // ownership moved into frame; a fresh reference is taken below
+ } else {
+ frame = mp_pin_out_read(f->ppins[0]);
+
+ if (frame.type == MP_FRAME_AUDIO) {
+ last_dur = mp_aframe_duration(frame.data);
+ p->diff -= last_dur; // account for the input duration just consumed
+ if (p->diff > last_dur / 2) { // balance is positive by more than half a frame: drop
+ MP_VERBOSE(f, "drop\n");
+ mp_frame_unref(&frame); // presumably leaves frame.type as "none" so the branches below are skipped - confirm
+ mp_filter_internal_mark_progress(f);
+ }
+ }
+ }
+
+ if (frame.type == MP_FRAME_AUDIO) {
+ struct mp_aframe *fr = frame.data;
+ talloc_free(p->last);
+ p->last = mp_aframe_new_ref(fr); // remember this frame in case it has to be repeated
+ mp_aframe_mul_speed(fr, p->speed);
+ p->diff += mp_aframe_duration(fr); // duration is read after mp_aframe_mul_speed() was applied
+ mp_aframe_set_pts(p->last, mp_aframe_end_pts(fr)); // a repeated copy would start where this frame ends
+ } else if (frame.type == MP_FRAME_EOF) {
+ TA_FREEP(&p->last); // nothing left to repeat after end of stream
+ }
+ mp_pin_in_write(f->ppins[1], frame); // NOTE(review): also reached after a drop, with the unref'd frame - confirm that is accepted
+}
+
+static bool command(struct mp_filter *f, struct mp_filter_command *cmd) // returns true only for commands handled here
+{
+ struct priv *p = f->priv;
+
+ switch (cmd->type) {
+ case MP_FILTER_COMMAND_SET_SPEED:
+ p->speed = cmd->speed; // read by process() when it handles the next frame
+ return true;
+ }
+
+ return false; // every other command type is reported as not handled
+}
+
+static void reset(struct mp_filter *f) // forgets the saved frame and clears the duration balance; speed is kept
+{
+ struct priv *p = f->priv;
+
+ TA_FREEP(&p->last);
+ p->diff = 0;
+}
+
+static void destroy(struct mp_filter *f) // teardown only needs what reset() already does (frees p->last)
+{
+ reset(f);
+}
+
+static const struct mp_filter_info af_drop_filter = { // callback table handed to mp_filter_create() in af_drop_create()
+ .name = "drop",
+ .priv_size = sizeof(struct priv), // size of the state reached through f->priv
+ .process = process,
+ .command = command,
+ .reset = reset,
+ .destroy = destroy,
+};
+
+static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options) // builds a "drop" filter; NULL on failure
+{
+ struct mp_filter *f = mp_filter_create(parent, &af_drop_filter);
+ if (!f) {
+ talloc_free(options);
+ return NULL;
+ }
+
+ mp_filter_add_pin(f, MP_PIN_IN, "in"); // process() reads frames via f->ppins[0]
+ mp_filter_add_pin(f, MP_PIN_OUT, "out"); // process() writes frames via f->ppins[1]
+
+ struct priv *p = f->priv;
+ p->speed = 1.0; // stays at normal speed until a SET_SPEED command arrives
+
+ return f; // NOTE(review): options is neither stored nor freed on this path - confirm who owns it
+}
+
+const struct mp_user_filter_entry af_drop = { // user-visible entry for the "drop" filter
+ .desc = {
+ .description = "Change audio speed by dropping/repeating frames",
+ .name = "drop",
+ .priv_size = sizeof(struct priv), // no .options list is given here; af_drop_create() does not read its options argument
+ },
+ .create = af_drop_create,
+};
diff --git a/audio/filter/af_format.c b/audio/filter/af_format.c
index 79d78d1d96..2d1c1cc97d 100644
--- a/audio/filter/af_format.c
+++ b/audio/filter/af_format.c
@@ -30,7 +30,7 @@ struct f_opts {
int out_srate;
struct m_channels out_channels;
- int fail;
+ bool fail;
};
struct priv {
@@ -130,10 +130,12 @@ const struct mp_user_filter_entry af_format = {
.options = (const struct m_option[]) {
{"format", OPT_AUDIOFORMAT(in_format)},
{"srate", OPT_INT(in_srate), M_RANGE(1000, 8*48000)},
- {"channels", OPT_CHANNELS(in_channels), .min = 1},
+ {"channels", OPT_CHANNELS(in_channels),
+ .flags = M_OPT_CHANNELS_LIMITED},
{"out-srate", OPT_INT(out_srate), M_RANGE(1000, 8*48000)},
- {"out-channels", OPT_CHANNELS(out_channels), .min = 1},
- {"fail", OPT_FLAG(fail)},
+ {"out-channels", OPT_CHANNELS(out_channels),
+ .flags = M_OPT_CHANNELS_LIMITED},
+ {"fail", OPT_BOOL(fail)},
{0}
},
},
diff --git a/audio/filter/af_lavcac3enc.c b/audio/filter/af_lavcac3enc.c
index 38f93a1c08..ec3330f17b 100644
--- a/audio/filter/af_lavcac3enc.c
+++ b/audio/filter/af_lavcac3enc.c
@@ -31,7 +31,10 @@
#include <libavutil/bswap.h>
#include <libavutil/mem.h>
+#include "config.h"
+
#include "audio/aframe.h"
+#include "audio/chmap_avchannel.h"
#include "audio/chmap_sel.h"
#include "audio/fmt-conversion.h"
#include "audio/format.h"
@@ -47,13 +50,13 @@
#define AC3_MAX_CHANNELS 6
#define AC3_MAX_CODED_FRAME_SIZE 3840
#define AC3_FRAME_SIZE (6 * 256)
-const uint16_t ac3_bitrate_tab[19] = {
+static const uint16_t ac3_bitrate_tab[19] = {
32, 40, 48, 56, 64, 80, 96, 112, 128,
160, 192, 224, 256, 320, 384, 448, 512, 576, 640
};
struct f_opts {
- int add_iec61937_header;
+ bool add_iec61937_header;
int bit_rate;
int min_channel_num;
char *encoder;
@@ -68,8 +71,9 @@ struct priv {
struct mp_aframe *in_frame;
struct mp_aframe_pool *out_pool;
- struct AVCodec *lavc_acodec;
+ const struct AVCodec *lavc_acodec;
struct AVCodecContext *lavc_actx;
+ AVPacket *lavc_pkt;
int bit_rate;
int out_samples; // upper bound on encoded output per AC3 frame
};
@@ -99,12 +103,25 @@ static bool reinit(struct mp_filter *f)
if (!bit_rate && chmap.num < AC3_MAX_CHANNELS + 1)
bit_rate = default_bit_rate[chmap.num];
- avcodec_close(s->lavc_actx);
+ avcodec_free_context(&s->lavc_actx);
+ s->lavc_actx = avcodec_alloc_context3(s->lavc_acodec);
+ if (!s->lavc_actx) {
+ MP_ERR(f, "Audio LAVC, couldn't reallocate context!\n");
+ return false;
+ }
+
+ if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0)
+ return false;
// Put sample parameters
s->lavc_actx->sample_fmt = af_to_avformat(format);
+
+#if !HAVE_AV_CHANNEL_LAYOUT
s->lavc_actx->channels = chmap.num;
s->lavc_actx->channel_layout = mp_chmap_to_lavc(&chmap);
+#else
+ mp_chmap_to_av_layout(&s->lavc_actx->ch_layout, &chmap);
+#endif
s->lavc_actx->sample_rate = rate;
s->lavc_actx->bit_rate = bit_rate;
@@ -134,6 +151,7 @@ static void destroy(struct mp_filter *f)
struct priv *s = f->priv;
reset(f);
+ av_packet_free(&s->lavc_pkt);
avcodec_free_context(&s->lavc_actx);
}
@@ -152,57 +170,57 @@ static void process(struct mp_filter *f)
bool err = true;
struct mp_aframe *out = NULL;
- AVPacket pkt = {0};
- av_init_packet(&pkt);
+ AVPacket *pkt = s->lavc_pkt;
// Send input as long as it wants.
while (1) {
if (avcodec_is_open(s->lavc_actx)) {
- int lavc_ret = avcodec_receive_packet(s->lavc_actx, &pkt);
+ int lavc_ret = avcodec_receive_packet(s->lavc_actx, pkt);
if (lavc_ret >= 0)
break;
if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) {
MP_FATAL(f, "Encode failed (receive).\n");
- goto done;
+ goto error;
}
}
AVFrame *frame = NULL;
struct mp_frame input = mp_pin_out_read(s->in_pin);
// The following code assumes no sample data buffering in the encoder.
- if (input.type == MP_FRAME_EOF) {
+ switch (input.type) {
+ case MP_FRAME_NONE:
+ goto done; // no data yet
+ case MP_FRAME_EOF:
mp_pin_in_write(f->ppins[1], input);
- return;
- } else if (input.type == MP_FRAME_AUDIO) {
+ goto done;
+ case MP_FRAME_AUDIO:
TA_FREEP(&s->in_frame);
s->in_frame = input.data;
- frame = mp_frame_to_av(input, NULL);
- if (!frame)
- goto done;
if (mp_aframe_get_channels(s->in_frame) < s->opts->min_channel_num) {
// Just pass it through.
s->in_frame = NULL;
mp_pin_in_write(f->ppins[1], input);
- return;
+ goto done;
}
if (!mp_aframe_config_equals(s->in_frame, s->cur_format)) {
if (!reinit(f))
- goto done;
+ goto error;
}
- } else if (input.type) {
- goto done;
- } else {
- return; // no data yet
+ frame = mp_frame_to_av(input, NULL);
+ if (!frame)
+ goto error;
+ break;
+ default: goto error; // unexpected packet type
}
int lavc_ret = avcodec_send_frame(s->lavc_actx, frame);
av_frame_free(&frame);
if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) {
MP_FATAL(f, "Encode failed (send).\n");
- goto done;
+ goto error;
}
}
if (!s->in_frame)
- goto done;
+ goto error;
out = mp_aframe_create();
mp_aframe_set_format(out, AF_FORMAT_S_AC3);
@@ -210,18 +228,18 @@ static void process(struct mp_filter *f)
mp_aframe_set_rate(out, 48000);
if (mp_aframe_pool_allocate(s->out_pool, out, s->out_samples) < 0)
- goto done;
+ goto error;
int sstride = mp_aframe_get_sstride(out);
mp_aframe_copy_attributes(out, s->in_frame);
- int frame_size = pkt.size;
+ int frame_size = pkt->size;
int header_len = 0;
char hdr[8];
- if (s->opts->add_iec61937_header && pkt.size > 5) {
- int bsmod = pkt.data[5] & 0x7;
+ if (s->opts->add_iec61937_header && pkt->size > 5) {
+ int bsmod = pkt->data[5] & 0x7;
int len = frame_size;
frame_size = AC3_FRAME_SIZE * 2 * 2;
@@ -239,20 +257,22 @@ static void process(struct mp_filter *f)
uint8_t **planes = mp_aframe_get_data_rw(out);
if (!planes)
- goto done;
+ goto error;
char *buf = planes[0];
memcpy(buf, hdr, header_len);
- memcpy(buf + header_len, pkt.data, pkt.size);
- memset(buf + header_len + pkt.size, 0,
- frame_size - (header_len + pkt.size));
- swap_16((uint16_t *)(buf + header_len), pkt.size / 2);
+ memcpy(buf + header_len, pkt->data, pkt->size);
+ memset(buf + header_len + pkt->size, 0,
+ frame_size - (header_len + pkt->size));
+ swap_16((uint16_t *)(buf + header_len), pkt->size / 2);
mp_aframe_set_size(out, frame_size / sstride);
mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
out = NULL;
- err = 0;
done:
- av_packet_unref(&pkt);
+ err = false;
+ // fall through
+error:
+ av_packet_unref(pkt);
talloc_free(out);
if (err)
mp_filter_internal_mark_failed(f);
@@ -266,6 +286,38 @@ static const struct mp_filter_info af_lavcac3enc_filter = {
.destroy = destroy,
};
+static void add_chmaps_to_autoconv(struct mp_filter *f, // f is only used for logging in the AVChannelLayout branch
+ struct mp_autoconvert *conv, // receives every channel map that converts and validates
+ const struct AVCodec *codec) // source of the supported-layout list
+{
+#if !HAVE_AV_CHANNEL_LAYOUT
+ const uint64_t *lch = codec->channel_layouts; // layout masks; the loop stops at the first zero entry
+ for (int n = 0; lch && lch[n]; n++) {
+ struct mp_chmap chmap = {0};
+ mp_chmap_from_lavc(&chmap, lch[n]);
+ if (mp_chmap_is_valid(&chmap)) // layouts that do not yield a valid map are silently skipped
+ mp_autoconvert_add_chmap(conv, &chmap);
+ }
+#else
+ const AVChannelLayout *lch = codec->ch_layouts; // the loop stops at the first entry with nb_channels == 0
+ for (int n = 0; lch && lch[n].nb_channels; n++) {
+ struct mp_chmap chmap = {0};
+
+ if (!mp_chmap_from_av_layout(&chmap, &lch[n])) { // conversion failed: log the layout and move on
+ char layout[128] = {0};
+ MP_VERBOSE(f, "Skipping unsupported channel layout: %s\n",
+ av_channel_layout_describe(&lch[n],
+ layout, 128) < 0 ?
+ "undefined" : layout); // prints "undefined" when describing the layout fails
+ continue;
+ }
+
+ if (mp_chmap_is_valid(&chmap))
+ mp_autoconvert_add_chmap(conv, &chmap);
+ }
+#endif
+}
+
static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
void *options)
{
@@ -295,14 +347,23 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
goto error;
}
+ s->lavc_pkt = av_packet_alloc();
+ if (!s->lavc_pkt)
+ goto error;
+
if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0)
goto error;
- // For this one, we require the decoder to expert lists of all supported
+ // For this one, we require the decoder to export lists of all supported
// parameters. (Not all decoders do that, but the ones we're interested
// in do.)
if (!s->lavc_acodec->sample_fmts ||
- !s->lavc_acodec->channel_layouts)
+#if !HAVE_AV_CHANNEL_LAYOUT
+ !s->lavc_acodec->channel_layouts
+#else
+ !s->lavc_acodec->ch_layouts
+#endif
+ )
{
MP_ERR(f, "Audio encoder doesn't list supported parameters.\n");
goto error;
@@ -334,13 +395,7 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
mp_autoconvert_add_afmt(conv, mpfmt);
}
- const uint64_t *lch = s->lavc_acodec->channel_layouts;
- for (int n = 0; lch && lch[n]; n++) {
- struct mp_chmap chmap = {0};
- mp_chmap_from_lavc(&chmap, lch[n]);
- if (mp_chmap_is_valid(&chmap))
- mp_autoconvert_add_chmap(conv, &chmap);
- }
+ add_chmaps_to_autoconv(f, conv, s->lavc_acodec);
// At least currently, the AC3 encoder doesn't export sample rates.
mp_autoconvert_add_srate(conv, 48000);
@@ -357,6 +412,8 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
return f;
error:
+ av_packet_free(&s->lavc_pkt);
+ avcodec_free_context(&s->lavc_actx);
talloc_free(f);
return NULL;
}
@@ -369,13 +426,13 @@ const struct mp_user_filter_entry af_lavcac3enc = {
.name = "lavcac3enc",
.priv_size = sizeof(OPT_BASE_STRUCT),
.priv_defaults = &(const OPT_BASE_STRUCT) {
- .add_iec61937_header = 1,
+ .add_iec61937_header = true,
.bit_rate = 640,
.min_channel_num = 3,
.encoder = "ac3",
},
.options = (const struct m_option[]) {
- {"tospdif", OPT_FLAG(add_iec61937_header)},
+ {"tospdif", OPT_BOOL(add_iec61937_header)},
{"bitrate", OPT_CHOICE(bit_rate,
{"auto", 0}, {"default", 0}), M_RANGE(32, 640)},
{"minch", OPT_INT(min_channel_num), M_RANGE(2, 6)},
diff --git a/audio/filter/af_rubberband.c b/audio/filter/af_rubberband.c
index 4df2001c49..48e5cc1e86 100644
--- a/audio/filter/af_rubberband.c
+++ b/audio/filter/af_rubberband.c
@@ -20,6 +20,8 @@
#include <rubberband/rubberband-c.h>
+#include "config.h"
+
#include "audio/aframe.h"
#include "audio/format.h"
#include "common/common.h"
@@ -31,7 +33,7 @@
// command line options
struct f_opts {
int transients, detector, phase, window,
- smoothing, formant, pitch, channels;
+ smoothing, formant, pitch, channels, engine;
double scale;
};
@@ -78,7 +80,10 @@ static bool init_rubberband(struct mp_filter *f)
int opts = p->opts->transients | p->opts->detector | p->opts->phase |
p->opts->window | p->opts->smoothing | p->opts->formant |
- p->opts->pitch | p-> opts->channels |
+ p->opts->pitch | p->opts->channels |
+#if HAVE_RUBBERBAND_3
+ p->opts->engine |
+#endif
RubberBandOptionProcessRealTime;
int rate = mp_aframe_get_rate(p->pending);
@@ -331,6 +336,9 @@ const struct mp_user_filter_entry af_rubberband = {
.transients = RubberBandOptionTransientsMixed,
.formant = RubberBandOptionFormantPreserved,
.channels = RubberBandOptionChannelsTogether,
+#if HAVE_RUBBERBAND_3
+ .engine = RubberBandOptionEngineFiner,
+#endif
},
.options = (const struct m_option[]) {
{"transients", OPT_CHOICE(transients,
@@ -361,6 +369,11 @@ const struct mp_user_filter_entry af_rubberband = {
{"channels", OPT_CHOICE(channels,
{"apart", RubberBandOptionChannelsApart},
{"together", RubberBandOptionChannelsTogether})},
+#if HAVE_RUBBERBAND_3
+ {"engine", OPT_CHOICE(engine,
+ {"finer", RubberBandOptionEngineFiner},
+ {"faster", RubberBandOptionEngineFaster})},
+#endif
{"pitch-scale", OPT_DOUBLE(scale), M_RANGE(0.01, 100)},
{0}
},
diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c
index 911fd8914e..f06478f750 100644
--- a/audio/filter/af_scaletempo.c
+++ b/audio/filter/af_scaletempo.c
@@ -48,7 +48,7 @@ struct f_opts {
float scale_nominal;
float ms_stride;
float ms_search;
- float percent_overlap;
+ float factor_overlap;
#define SCALE_TEMPO 1
#define SCALE_PITCH 2
int speed_opt;
@@ -187,10 +187,10 @@ static int best_overlap_offset_s16(struct priv *s)
ps += s->samples_overlap - s->num_channels;
long i = -(s->samples_overlap - s->num_channels);
do {
- corr += ppc[i + 0] * ps[i + 0];
- corr += ppc[i + 1] * ps[i + 1];
- corr += ppc[i + 2] * ps[i + 2];
- corr += ppc[i + 3] * ps[i + 3];
+ corr += ppc[i + 0] * (int64_t)ps[i + 0];
+ corr += ppc[i + 1] * (int64_t)ps[i + 1];
+ corr += ppc[i + 2] * (int64_t)ps[i + 2];
+ corr += ppc[i + 3] * (int64_t)ps[i + 3];
i += 4;
} while (i < 0);
if (corr > best_corr) {
@@ -400,7 +400,7 @@ static bool reinit(struct mp_filter *f)
update_speed(s, s->speed);
- int frames_overlap = s->frames_stride * s->opts->percent_overlap;
+ int frames_overlap = s->frames_stride * s->opts->factor_overlap;
if (frames_overlap <= 0) {
s->bytes_standing = s->bytes_stride;
s->samples_standing = s->bytes_standing / bps;
@@ -538,7 +538,8 @@ static void reset(struct mp_filter *f)
s->bytes_queued = 0;
s->bytes_to_slide = 0;
s->frames_stride_error = 0;
- memset(s->buf_overlap, 0, s->bytes_overlap);
+ if (s->buf_overlap && s->bytes_overlap)
+ memset(s->buf_overlap, 0, s->bytes_overlap);
TA_FREEP(&s->in);
}
@@ -603,7 +604,7 @@ const struct mp_user_filter_entry af_scaletempo = {
.priv_size = sizeof(OPT_BASE_STRUCT),
.priv_defaults = &(const OPT_BASE_STRUCT) {
.ms_stride = 60,
- .percent_overlap = .20,
+ .factor_overlap = .20,
.ms_search = 14,
.speed_opt = SCALE_TEMPO,
.scale_nominal = 1.0,
@@ -611,7 +612,7 @@ const struct mp_user_filter_entry af_scaletempo = {
.options = (const struct m_option[]) {
{"scale", OPT_FLOAT(scale_nominal), M_RANGE(0.01, DBL_MAX)},
{"stride", OPT_FLOAT(ms_stride), M_RANGE(0.01, DBL_MAX)},
- {"overlap", OPT_FLOAT(percent_overlap), M_RANGE(0, 1)},
+ {"overlap", OPT_FLOAT(factor_overlap), M_RANGE(0, 1)},
{"search", OPT_FLOAT(ms_search), M_RANGE(0, DBL_MAX)},
{"speed", OPT_CHOICE(speed_opt,
{"pitch", SCALE_PITCH},
diff --git a/audio/filter/af_scaletempo2.c b/audio/filter/af_scaletempo2.c
new file mode 100644
index 0000000000..7ad8e3566d
--- /dev/null
+++ b/audio/filter/af_scaletempo2.c
@@ -0,0 +1,254 @@
+#include "audio/aframe.h"
+#include "audio/filter/af_scaletempo2_internals.h"
+#include "audio/format.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/m_option.h"
+
+struct priv { // per-instance state of the "scaletempo2" filter
+ struct mp_scaletempo2 data; // algorithm state passed to every mp_scaletempo2_* call
+ struct mp_pin *in_pin; // pin frames are read from (pins[1] of the autoconvert filter set up in create)
+ struct mp_aframe *cur_format; // audio config copied from the frame the algorithm was last initialized with
+ struct mp_aframe_pool *out_pool; // allocator for output frames
+ bool sent_final; // mp_scaletempo2_set_final() has been called since the last init
+ struct mp_aframe *pending; // current input frame; consumed samples are skipped off it
+ bool initialized; // init_scaletempo2() succeeded and no reset/drain has happened since
+ float speed; // playback speed; 1.0 at creation, replaced by MP_FILTER_COMMAND_SET_SPEED
+};
+
+static bool init_scaletempo2(struct mp_filter *f);
+static void reset(struct mp_filter *f);
+
+static void process(struct mp_filter *f) // feeds input until output is available, then emits at most one output frame
+{
+ struct priv *p = f->priv;
+
+ if (!mp_pin_in_needs_data(f->ppins[1])) // nothing to do unless the output side wants a frame
+ return;
+
+ while (!p->initialized || !p->pending || // keep feeding until there is something to output
+ !mp_scaletempo2_frames_available(&p->data, p->speed))
+ {
+ bool eof = false;
+ if (!p->pending || !mp_aframe_get_size(p->pending)) { // no frame yet, or the current one is fully consumed
+ struct mp_frame frame = mp_pin_out_read(p->in_pin);
+ if (frame.type == MP_FRAME_AUDIO) {
+ TA_FREEP(&p->pending);
+ p->pending = frame.data; // takes over the frame that was read
+ } else if (frame.type == MP_FRAME_EOF) {
+ eof = true; // the old pending frame is kept; its attributes are still used for output below
+ } else if (frame.type) {
+ MP_ERR(f, "unexpected frame type\n");
+ goto error;
+ } else {
+ return; // no new data yet
+ }
+ }
+ assert(p->pending || eof);
+
+ if (!p->initialized) {
+ if (!p->pending) { // EOF arrived before any audio: just forward it
+ mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+ return;
+ }
+ if (!init_scaletempo2(f)) // fails when the pending frame is not AF_FORMAT_FLOATP
+ goto error;
+ }
+
+ bool format_change =
+ p->pending && !mp_aframe_config_equals(p->pending, p->cur_format);
+
+ bool final = format_change || eof; // either way the current stream must be drained
+ if (p->pending && !format_change && !p->sent_final) {
+ int frame_size = mp_aframe_get_size(p->pending);
+ uint8_t **planes = mp_aframe_get_data_ro(p->pending);
+ int read = mp_scaletempo2_fill_input_buffer(&p->data,
+ planes, frame_size, p->speed);
+ mp_aframe_skip_samples(p->pending, read); // drop the samples reported as consumed
+ }
+ if (final && p->pending && !p->sent_final) { // mark the end of input exactly once per stream
+ mp_scaletempo2_set_final(&p->data);
+ p->sent_final = true;
+ }
+
+ if (mp_scaletempo2_frames_available(&p->data, p->speed)) {
+ if (eof) {
+ mp_pin_out_repeat_eof(p->in_pin); // drain more next time
+ }
+ } else if (final) { // fully drained: require a fresh init before any further input
+ p->initialized = false;
+ p->sent_final = false;
+ if (eof) {
+ mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+ return;
+ }
+ // for format change go on with proper reinit on the next iteration
+ }
+ }
+
+ assert(p->pending); // guaranteed by the loop condition above
+ if (mp_scaletempo2_frames_available(&p->data, p->speed)) {
+ struct mp_aframe *out = mp_aframe_new_ref(p->cur_format);
+ int out_samples = p->data.ola_hop_size; // requested output size for this call
+ if (mp_aframe_pool_allocate(p->out_pool, out, out_samples) < 0) {
+ talloc_free(out);
+ goto error;
+ }
+
+ mp_aframe_copy_attributes(out, p->pending);
+
+ uint8_t **planes = mp_aframe_get_data_rw(out);
+ assert(planes);
+ assert(mp_aframe_get_planes(out) == p->data.channels);
+
+ out_samples = mp_scaletempo2_fill_buffer(&p->data, // the returned count replaces the requested size
+ (float**)planes, out_samples, p->speed);
+
+ double pts = mp_aframe_get_pts(p->pending);
+ if (pts != MP_NOPTS_VALUE) {
+ double frame_delay = mp_scaletempo2_get_latency(&p->data, p->speed) // presumably counted in input samples - confirm
+ + out_samples * p->speed;
+ mp_aframe_set_pts(out, pts - frame_delay / mp_aframe_get_effective_rate(out)); // shift back from the pending frame's pts
+
+ if (p->sent_final) {
+ double remain_pts = pts - mp_aframe_get_pts(out);
+ double rate = mp_aframe_get_effective_rate(out) / p->speed;
+ int max_samples = MPMAX(0, (int) (remain_pts * rate));
+ // truncate final packet to expected length
+ if (out_samples >= max_samples) {
+ out_samples = max_samples;
+
+ // reset the filter to ensure it stops generating audio
+ // and mp_scaletempo2_frames_available returns false
+ mp_scaletempo2_reset(&p->data);
+ }
+ }
+ }
+
+ mp_aframe_set_size(out, out_samples);
+ mp_aframe_mul_speed(out, p->speed);
+ mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
+ }
+
+ return;
+error:
+ mp_filter_internal_mark_failed(f); // reached for unexpected frame types, failed init, or failed allocation
+}
+
+static bool init_scaletempo2(struct mp_filter *f) // (re)initializes the algorithm for p->pending's format; false if unsupported
+{
+ struct priv *p = f->priv;
+ assert(p->pending);
+
+ if (mp_aframe_get_format(p->pending) != AF_FORMAT_FLOATP) // only AF_FORMAT_FLOATP input is accepted
+ return false;
+
+ mp_aframe_reset(p->cur_format);
+ p->initialized = true;
+ p->sent_final = false;
+ mp_aframe_config_copy(p->cur_format, p->pending); // process() compares later frames against this to detect format changes
+
+ mp_scaletempo2_init(&p->data, mp_aframe_get_channels(p->pending),
+ mp_aframe_get_rate(p->pending));
+
+ return true;
+}
+
+static bool command(struct mp_filter *f, struct mp_filter_command *cmd) // returns true only for commands handled here
+{
+ struct priv *p = f->priv;
+
+ switch (cmd->type) {
+ case MP_FILTER_COMMAND_SET_SPEED:
+ p->speed = cmd->speed; // stored as float; passed to the mp_scaletempo2_* calls in process()
+ return true;
+ }
+
+ return false; // every other command type is reported as not handled
+}
+
+static void reset(struct mp_filter *f) // discards buffered state so the next process() call starts from a fresh init
+{
+ struct priv *p = f->priv;
+ mp_scaletempo2_reset(&p->data);
+ p->initialized = false; // forces init_scaletempo2() on the next frame, which also clears sent_final
+ TA_FREEP(&p->pending);
+}
+
+static void destroy(struct mp_filter *f) // releases the algorithm state and any input frame still held
+{
+ struct priv *p = f->priv;
+ mp_scaletempo2_destroy(&p->data);
+ talloc_free(p->pending);
+}
+
+static const struct mp_filter_info af_scaletempo2_filter = { // callback table handed to mp_filter_create() in af_scaletempo2_create()
+ .name = "scaletempo2",
+ .priv_size = sizeof(struct priv), // size of the state reached through f->priv
+ .process = process,
+ .command = command,
+ .reset = reset,
+ .destroy = destroy,
+};
+
+static struct mp_filter *af_scaletempo2_create( // builds a "scaletempo2" filter; NULL if the filter cannot be created
+ struct mp_filter *parent, void *options)
+{
+ struct mp_filter *f = mp_filter_create(parent, &af_scaletempo2_filter);
+ if (!f) {
+ talloc_free(options);
+ return NULL;
+ }
+
+ mp_filter_add_pin(f, MP_PIN_IN, "in");
+ mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+ struct priv *p = f->priv;
+ p->data.opts = talloc_steal(p, options); // options are reparented to p and read through data.opts
+ p->speed = 1.0;
+ p->cur_format = talloc_steal(p, mp_aframe_create());
+ p->out_pool = mp_aframe_pool_create(p);
+ p->pending = NULL;
+ p->initialized = false; // the algorithm is set up lazily by process() on the first audio frame
+
+ struct mp_autoconvert *conv = mp_autoconvert_create(f);
+ if (!conv)
+ abort(); // a failed autoconvert creation terminates the program rather than returning NULL
+
+ mp_autoconvert_add_afmt(conv, AF_FORMAT_FLOATP); // the only format init_scaletempo2() accepts
+
+ mp_pin_connect(conv->f->pins[0], f->ppins[0]); // input goes through the converter first
+ p->in_pin = conv->f->pins[1]; // process() reads frames from the converter's other pin
+
+ return f;
+}
+
+#define OPT_BASE_STRUCT struct mp_scaletempo2_opts
+const struct mp_user_filter_entry af_scaletempo2 = { // user-visible entry for the "scaletempo2" filter
+ .desc = {
+ .description = "Scale audio tempo while maintaining pitch"
+ " (filter ported from chromium)",
+ .name = "scaletempo2",
+ .priv_size = sizeof(OPT_BASE_STRUCT), // the options struct is what create() receives as `options`
+ .priv_defaults = &(const OPT_BASE_STRUCT) { // values used when an option is not given
+ .min_playback_rate = 0.25,
+ .max_playback_rate = 8.0,
+ .ola_window_size_ms = 12, // presumably milliseconds, per the _ms suffix
+ .wsola_search_interval_ms = 40, // presumably milliseconds, per the _ms suffix
+ },
+ .options = (const struct m_option[]) { // option name -> struct field, with the accepted range
+ {"search-interval",
+ OPT_FLOAT(wsola_search_interval_ms), M_RANGE(1, 1000)},
+ {"window-size",
+ OPT_FLOAT(ola_window_size_ms), M_RANGE(1, 1000)},
+ {"min-speed",
+ OPT_FLOAT(min_playback_rate), M_RANGE(0, FLT_MAX)},
+ {"max-speed",
+ OPT_FLOAT(max_playback_rate), M_RANGE(0, FLT_MAX)},
+ {0} // list terminator
+ }
+ },
+ .create = af_scaletempo2_create,
+};
diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c
new file mode 100644
index 0000000000..6e5b31aeda
--- /dev/null
+++ b/audio/filter/af_scaletempo2_internals.c
@@ -0,0 +1,874 @@
+#include <float.h>
+#include <math.h>
+
+#include "audio/chmap.h"
+#include "audio/filter/af_scaletempo2_internals.h"
+
+#include "config.h"
+
+// Algorithm overview (from chromium):
+// Waveform Similarity Overlap-and-add (WSOLA).
+//
+// One WSOLA iteration
+//
+// 1) Extract |target_block| as input frames at indices
+// [|target_block_index|, |target_block_index| + |ola_window_size|).
+// Note that |target_block| is the "natural" continuation of the output.
+//
+// 2) Extract |search_block| as input frames at indices
+// [|search_block_index|,
+// |search_block_index| + |num_candidate_blocks| + |ola_window_size|).
+//
+// 3) Find a block within the |search_block| that is most similar
+// to |target_block|. Let |optimal_index| be the index of such block and
+// write it to |optimal_block|.
+//
+// 4) Update:
+// |optimal_block| = |transition_window| * |target_block| +
+// (1 - |transition_window|) * |optimal_block|.
+//
+// 5) Overlap-and-add |optimal_block| to the |wsola_output|.
+//
+// 6) Update:write
+
+struct interval { // integer range; in_interval() treats both ends as inclusive
+ int lo; // lower bound
+ int hi; // upper bound
+};
+
+static bool in_interval(int n, struct interval q) // true when q.lo <= n <= q.hi
+{
+ return n >= q.lo && n <= q.hi; // both bounds are inclusive
+}
+
+static float **realloc_2d(float **p, int x, int y) // resizes p to one buffer: x row pointers followed by x*y floats
+{
+ float **array = realloc(p, sizeof(float*) * x + sizeof(float) * x * y); // NOTE(review): result is used below without a NULL check
+ float* data = (float*) (array + x); // float storage starts right after the pointer table
+ for (int i = 0; i < x; ++i) {
+ array[i] = data + i * y; // row i points at its own run of y floats
+ }
+ return array; // row pointers are rebuilt on every call, so they stay valid if realloc moved the buffer
+}
+
+static void zero_2d(float **a, int x, int y) // zeroes all x*y floats of an array laid out as realloc_2d() does
+{
+ memset(a + x, 0, sizeof(float) * x * y); // assumes the floats sit contiguously right after the x row pointers
+}
+
+static void zero_2d_partial(float **a, int x, int y) // zeroes the first y floats of each of the first x rows
+{
+ for (int i = 0; i < x; ++i) {
+ memset(a[i], 0, sizeof(float) * y); // goes through the row pointer, so rows may be longer than y
+ }
+}
+
+// Energies of sliding blocks are stored interleaved by channel.
+// The number of blocks is |input_frames| - (|frames_per_block| - 1), hence
+// |energy| must hold at least
+// (|input_frames| - (|frames_per_block| - 1)) * |channels| floats.
+static void multi_channel_moving_block_energies(
+ float **input, int input_frames, int channels,
+ int frames_per_block, float *energy)
+{
+ int num_blocks = input_frames - (frames_per_block - 1);
+
+ for (int k = 0; k < channels; ++k) {
+ const float* input_channel = input[k];
+
+ energy[k] = 0; // block 0 of channel k lives at index k
+
+ // First block of channel |k|.
+ for (int m = 0; m < frames_per_block; ++m) {
+ energy[k] += input_channel[m] * input_channel[m]; // sum of squares over the first block
+ }
+
+ const float* slide_out = input_channel; // sample leaving the block as it advances
+ const float* slide_in = input_channel + frames_per_block; // sample entering the block as it advances
+ for (int n = 1; n < num_blocks; ++n, ++slide_in, ++slide_out) {
+ energy[k + n * channels] = energy[k + (n - 1) * channels] // previous block's energy, updated incrementally
+ - *slide_out * *slide_out + *slide_in * *slide_in;
+ }
+ }
+}
+
+static float multi_channel_similarity_measure( // sums, over channels, each dot product divided by sqrt(energy_a * energy_b + epsilon)
+ const float* dot_prod_a_b,
+ const float* energy_a, const float* energy_b,
+ int channels)
+{
+ const float epsilon = 1e-12f; // keeps the square root's argument non-zero when both energies are zero
+ float similarity_measure = 0.0f;
+ for (int n = 0; n < channels; ++n) {
+ similarity_measure += dot_prod_a_b[n]
+ / sqrtf(energy_a[n] * energy_b[n] + epsilon);
+ }
+ return similarity_measure; // a sum over channels, not an average
+}
+
+#if HAVE_VECTOR
+
+typedef float v8sf __attribute__ ((vector_size (32), aligned (1)));
+
+// Dot-product of channels of two AudioBus. For each AudioBus an offset is
+// given. |dot_product[k]| is the dot-product of channel |k|. The caller should
+// allocate sufficient space for |dot_product|.
+static void multi_channel_dot_product(
+ float **a, int frame_offset_a,
+ float **b, int frame_offset_b,
+ int channels,
+ int num_frames, float *dot_product)
+{
+ assert(frame_offset_a >= 0);
+ assert(frame_offset_b >= 0);
+
+ for (int k = 0; k < channels; ++k) {
+ const float* ch_a = a[k] + frame_offset_a;
+ const float* ch_b = b[k] + frame_offset_b;
+ float sum = 0.0;
+ if (num_frames < 32)
+ goto rest;
+
+ const v8sf *va = (const v8sf *) ch_a;
+ const v8sf *vb = (const v8sf *) ch_b;
+ v8sf vsum[4] = {
+ // Initialize to product of first 32 floats
+ va[0] * vb[0],
+ va[1] * vb[1],
+ va[2] * vb[2],
+ va[3] * vb[3],
+ };
+ va += 4;
+ vb += 4;
+
+ // Process `va` and `vb` across four vertical stripes
+ for (int n = 1; n < num_