diff options
Diffstat (limited to 'audio/filter')
-rw-r--r-- | audio/filter/af_drop.c | 114 | ||||
-rw-r--r-- | audio/filter/af_format.c | 20 | ||||
-rw-r--r-- | audio/filter/af_lavcac3enc.c | 169 | ||||
-rw-r--r-- | audio/filter/af_rubberband.c | 91 | ||||
-rw-r--r-- | audio/filter/af_scaletempo.c | 282 | ||||
-rw-r--r-- | audio/filter/af_scaletempo2.c | 255 | ||||
-rw-r--r-- | audio/filter/af_scaletempo2_internals.c | 844 | ||||
-rw-r--r-- | audio/filter/af_scaletempo2_internals.h | 133 |
8 files changed, 1685 insertions, 223 deletions
diff --git a/audio/filter/af_drop.c b/audio/filter/af_drop.c new file mode 100644 index 0000000000..499389dd2b --- /dev/null +++ b/audio/filter/af_drop.c @@ -0,0 +1,114 @@ +#include "audio/aframe.h" +#include "audio/format.h" +#include "common/common.h" +#include "filters/f_autoconvert.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" + +struct priv { + double speed; + double diff; // amount of too many additional samples in normal speed + struct mp_aframe *last; // for repeating +}; + +static void af_drop_process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (!mp_pin_in_needs_data(f->ppins[1])) + return; + + struct mp_frame frame = {0}; + + double last_dur = p->last ? mp_aframe_duration(p->last) : 0; + if (p->last && p->diff < 0 && -p->diff > last_dur / 2) { + MP_VERBOSE(f, "repeat\n"); + frame = MAKE_FRAME(MP_FRAME_AUDIO, p->last); + p->last = NULL; + } else { + frame = mp_pin_out_read(f->ppins[0]); + + if (frame.type == MP_FRAME_AUDIO) { + last_dur = mp_aframe_duration(frame.data); + p->diff -= last_dur; + if (p->diff > last_dur / 2) { + MP_VERBOSE(f, "drop\n"); + mp_frame_unref(&frame); + mp_filter_internal_mark_progress(f); + } + } + } + + if (frame.type == MP_FRAME_AUDIO) { + struct mp_aframe *fr = frame.data; + talloc_free(p->last); + p->last = mp_aframe_new_ref(fr); + mp_aframe_mul_speed(fr, p->speed); + p->diff += mp_aframe_duration(fr); + mp_aframe_set_pts(p->last, mp_aframe_end_pts(fr)); + } else if (frame.type == MP_FRAME_EOF) { + TA_FREEP(&p->last); + } + mp_pin_in_write(f->ppins[1], frame); +} + +static bool af_drop_command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct priv *p = f->priv; + + switch (cmd->type) { + case MP_FILTER_COMMAND_SET_SPEED: + p->speed = cmd->speed; + return true; + } + + return false; +} + +static void af_drop_reset(struct mp_filter *f) +{ + struct priv *p = f->priv; + + TA_FREEP(&p->last); + p->diff = 0; +} + +static void af_drop_destroy(struct mp_filter *f) +{ + af_drop_reset(f); +} + +static const struct mp_filter_info af_drop_filter = { + .name = "drop", + .priv_size = sizeof(struct priv), + .process = af_drop_process, + .command = af_drop_command, + .reset = af_drop_reset, + .destroy = af_drop_destroy, +}; + +static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &af_drop_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->speed = 1.0; + + return f; +} + +const struct mp_user_filter_entry af_drop = { + .desc = { + .description = "Change audio speed by dropping/repeating frames", + .name = "drop", + .priv_size = sizeof(struct priv), + }, + .create = af_drop_create, +}; diff --git a/audio/filter/af_format.c b/audio/filter/af_format.c index 3e1eef664c..eddce6422f 100644 --- a/audio/filter/af_format.c +++ b/audio/filter/af_format.c @@ -30,7 +30,7 @@ struct f_opts { int out_srate; struct m_channels out_channels; - int fail; + bool fail; }; struct priv { @@ -38,7 +38,7 @@ struct priv { struct mp_pin *in_pin; }; -static void process(struct mp_filter *f) +static void af_format_process(struct mp_filter *f) { struct priv *p = f->priv; @@ -85,7 +85,7 @@ error: static const struct mp_filter_info af_format_filter = { .name = "format", .priv_size = sizeof(struct priv), - .process = process, + .process = af_format_process, }; static struct mp_filter *af_format_create(struct mp_filter *parent, @@ -128,12 +128,14 @@ const struct mp_user_filter_entry af_format = { .description = "Force audio format", .priv_size = sizeof(struct f_opts), .options = (const struct m_option[]) { - OPT_AUDIOFORMAT("format", in_format, 0), - OPT_INTRANGE("srate", in_srate, 0, 1000, 8*48000), - OPT_CHANNELS("channels", in_channels, 0, .min = 1), - OPT_INTRANGE("out-srate", out_srate, 0, 1000, 8*48000), - OPT_CHANNELS("out-channels", out_channels, 0, .min = 1), - OPT_FLAG("fail", fail, 0), + {"format", OPT_AUDIOFORMAT(in_format)}, + {"srate", OPT_INT(in_srate), M_RANGE(1000, 8*48000)}, + {"channels", OPT_CHANNELS(in_channels), + .flags = M_OPT_CHANNELS_LIMITED}, + {"out-srate", OPT_INT(out_srate), M_RANGE(1000, 8*48000)}, + {"out-channels", OPT_CHANNELS(out_channels), + .flags = M_OPT_CHANNELS_LIMITED}, + {"fail", OPT_BOOL(fail)}, {0} }, }, diff --git a/audio/filter/af_lavcac3enc.c b/audio/filter/af_lavcac3enc.c index c7582cf52b..def9700d18 100644 --- a/audio/filter/af_lavcac3enc.c +++ b/audio/filter/af_lavcac3enc.c @@ -31,7 +31,10 @@ #include <libavutil/bswap.h> #include <libavutil/mem.h> +#include "config.h" + #include "audio/aframe.h" +#include "audio/chmap_avchannel.h" #include "audio/chmap_sel.h" #include "audio/fmt-conversion.h" #include "audio/format.h" @@ -47,13 +50,13 @@ #define AC3_MAX_CHANNELS 6 #define AC3_MAX_CODED_FRAME_SIZE 3840 #define AC3_FRAME_SIZE (6 * 256) -const uint16_t ac3_bitrate_tab[19] = { +static const uint16_t ac3_bitrate_tab[19] = { 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 576, 640 }; struct f_opts { - int add_iec61937_header; + bool add_iec61937_header; int bit_rate; int min_channel_num; char *encoder; @@ -68,8 +71,9 @@ struct priv { struct mp_aframe *in_frame; struct mp_aframe_pool *out_pool; - struct AVCodec *lavc_acodec; + const struct AVCodec *lavc_acodec; struct AVCodecContext *lavc_actx; + AVPacket *lavc_pkt; int bit_rate; int out_samples; // upper bound on encoded output per AC3 frame }; @@ -99,12 +103,25 @@ static bool reinit(struct mp_filter *f) if (!bit_rate && chmap.num < AC3_MAX_CHANNELS + 1) bit_rate = default_bit_rate[chmap.num]; - avcodec_close(s->lavc_actx); + avcodec_free_context(&s->lavc_actx); + s->lavc_actx = avcodec_alloc_context3(s->lavc_acodec); + if (!s->lavc_actx) { + MP_ERR(f, "Audio LAVC, couldn't reallocate context!\n"); + return false; + } + + if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0) + return false; // Put sample parameters s->lavc_actx->sample_fmt = af_to_avformat(format); + +#if !HAVE_AV_CHANNEL_LAYOUT s->lavc_actx->channels = chmap.num; s->lavc_actx->channel_layout = mp_chmap_to_lavc(&chmap); +#else + mp_chmap_to_av_layout(&s->lavc_actx->ch_layout, &chmap); +#endif s->lavc_actx->sample_rate = rate; s->lavc_actx->bit_rate = bit_rate; @@ -122,18 +139,19 @@ static bool reinit(struct mp_filter *f) return true; } -static void reset(struct mp_filter *f) +static void af_lavcac3enc_reset(struct mp_filter *f) { struct priv *s = f->priv; TA_FREEP(&s->in_frame); } -static void destroy(struct mp_filter *f) +static void af_lavcac3enc_destroy(struct mp_filter *f) { struct priv *s = f->priv; - reset(f); + af_lavcac3enc_reset(f); + av_packet_free(&s->lavc_pkt); avcodec_free_context(&s->lavc_actx); } @@ -143,7 +161,7 @@ static void swap_16(uint16_t *ptr, size_t size) ptr[n] = av_bswap16(ptr[n]); } -static void process(struct mp_filter *f) +static void af_lavcac3enc_process(struct mp_filter *f) { struct priv *s = f->priv; @@ -152,57 +170,57 @@ static void process(struct mp_filter *f) bool err = true; struct mp_aframe *out = NULL; - AVPacket pkt = {0}; - av_init_packet(&pkt); + AVPacket *pkt = s->lavc_pkt; // Send input as long as it wants. while (1) { if (avcodec_is_open(s->lavc_actx)) { - int lavc_ret = avcodec_receive_packet(s->lavc_actx, &pkt); + int lavc_ret = avcodec_receive_packet(s->lavc_actx, pkt); if (lavc_ret >= 0) break; if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) { MP_FATAL(f, "Encode failed (receive).\n"); - goto done; + goto error; } } AVFrame *frame = NULL; struct mp_frame input = mp_pin_out_read(s->in_pin); // The following code assumes no sample data buffering in the encoder. - if (input.type == MP_FRAME_EOF) { + switch (input.type) { + case MP_FRAME_NONE: + goto done; // no data yet + case MP_FRAME_EOF: mp_pin_in_write(f->ppins[1], input); - return; - } else if (input.type == MP_FRAME_AUDIO) { + goto done; + case MP_FRAME_AUDIO: TA_FREEP(&s->in_frame); s->in_frame = input.data; - frame = mp_frame_to_av(input, NULL); - if (!frame) - goto done; if (mp_aframe_get_channels(s->in_frame) < s->opts->min_channel_num) { // Just pass it through. s->in_frame = NULL; mp_pin_in_write(f->ppins[1], input); - return; + goto done; } if (!mp_aframe_config_equals(s->in_frame, s->cur_format)) { if (!reinit(f)) - goto done; + goto error; } - } else if (input.type) { - goto done; - } else { - return; // no data yet + frame = mp_frame_to_av(input, NULL); + if (!frame) + goto error; + break; + default: goto error; // unexpected packet type } int lavc_ret = avcodec_send_frame(s->lavc_actx, frame); av_frame_free(&frame); if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) { MP_FATAL(f, "Encode failed (send).\n"); - goto done; + goto error; } } if (!s->in_frame) - goto done; + goto error; out = mp_aframe_create(); mp_aframe_set_format(out, AF_FORMAT_S_AC3); @@ -210,18 +228,18 @@ static void process(struct mp_filter *f) mp_aframe_set_rate(out, 48000); if (mp_aframe_pool_allocate(s->out_pool, out, s->out_samples) < 0) - goto done; + goto error; int sstride = mp_aframe_get_sstride(out); mp_aframe_copy_attributes(out, s->in_frame); - int frame_size = pkt.size; + int frame_size = pkt->size; int header_len = 0; char hdr[8]; - if (s->opts->add_iec61937_header && pkt.size > 5) { - int bsmod = pkt.data[5] & 0x7; + if (s->opts->add_iec61937_header && pkt->size > 5) { + int bsmod = pkt->data[5] & 0x7; int len = frame_size; frame_size = AC3_FRAME_SIZE * 2 * 2; @@ -239,20 +257,22 @@ static void process(struct mp_filter *f) uint8_t **planes = mp_aframe_get_data_rw(out); if (!planes) - goto done; + goto error; char *buf = planes[0]; memcpy(buf, hdr, header_len); - memcpy(buf + header_len, pkt.data, pkt.size); - memset(buf + header_len + pkt.size, 0, - frame_size - (header_len + pkt.size)); - swap_16((uint16_t *)(buf + header_len), pkt.size / 2); + memcpy(buf + header_len, pkt->data, pkt->size); + memset(buf + header_len + pkt->size, 0, + frame_size - (header_len + pkt->size)); + swap_16((uint16_t *)(buf + header_len), pkt->size / 2); mp_aframe_set_size(out, frame_size / sstride); mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out)); out = NULL; - err = 0; done: - av_packet_unref(&pkt); + err = false; + // fall through +error: + av_packet_unref(pkt); talloc_free(out); if (err) mp_filter_internal_mark_failed(f); @@ -261,11 +281,43 @@ done: static const struct mp_filter_info af_lavcac3enc_filter = { .name = "lavcac3enc", .priv_size = sizeof(struct priv), - .process = process, - .reset = reset, - .destroy = destroy, + .process = af_lavcac3enc_process, + .reset = af_lavcac3enc_reset, + .destroy = af_lavcac3enc_destroy, }; +static void add_chmaps_to_autoconv(struct mp_filter *f, + struct mp_autoconvert *conv, + const struct AVCodec *codec) +{ +#if !HAVE_AV_CHANNEL_LAYOUT + const uint64_t *lch = codec->channel_layouts; + for (int n = 0; lch && lch[n]; n++) { + struct mp_chmap chmap = {0}; + mp_chmap_from_lavc(&chmap, lch[n]); + if (mp_chmap_is_valid(&chmap)) + mp_autoconvert_add_chmap(conv, &chmap); + } +#else + const AVChannelLayout *lch = codec->ch_layouts; + for (int n = 0; lch && lch[n].nb_channels; n++) { + struct mp_chmap chmap = {0}; + + if (!mp_chmap_from_av_layout(&chmap, &lch[n])) { + char layout[128] = {0}; + MP_VERBOSE(f, "Skipping unsupported channel layout: %s\n", + av_channel_layout_describe(&lch[n], + layout, 128) < 0 ? + "undefined" : layout); + continue; + } + + if (mp_chmap_is_valid(&chmap)) + mp_autoconvert_add_chmap(conv, &chmap); + } +#endif +} + static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent, void *options) { @@ -295,14 +347,23 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent, goto error; } + s->lavc_pkt = av_packet_alloc(); + if (!s->lavc_pkt) + goto error; + if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0) goto error; - // For this one, we require the decoder to expert lists of all supported + // For this one, we require the decoder to export lists of all supported // parameters. (Not all decoders do that, but the ones we're interested // in do.) if (!s->lavc_acodec->sample_fmts || - !s->lavc_acodec->channel_layouts) +#if !HAVE_AV_CHANNEL_LAYOUT + !s->lavc_acodec->channel_layouts +#else + !s->lavc_acodec->ch_layouts +#endif + ) { MP_ERR(f, "Audio encoder doesn't list supported parameters.\n"); goto error; @@ -334,13 +395,7 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent, mp_autoconvert_add_afmt(conv, mpfmt); } - const uint64_t *lch = s->lavc_acodec->channel_layouts; - for (int n = 0; lch && lch[n]; n++) { - struct mp_chmap chmap = {0}; - mp_chmap_from_lavc(&chmap, lch[n]); - if (mp_chmap_is_valid(&chmap)) - mp_autoconvert_add_chmap(conv, &chmap); - } + add_chmaps_to_autoconv(f, conv, s->lavc_acodec); // At least currently, the AC3 encoder doesn't export sample rates. mp_autoconvert_add_srate(conv, 48000); @@ -357,6 +412,8 @@ static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent, return f; error: + av_packet_free(&s->lavc_pkt); + avcodec_free_context(&s->lavc_actx); talloc_free(f); return NULL; } @@ -369,18 +426,18 @@ const struct mp_user_filter_entry af_lavcac3enc = { .name = "lavcac3enc", .priv_size = sizeof(OPT_BASE_STRUCT), .priv_defaults = &(const OPT_BASE_STRUCT) { - .add_iec61937_header = 1, + .add_iec61937_header = true, .bit_rate = 640, .min_channel_num = 3, .encoder = "ac3", }, .options = (const struct m_option[]) { - OPT_FLAG("tospdif", add_iec61937_header, 0), - OPT_CHOICE_OR_INT("bitrate", bit_rate, 0, 32, 640, - ({"auto", 0}, {"default", 0})), - OPT_INTRANGE("minch", min_channel_num, 0, 2, 6), - OPT_STRING("encoder", encoder, 0), - OPT_KEYVALUELIST("o", avopts, 0), + {"tospdif", OPT_BOOL(add_iec61937_header)}, + {"bitrate", OPT_CHOICE(bit_rate, + {"auto", 0}, {"default", 0}), M_RANGE(32, 640)}, + {"minch", OPT_INT(min_channel_num), M_RANGE(2, 6)}, + {"encoder", OPT_STRING(encoder)}, + {"o", OPT_KEYVALUELIST(avopts)}, {0} }, }, diff --git a/audio/filter/af_rubberband.c b/audio/filter/af_rubberband.c index c7b6317c13..e71937fcb2 100644 --- a/audio/filter/af_rubberband.c +++ b/audio/filter/af_rubberband.c @@ -20,6 +20,8 @@ #include <rubberband/rubberband-c.h> +#include "config.h" + #include "audio/aframe.h" #include "audio/format.h" #include "common/common.h" @@ -31,7 +33,7 @@ // command line options struct f_opts { int transients, detector, phase, window, - smoothing, formant, pitch, channels; + smoothing, formant, pitch, channels, engine; double scale; }; @@ -78,7 +80,10 @@ static bool init_rubberband(struct mp_filter *f) int opts = p->opts->transients | p->opts->detector | p->opts->phase | p->opts->window | p->opts->smoothing | p->opts->formant | - p->opts->pitch | p-> opts->channels | + p->opts->pitch | p->opts->channels | +#if HAVE_RUBBERBAND_3 + p->opts->engine | +#endif RubberBandOptionProcessRealTime; int rate = mp_aframe_get_rate(p->pending); @@ -100,7 +105,7 @@ static bool init_rubberband(struct mp_filter *f) return true; } -static void process(struct mp_filter *f) +static void af_rubberband_process(struct mp_filter *f) { struct priv *p = f->priv; @@ -228,7 +233,7 @@ error: mp_filter_internal_mark_failed(f); } -static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +static bool af_rubberband_command(struct mp_filter *f, struct mp_filter_command *cmd) { struct priv *p = f->priv; @@ -258,7 +263,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd) return false; } -static void reset(struct mp_filter *f) +static void af_rubberband_reset(struct mp_filter *f) { struct priv *p = f->priv; @@ -269,7 +274,7 @@ static void reset(struct mp_filter *f) TA_FREEP(&p->pending); } -static void destroy(struct mp_filter *f) +static void af_rubberband_destroy(struct mp_filter *f) { struct priv *p = f->priv; @@ -281,10 +286,10 @@ static void destroy(struct mp_filter *f) static const struct mp_filter_info af_rubberband_filter = { .name = "rubberband", .priv_size = sizeof(struct priv), - .process = process, - .command = command, - .reset = reset, - .destroy = destroy, + .process = af_rubberband_process, + .command = af_rubberband_command, + .reset = af_rubberband_reset, + .destroy = af_rubberband_destroy, }; static struct mp_filter *af_rubberband_create(struct mp_filter *parent, @@ -331,37 +336,45 @@ const struct mp_user_filter_entry af_rubberband = { .transients = RubberBandOptionTransientsMixed, .formant = RubberBandOptionFormantPreserved, .channels = RubberBandOptionChannelsTogether, +#if HAVE_RUBBERBAND_3 + .engine = RubberBandOptionEngineFiner, +#endif }, .options = (const struct m_option[]) { - OPT_CHOICE("transients", transients, 0, - ({"crisp", RubberBandOptionTransientsCrisp}, - {"mixed", RubberBandOptionTransientsMixed}, - {"smooth", RubberBandOptionTransientsSmooth})), - OPT_CHOICE("detector", detector, 0, - ({"compound", RubberBandOptionDetectorCompound}, - {"percussive", RubberBandOptionDetectorPercussive}, - {"soft", RubberBandOptionDetectorSoft})), - OPT_CHOICE("phase", phase, 0, - ({"laminar", RubberBandOptionPhaseLaminar}, - {"independent", RubberBandOptionPhaseIndependent})), - OPT_CHOICE("window", window, 0, - ({"standard", RubberBandOptionWindowStandard}, - {"short", RubberBandOptionWindowShort}, - {"long", RubberBandOptionWindowLong})), - OPT_CHOICE("smoothing", smoothing, 0, - ({"off", RubberBandOptionSmoothingOff}, - {"on", RubberBandOptionSmoothingOn})), - OPT_CHOICE("formant", formant, 0, - ({"shifted", RubberBandOptionFormantShifted}, - {"preserved", RubberBandOptionFormantPreserved})), - OPT_CHOICE("pitch", pitch, 0, - ({"quality", RubberBandOptionPitchHighQuality}, - {"speed", RubberBandOptionPitchHighSpeed}, - {"consistency", RubberBandOptionPitchHighConsistency})), - OPT_CHOICE("channels", channels, 0, - ({"apart", RubberBandOptionChannelsApart}, - {"together", RubberBandOptionChannelsTogether})), - OPT_DOUBLE("pitch-scale", scale, M_OPT_RANGE, .min = 0.01, .max = 100), + {"transients", OPT_CHOICE(transients, + {"crisp", RubberBandOptionTransientsCrisp}, + {"mixed", RubberBandOptionTransientsMixed}, + {"smooth", RubberBandOptionTransientsSmooth})}, + {"detector", OPT_CHOICE(detector, + {"compound", RubberBandOptionDetectorCompound}, + {"percussive", RubberBandOptionDetectorPercussive}, + {"soft", RubberBandOptionDetectorSoft})}, + {"phase", OPT_CHOICE(phase, + {"laminar", RubberBandOptionPhaseLaminar}, + {"independent", RubberBandOptionPhaseIndependent})}, + {"window", OPT_CHOICE(window, + {"standard", RubberBandOptionWindowStandard}, + {"short", RubberBandOptionWindowShort}, + {"long", RubberBandOptionWindowLong})}, + {"smoothing", OPT_CHOICE(smoothing, + {"off", RubberBandOptionSmoothingOff}, + {"on", RubberBandOptionSmoothingOn})}, + {"formant", OPT_CHOICE(formant, + {"shifted", RubberBandOptionFormantShifted}, + {"preserved", RubberBandOptionFormantPreserved})}, + {"pitch", OPT_CHOICE(pitch, + {"quality", RubberBandOptionPitchHighQuality}, + {"speed", RubberBandOptionPitchHighSpeed}, + {"consistency", RubberBandOptionPitchHighConsistency})}, + {"channels", OPT_CHOICE(channels, + {"apart", RubberBandOptionChannelsApart}, + {"together", RubberBandOptionChannelsTogether})}, +#if HAVE_RUBBERBAND_3 + {"engine", OPT_CHOICE(engine, + {"finer", RubberBandOptionEngineFiner}, + {"faster", RubberBandOptionEngineFaster})}, +#endif + {"pitch-scale", OPT_DOUBLE(scale), M_RANGE(0.01, 100)}, {0} }, }, diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c index ed1df5725e..482b91209e 100644 --- a/audio/filter/af_scaletempo.c +++ b/audio/filter/af_scaletempo.c @@ -2,7 +2,7 @@ * scaletempo audio filter * * scale tempo while maintaining pitch - * (WSOLA technique with cross correlation) + * (WSOLA technique with taxicab distance) * inspired by SoundTouch library by Olli Parviainen * * basic algorithm @@ -30,10 +30,12 @@ * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ +#include <float.h> #include <stdlib.h> #include <string.h> #include <limits.h> #include <assert.h> +#include <math.h> #include "audio/aframe.h" #include "audio/format.h" @@ -47,7 +49,7 @@ struct f_opts { float scale_nominal; float ms_stride; float ms_search; - float percent_overlap; + float factor_overlap; #define SCALE_TEMPO 1 #define SCALE_PITCH 2 int speed_opt; @@ -86,8 +88,6 @@ struct priv { // best overlap int frames_search; int num_channels; - void *buf_pre_corr; - void *table_window; int (*best_overlap_offset)(struct priv *s); }; @@ -134,72 +134,144 @@ static bool fill_queue(struct priv *s) return bytes_needed == 0; } -#define UNROLL_PADDING (4 * 4) +// Fit the curve f(x) = a * x^2 + b * x + c such that +// f(-1) = y[0] +// f(0) = y[1] +// f(1) = y[2] +// and return the extremum position and value +// assuming y[0] <= y[1] >= y[2] || y[0] >= y[1] <= y[2] +static void quadratic_interpolation_float( + const float* y_values, float* x, float* value) +{ + const float b = (y_values[2] - y_values[0]) * 0.5f; + const float c = y_values[1]; + const float a = y_values[0] + b - c; + + if (a == 0.f) { + // it's a flat line + *x = 0; + *value = c; + } else { + const float pos = -b / (2.f * a); + *x = pos; + *value = a * pos * pos + b * pos + c; + } +} + +static void quadratic_interpolation_s16( + const int32_t* y_values, float* x, int32_t* value) +{ + const float b = (y_values[2] - y_values[0]) * 0.5f; + const float c = y_values[1]; + const float a = y_values[0] + b - c; + + if (a == 0.f) { + // it's a flat line + *x = 0; + *value = c; + } else { + const float pos = -b / (2.f * a); + *x = pos; + *value = a * pos * pos + b * pos + c; + } +} static int best_overlap_offset_float(struct priv *s) { - float best_corr = INT_MIN; - int best_off = 0; - - float *pw = s->table_window; - float *po = s->buf_overlap; - po += s->num_channels; - float *ppc = s->buf_pre_corr; - for (int i = s->num_channels; i < s->samples_overlap; i++) - *ppc++ = *pw++ **po++; - - float *search_start = (float *)s->buf_queue + s->num_channels; - for (int off = 0; off < s->frames_search; off++) { - float corr = 0; - float *ps = search_start; - ppc = s->buf_pre_corr; - for (int i = s->num_channels; i < s->samples_overlap; i++) - corr += *ppc++ **ps++; - if (corr > best_corr) { - best_corr = corr; - best_off = off; + int num_channels = s->num_channels, frames_search = s->frames_search; + float *source = (float *)s->buf_queue + num_channels; + float *target = (float *)s->buf_overlap + num_channels; + int num_samples = s->samples_overlap - num_channels; + int step_size = 3; + float history[3] = {}; + + float best_distance = FLT_MAX; + int best_offset_approx = 0; + for (int offset = 0; offset < frames_search; offset += step_size) { + float distance = 0; + for (int i = 0; i < num_samples; i++) + distance += fabsf(target[i] - source[offset * num_channels + i]); + + int offset_approx = offset; + history[0] = history[1]; + history[1] = history[2]; + history[2] = distance; + if(offset >= 2 && history[0] >= history[1] && history[1] <= history[2]) { + float extremum; + quadratic_interpolation_float(history, &extremum, &distance); + offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f); + } + + if (distance < best_distance) { + best_distance = distance; + best_offset_approx = offset_approx; + } + } + + best_distance = FLT_MAX; + int best_offset = 0; + int min_offset = MPMAX(0, best_offset_approx - step_size + 1); + int max_offset = MPMIN(frames_search, best_offset_approx + step_size); + for (int offset = min_offset; offset < max_offset; offset++) { + float distance = 0; + for (int i = 0; i < num_samples; i++) + distance += fabsf(target[i] - source[offset * num_channels + i]); + if (distance < best_distance) { + best_distance = distance; + best_offset = offset; } - search_start += s->num_channels; } - return best_off * 4 * s->num_channels; + return best_offset * 4 * num_channels; } static int best_overlap_offset_s16(struct priv *s) { - int64_t best_corr = INT64_MIN; - int best_off = 0; - - int32_t *pw = s->table_window; - int16_t *po = s->buf_overlap; - po += s->num_channels; - int32_t *ppc = s->buf_pre_corr; - for (long i = s->num_channels; i < s->samples_overlap; i++) - *ppc++ = (*pw++ **po++) >> 15; - - int16_t *search_start = (int16_t *)s->buf_queue + s->num_channels; - for (int off = 0; off < s->frames_search; off++) { - int64_t corr = 0; - int16_t *ps = search_start; - ppc = s->buf_pre_corr; - ppc += s->samples_overlap - s->num_channels; - ps += s->samples_overlap - s->num_channels; - long i = -(s->samples_overlap - s->num_channels); - do { - corr += ppc[i + 0] * ps[i + 0]; - corr += ppc[i + 1] * ps[i + 1]; - corr += ppc[i + 2] * ps[i + 2]; - corr += ppc[i + 3] * ps[i + 3]; - i += 4; - } while (i < 0); - if (corr > best_corr) { - best_corr = corr; - best_off = off; + int num_channels = s->num_channels, frames_search = s->frames_search; + int16_t *source = (int16_t *)s->buf_queue + num_channels; + int16_t *target = (int16_t *)s->buf_overlap + num_channels; + int num_samples = s->samples_overlap - num_channels; + int step_size = 3; + int32_t history[3] = {}; + + int32_t best_distance = INT32_MAX; + int best_offset_approx = 0; + for (int offset = 0; offset < frames_search; offset += step_size) { + int32_t distance = 0; + for (int i = 0; i < num_samples; i++) + distance += abs((int32_t)target[i] - source[offset * num_channels + i]); + + int offset_approx = offset; + history[0] = history[1]; + history[1] = history[2]; + history[2] = distance; + if(offset >= 2 && history[0] >= history[1] && history[1] <= history[2]) { + float extremum; + quadratic_interpolation_s16(history, &extremum, &distance); + offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f); + } + + if (distance < best_distance) { + best_distance = distance; + best_offset_approx = offset_approx; } - search_start += s->num_channels; } - return best_off * 2 * s->num_channels; + best_distance = INT32_MAX; + int best_offset = 0; + int min_offset = MPMAX(0, best_offset_approx - step_size + 1); + int max_offset = MPMIN(frames_search, best_offset_approx + step_size); + for (int offset = min_offset; offset < max_offset; offset++) { + int32_t distance = 0; + for (int i = 0; i < num_samples; i++) + distance += abs((int32_t)target[i] - source[offset * num_channels + i]); + if (distance < best_distance) { + best_distance = distance; + best_offset = offset; + } + } + + return best_offset * 2 * s->num_channels; } static void output_overlap_float(struct priv *s, void *buf_out, @@ -210,8 +282,9 @@ static void output_overlap_float(struct priv *s, void *buf_out, float *po = s->buf_overlap; float *pin = (float *)(s->buf_queue + bytes_off); for (int i = 0; i < s->samples_overlap; i++) { - *pout++ = *po - *pb++ *(*po - *pin++); - po++; + // the math is equal to *po * (1 - *pb) + *pin * *pb + float o = *po++; + *pout++ = o - *pb++ * (o - *pin++); } } @@ -223,12 +296,13 @@ static void output_overlap_s16(struct priv *s, void *buf_out, int16_t *po = s->buf_overlap; int16_t *pin = (int16_t *)(s->buf_queue + bytes_off); for (int i = 0; i < s->samples_overlap; i++) { - *pout++ = *po - ((*pb++ *(*po - *pin++)) >> 16); - po++; + // the math is equal to *po * (1 - *pb) + *pin * *pb + int32_t o = *po++; + *pout++ = o - ((*pb++ *(o - *pin++)) >> 16); } } -static void process(struct mp_filter *f) +static void af_scaletempo_process(struct mp_filter *f) { struct priv *s = f->priv; @@ -399,7 +473,7 @@ static bool reinit(struct mp_filter *f) update_speed(s, s->speed); - int frames_overlap = s->frames_stride * s->opts->percent_overlap; + int frames_overlap = s->frames_stride * s->opts->factor_overlap; if (frames_overlap <= 0) { s->bytes_standing = s->bytes_stride; s->samples_standing = s->bytes_standing / bps; @@ -419,18 +493,20 @@ static bool reinit(struct mp_filter *f) memset(s->buf_overlap, 0, s->bytes_ |