diff options
Diffstat (limited to 'audio/filter')
-rw-r--r-- | audio/filter/af_drop.c | 18 | ||||
-rw-r--r-- | audio/filter/af_format.c | 8 | ||||
-rw-r--r-- | audio/filter/af_lavcac3enc.c | 38 | ||||
-rw-r--r-- | audio/filter/af_rubberband.c | 18 | ||||
-rw-r--r-- | audio/filter/af_scaletempo.c | 262 | ||||
-rw-r--r-- | audio/filter/af_scaletempo2.c | 88 | ||||
-rw-r--r-- | audio/filter/af_scaletempo2_internals.c | 237 | ||||
-rw-r--r-- | audio/filter/af_scaletempo2_internals.h | 38 |
8 files changed, 413 insertions, 294 deletions
diff --git a/audio/filter/af_drop.c b/audio/filter/af_drop.c index 724c482720..499389dd2b 100644 --- a/audio/filter/af_drop.c +++ b/audio/filter/af_drop.c @@ -11,7 +11,7 @@ struct priv { struct mp_aframe *last; // for repeating }; -static void process(struct mp_filter *f) +static void af_drop_process(struct mp_filter *f) { struct priv *p = f->priv; @@ -52,7 +52,7 @@ static void process(struct mp_filter *f) mp_pin_in_write(f->ppins[1], frame); } -static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +static bool af_drop_command(struct mp_filter *f, struct mp_filter_command *cmd) { struct priv *p = f->priv; @@ -65,7 +65,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd) return false; } -static void reset(struct mp_filter *f) +static void af_drop_reset(struct mp_filter *f) { struct priv *p = f->priv; @@ -73,18 +73,18 @@ static void reset(struct mp_filter *f) p->diff = 0; } -static void destroy(struct mp_filter *f) +static void af_drop_destroy(struct mp_filter *f) { - reset(f); + af_drop_reset(f); } static const struct mp_filter_info af_drop_filter = { .name = "drop", .priv_size = sizeof(struct priv), - .process = process, - .command = command, - .reset = reset, - .destroy = destroy, + .process = af_drop_process, + .command = af_drop_command, + .reset = af_drop_reset, + .destroy = af_drop_destroy, }; static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options) diff --git a/audio/filter/af_format.c b/audio/filter/af_format.c index 88ae99ed56..eddce6422f 100644 --- a/audio/filter/af_format.c +++ b/audio/filter/af_format.c @@ -30,7 +30,7 @@ struct f_opts { int out_srate; struct m_channels out_channels; - int fail; + bool fail; }; struct priv { @@ -38,7 +38,7 @@ struct priv { struct mp_pin *in_pin; }; -static void process(struct mp_filter *f) +static void af_format_process(struct mp_filter *f) { struct priv *p = f->priv; @@ -85,7 +85,7 @@ error: static const struct mp_filter_info af_format_filter = { .name = "format", .priv_size = sizeof(struct priv), - .process = process, + .process = af_format_process, }; static struct mp_filter *af_format_create(struct mp_filter *parent, @@ -135,7 +135,7 @@ const struct mp_user_filter_entry af_format = { {"out-srate", OPT_INT(out_srate), M_RANGE(1000, 8*48000)}, {"out-channels", OPT_CHANNELS(out_channels), .flags = M_OPT_CHANNELS_LIMITED}, - {"fail", OPT_FLAG(fail)}, + {"fail", OPT_BOOL(fail)}, {0} }, }, diff --git a/audio/filter/af_lavcac3enc.c b/audio/filter/af_lavcac3enc.c index 86c34a1278..def9700d18 100644 --- a/audio/filter/af_lavcac3enc.c +++ b/audio/filter/af_lavcac3enc.c @@ -50,13 +50,13 @@ #define AC3_MAX_CHANNELS 6 #define AC3_MAX_CODED_FRAME_SIZE 3840 #define AC3_FRAME_SIZE (6 * 256) -const static uint16_t ac3_bitrate_tab[19] = { +static const uint16_t ac3_bitrate_tab[19] = { 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 576, 640 }; struct f_opts { - int add_iec61937_header; + bool add_iec61937_header; int bit_rate; int min_channel_num; char *encoder; @@ -103,7 +103,15 @@ static bool reinit(struct mp_filter *f) if (!bit_rate && chmap.num < AC3_MAX_CHANNELS + 1) bit_rate = default_bit_rate[chmap.num]; - avcodec_close(s->lavc_actx); + avcodec_free_context(&s->lavc_actx); + s->lavc_actx = avcodec_alloc_context3(s->lavc_acodec); + if (!s->lavc_actx) { + MP_ERR(f, "Audio LAVC, couldn't reallocate context!\n"); + return false; + } + + if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0) + return false; // Put sample parameters s->lavc_actx->sample_fmt = af_to_avformat(format); @@ -131,18 +139,18 @@ static bool reinit(struct mp_filter *f) return true; } -static void reset(struct mp_filter *f) +static void af_lavcac3enc_reset(struct mp_filter *f) { struct priv *s = f->priv; TA_FREEP(&s->in_frame); } -static void destroy(struct mp_filter *f) +static void af_lavcac3enc_destroy(struct mp_filter *f) { struct priv *s = f->priv; - reset(f); + af_lavcac3enc_reset(f); av_packet_free(&s->lavc_pkt); avcodec_free_context(&s->lavc_actx); } @@ -153,7 +161,7 @@ static void swap_16(uint16_t *ptr, size_t size) ptr[n] = av_bswap16(ptr[n]); } -static void process(struct mp_filter *f) +static void af_lavcac3enc_process(struct mp_filter *f) { struct priv *s = f->priv; @@ -187,9 +195,6 @@ static void process(struct mp_filter *f) case MP_FRAME_AUDIO: TA_FREEP(&s->in_frame); s->in_frame = input.data; - frame = mp_frame_to_av(input, NULL); - if (!frame) - goto error; if (mp_aframe_get_channels(s->in_frame) < s->opts->min_channel_num) { // Just pass it through. s->in_frame = NULL; @@ -200,6 +205,9 @@ static void process(struct mp_filter *f) if (!reinit(f)) goto error; } + frame = mp_frame_to_av(input, NULL); + if (!frame) + goto error; break; default: goto error; // unexpected packet type } @@ -273,9 +281,9 @@ error: static const struct mp_filter_info af_lavcac3enc_filter = { .name = "lavcac3enc", .priv_size = sizeof(struct priv), - .process = process, - .reset = reset, - .destroy = destroy, + .process = af_lavcac3enc_process, + .reset = af_lavcac3enc_reset, + .destroy = af_lavcac3enc_destroy, }; static void add_chmaps_to_autoconv(struct mp_filter *f, @@ -418,13 +426,13 @@ const struct mp_user_filter_entry af_lavcac3enc = { .name = "lavcac3enc", .priv_size = sizeof(OPT_BASE_STRUCT), .priv_defaults = &(const OPT_BASE_STRUCT) { - .add_iec61937_header = 1, + .add_iec61937_header = true, .bit_rate = 640, .min_channel_num = 3, .encoder = "ac3", }, .options = (const struct m_option[]) { - {"tospdif", OPT_FLAG(add_iec61937_header)}, + {"tospdif", OPT_BOOL(add_iec61937_header)}, {"bitrate", OPT_CHOICE(bit_rate, {"auto", 0}, {"default", 0}), M_RANGE(32, 640)}, {"minch", OPT_INT(min_channel_num), M_RANGE(2, 6)}, diff --git a/audio/filter/af_rubberband.c b/audio/filter/af_rubberband.c index 40b0c7188b..e71937fcb2 100644 --- a/audio/filter/af_rubberband.c +++ b/audio/filter/af_rubberband.c @@ -80,7 +80,7 @@ static bool init_rubberband(struct mp_filter *f) int opts = p->opts->transients | p->opts->detector | p->opts->phase | p->opts->window | p->opts->smoothing | p->opts->formant | - p->opts->pitch | p->opts->channels | + p->opts->pitch | p->opts->channels | #if HAVE_RUBBERBAND_3 p->opts->engine | #endif @@ -105,7 +105,7 @@ static bool init_rubberband(struct mp_filter *f) return true; } -static void process(struct mp_filter *f) +static void af_rubberband_process(struct mp_filter *f) { struct priv *p = f->priv; @@ -233,7 +233,7 @@ error: mp_filter_internal_mark_failed(f); } -static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +static bool af_rubberband_command(struct mp_filter *f, struct mp_filter_command *cmd) { struct priv *p = f->priv; @@ -263,7 +263,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd) return false; } -static void reset(struct mp_filter *f) +static void af_rubberband_reset(struct mp_filter *f) { struct priv *p = f->priv; @@ -274,7 +274,7 @@ static void reset(struct mp_filter *f) TA_FREEP(&p->pending); } -static void destroy(struct mp_filter *f) +static void af_rubberband_destroy(struct mp_filter *f) { struct priv *p = f->priv; @@ -286,10 +286,10 @@ static void destroy(struct mp_filter *f) static const struct mp_filter_info af_rubberband_filter = { .name = "rubberband", .priv_size = sizeof(struct priv), - .process = process, - .command = command, - .reset = reset, - .destroy = destroy, + .process = af_rubberband_process, + .command = af_rubberband_command, + .reset = af_rubberband_reset, + .destroy = af_rubberband_destroy, }; static struct mp_filter *af_rubberband_create(struct mp_filter *parent, diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c index 8675c9a50d..482b91209e 100644 --- a/audio/filter/af_scaletempo.c +++ b/audio/filter/af_scaletempo.c @@ -2,7 +2,7 @@ * scaletempo audio filter * * scale tempo while maintaining pitch - * (WSOLA technique with cross correlation) + * (WSOLA technique with taxicab distance) * inspired by SoundTouch library by Olli Parviainen * * basic algorithm @@ -35,6 +35,7 @@ #include <string.h> #include <limits.h> #include <assert.h> +#include <math.h> #include "audio/aframe.h" #include "audio/format.h" @@ -48,7 +49,7 @@ struct f_opts { float scale_nominal; float ms_stride; float ms_search; - float percent_overlap; + float factor_overlap; #define SCALE_TEMPO 1 #define SCALE_PITCH 2 int speed_opt; @@ -87,8 +88,6 @@ struct priv { // best overlap int frames_search; int num_channels; - void *buf_pre_corr; - void *table_window; int (*best_overlap_offset)(struct priv *s); }; @@ -135,72 +134,144 @@ static bool fill_queue(struct priv *s) return bytes_needed == 0; } -#define UNROLL_PADDING (4 * 4) +// Fit the curve f(x) = a * x^2 + b * x + c such that +// f(-1) = y[0] +// f(0) = y[1] +// f(1) = y[2] +// and return the extremum position and value +// assuming y[0] <= y[1] >= y[2] || y[0] >= y[1] <= y[2] +static void quadratic_interpolation_float( + const float* y_values, float* x, float* value) +{ + const float b = (y_values[2] - y_values[0]) * 0.5f; + const float c = y_values[1]; + const float a = y_values[0] + b - c; + + if (a == 0.f) { + // it's a flat line + *x = 0; + *value = c; + } else { + const float pos = -b / (2.f * a); + *x = pos; + *value = a * pos * pos + b * pos + c; + } +} + +static void quadratic_interpolation_s16( + const int32_t* y_values, float* x, int32_t* value) +{ + const float b = (y_values[2] - y_values[0]) * 0.5f; + const float c = y_values[1]; + const float a = y_values[0] + b - c; + + if (a == 0.f) { + // it's a flat line + *x = 0; + *value = c; + } else { + const float pos = -b / (2.f * a); + *x = pos; + *value = a * pos * pos + b * pos + c; + } +} static int best_overlap_offset_float(struct priv *s) { - float best_corr = INT_MIN; - int best_off = 0; - - float *pw = s->table_window; - float *po = s->buf_overlap; - po += s->num_channels; - float *ppc = s->buf_pre_corr; - for (int i = s->num_channels; i < s->samples_overlap; i++) - *ppc++ = *pw++ **po++; - - float *search_start = (float *)s->buf_queue + s->num_channels; - for (int off = 0; off < s->frames_search; off++) { - float corr = 0; - float *ps = search_start; - ppc = s->buf_pre_corr; - for (int i = s->num_channels; i < s->samples_overlap; i++) - corr += *ppc++ **ps++; - if (corr > best_corr) { - best_corr = corr; - best_off = off; + int num_channels = s->num_channels, frames_search = s->frames_search; + float *source = (float *)s->buf_queue + num_channels; + float *target = (float *)s->buf_overlap + num_channels; + int num_samples = s->samples_overlap - num_channels; + int step_size = 3; + float history[3] = {}; + + float best_distance = FLT_MAX; + int best_offset_approx = 0; + for (int offset = 0; offset < frames_search; offset += step_size) { + float distance = 0; + for (int i = 0; i < num_samples; i++) + distance += fabsf(target[i] - source[offset * num_channels + i]); + + int offset_approx = offset; + history[0] = history[1]; + history[1] = history[2]; + history[2] = distance; + if(offset >= 2 && history[0] >= history[1] && history[1] <= history[2]) { + float extremum; + quadratic_interpolation_float(history, &extremum, &distance); + offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f); + } + + if (distance < best_distance) { + best_distance = distance; + best_offset_approx = offset_approx; + } + } + + best_distance = FLT_MAX; + int best_offset = 0; + int min_offset = MPMAX(0, best_offset_approx - step_size + 1); + int max_offset = MPMIN(frames_search, best_offset_approx + step_size); + for (int offset = min_offset; offset < max_offset; offset++) { + float distance = 0; + for (int i = 0; i < num_samples; i++) + distance += fabsf(target[i] - source[offset * num_channels + i]); + if (distance < best_distance) { + best_distance = distance; + best_offset = offset; } - search_start += s->num_channels; } - return best_off * 4 * s->num_channels; + return best_offset * 4 * num_channels; } static int best_overlap_offset_s16(struct priv *s) { - int64_t best_corr = INT64_MIN; - int best_off = 0; - - int32_t *pw = s->table_window; - int16_t *po = s->buf_overlap; - po += s->num_channels; - int32_t *ppc = s->buf_pre_corr; - for (long i = s->num_channels; i < s->samples_overlap; i++) - *ppc++ = (*pw++ **po++) >> 15; - - int16_t *search_start = (int16_t *)s->buf_queue + s->num_channels; - for (int off = 0; off < s->frames_search; off++) { - int64_t corr = 0; - int16_t *ps = search_start; - ppc = s->buf_pre_corr; - ppc += s->samples_overlap - s->num_channels; - ps += s->samples_overlap - s->num_channels; - long i = -(s->samples_overlap - s->num_channels); - do { - corr += ppc[i + 0] * (int64_t)ps[i + 0]; - corr += ppc[i + 1] * (int64_t)ps[i + 1]; - corr += ppc[i + 2] * (int64_t)ps[i + 2]; - corr += ppc[i + 3] * (int64_t)ps[i + 3]; - i += 4; - } while (i < 0); - if (corr > best_corr) { - best_corr = corr; - best_off = off; + int num_channels = s->num_channels, frames_search = s->frames_search; + int16_t *source = (int16_t *)s->buf_queue + num_channels; + int16_t *target = (int16_t *)s->buf_overlap + num_channels; + int num_samples = s->samples_overlap - num_channels; + int step_size = 3; + int32_t history[3] = {}; + + int32_t best_distance = INT32_MAX; + int best_offset_approx = 0; + for (int offset = 0; offset < frames_search; offset += step_size) { + int32_t distance = 0; + for (int i = 0; i < num_samples; i++) + distance += abs((int32_t)target[i] - source[offset * num_channels + i]); + + int offset_approx = offset; + history[0] = history[1]; + history[1] = history[2]; + history[2] = distance; + if(offset >= 2 && history[0] >= history[1] && history[1] <= history[2]) { + float extremum; + quadratic_interpolation_s16(history, &extremum, &distance); + offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f); + } + + if (distance < best_distance) { + best_distance = distance; + best_offset_approx = offset_approx; } - search_start += s->num_channels; } - return best_off * 2 * s->num_channels; + best_distance = INT32_MAX; + int best_offset = 0; + int min_offset = MPMAX(0, best_offset_approx - step_size + 1); + int max_offset = MPMIN(frames_search, best_offset_approx + step_size); + for (int offset = min_offset; offset < max_offset; offset++) { + int32_t distance = 0; + for (int i = 0; i < num_samples; i++) + distance += abs((int32_t)target[i] - source[offset * num_channels + i]); + if (distance < best_distance) { + best_distance = distance; + best_offset = offset; + } + } + + return best_offset * 2 * s->num_channels; } static void output_overlap_float(struct priv *s, void *buf_out, @@ -211,8 +282,9 @@ static void output_overlap_float(struct priv *s, void *buf_out, float *po = s->buf_overlap; float *pin = (float *)(s->buf_queue + bytes_off); for (int i = 0; i < s->samples_overlap; i++) { - *pout++ = *po - *pb++ *(*po - *pin++); - po++; + // the math is equal to *po * (1 - *pb) + *pin * *pb + float o = *po++; + *pout++ = o - *pb++ * (o - *pin++); } } @@ -224,12 +296,13 @@ static void output_overlap_s16(struct priv *s, void *buf_out, int16_t *po = s->buf_overlap; int16_t *pin = (int16_t *)(s->buf_queue + bytes_off); for (int i = 0; i < s->samples_overlap; i++) { - *pout++ = *po - ((*pb++ *(*po - *pin++)) >> 16); - po++; + // the math is equal to *po * (1 - *pb) + *pin * *pb + int32_t o = *po++; + *pout++ = o - ((*pb++ *(o - *pin++)) >> 16); } } -static void process(struct mp_filter *f) +static void af_scaletempo_process(struct mp_filter *f) { struct priv *s = f->priv; @@ -400,7 +473,7 @@ static bool reinit(struct mp_filter *f) update_speed(s, s->speed); - int frames_overlap = s->frames_stride * s->opts->percent_overlap; + int frames_overlap = s->frames_stride * s->opts->factor_overlap; if (frames_overlap <= 0) { s->bytes_standing = s->bytes_stride; s->samples_standing = s->bytes_standing / bps; @@ -420,18 +493,20 @@ static bool reinit(struct mp_filter *f) memset(s->buf_overlap, 0, s->bytes_overlap); if (use_int) { int32_t *pb = s->table_blend; - int64_t blend = 0; + const float scale = M_PI / frames_overlap; for (int i = 0; i < frames_overlap; i++) { - int32_t v = blend / frames_overlap; + // Hann function + const int32_t v = 0.5f * (1.0f - cosf(i * scale)) * 65536 + 0.5; for (int j = 0; j < nch; j++) *pb++ = v; - blend += 65536; // 2^16 } s->output_overlap = output_overlap_s16; } else { float *pb = s->table_blend; + const float scale = M_PI / frames_overlap; for (int i = 0; i < frames_overlap; i++) { - float v = i / (float)frames_overlap; + // Hann function + const float v = 0.5f * (1.0f - cosf(i * scale)); for (int j = 0; j < nch; j++) *pb++ = v; } @@ -444,39 +519,8 @@ static bool reinit(struct mp_filter *f) s->best_overlap_offset = NULL; else { if (use_int) { - int64_t t = frames_overlap; - int32_t n = 8589934588LL / (t * t); // 4 * (2^31 - 1) / t^2 - s->buf_pre_corr = realloc(s->buf_pre_corr, - s->bytes_overlap * 2 + UNROLL_PADDING); - s->table_window = realloc(s->table_window, - s->bytes_overlap * 2 - nch * bps * 2); - if (!s->buf_pre_corr || !s->table_window) { - MP_FATAL(f, "Out of memory\n"); - return false; - } - memset((char *)s->buf_pre_corr + s->bytes_overlap * 2, 0, - UNROLL_PADDING); - int32_t *pw = s->table_window; - for (int i = 1; i < frames_overlap; i++) { - int32_t v = (i * (t - i) * n) >> 15; - for (int j = 0; j < nch; j++) - *pw++ = v; - } s->best_overlap_offset = best_overlap_offset_s16; } else { - s->buf_pre_corr = realloc(s->buf_pre_corr, s->bytes_overlap); - s->table_window = realloc(s->table_window, - s->bytes_overlap - nch * bps); - if (!s->buf_pre_corr || !s->table_window) { - MP_FATAL(f, "Out of memory\n"); - return false; - } - float *pw = s->table_window; - for (int i = 1; i < frames_overlap; i++) { - float v = i * (frames_overlap - i); - for (int j = 0; j < nch; j++) - *pw++ = v; - } s->best_overlap_offset = best_overlap_offset_float; } } @@ -486,7 +530,7 @@ static bool reinit(struct mp_filter *f) s->bytes_queue = (s->frames_search + s->frames_stride + frames_overlap) * bps * nch; - s->buf_queue = realloc(s->buf_queue, s->bytes_queue + UNROLL_PADDING); + s->buf_queue = realloc(s->buf_queue, s->bytes_queue); if (!s->buf_queue) { MP_FATAL(f, "Out of memory\n"); return false; @@ -511,7 +555,7 @@ static bool reinit(struct mp_filter *f) return true; } -static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +static bool af_scaletempo_command(struct mp_filter *f, struct mp_filter_command *cmd) { struct priv *s = f->priv; @@ -530,7 +574,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd) return false; } -static void reset(struct mp_filter *f) +static void af_scaletempo_reset(struct mp_filter *f) { struct priv *s = f->priv; @@ -543,14 +587,12 @@ static void reset(struct mp_filter *f) TA_FREEP(&s->in); } -static void destroy(struct mp_filter *f) +static void af_scaletempo_destroy(struct mp_filter *f) { struct priv *s = f->priv; free(s->buf_queue); free(s->buf_overlap); - free(s->buf_pre_corr); free(s->table_blend); - free(s->table_window); TA_FREEP(&s->in); mp_filter_free_children(f); } @@ -558,10 +600,10 @@ static void destroy(struct mp_filter *f) static const struct mp_filter_info af_scaletempo_filter = { .name = "scaletempo", .priv_size = sizeof(struct priv), - .process = process, - .command = command, - .reset = reset, - .destroy = destroy, + .process = af_scaletempo_process, + .command = af_scaletempo_command, + .reset = af_scaletempo_reset, + .destroy = af_scaletempo_destroy, }; static struct mp_filter *af_scaletempo_create(struct mp_filter *parent, @@ -604,7 +646,7 @@ const struct mp_user_filter_entry af_scaletempo = { .priv_size = sizeof(OPT_BASE_STRUCT), .priv_defaults = &(const OPT_BASE_STRUCT) { .ms_stride = 60, - .percent_overlap = .20, + .factor_overlap = .20, .ms_search = 14, .speed_opt = SCALE_TEMPO, .scale_nominal = 1.0, @@ -612,7 +654,7 @@ const struct mp_user_filter_entry af_scaletempo = { .options = (const struct m_option[]) { {"scale", OPT_FLOAT(scale_nominal), M_RANGE(0.01, DBL_MAX)}, {"stride", OPT_FLOAT(ms_stride), M_RANGE(0.01, DBL_MAX)}, - {"overlap", OPT_FLOAT(percent_overlap), M_RANGE(0, 1)}, + {"overlap", OPT_FLOAT(factor_overlap), M_RANGE(0, 1)}, {"search", OPT_FLOAT(ms_search), M_RANGE(0, DBL_MAX)}, {"speed", OPT_CHOICE(speed_opt, {"pitch", SCALE_PITCH}, diff --git a/audio/filter/af_scaletempo2.c b/audio/filter/af_scaletempo2.c index 1a822ecd50..749e219454 100644 --- a/audio/filter/af_scaletempo2.c +++ b/audio/filter/af_scaletempo2.c @@ -8,21 +8,20 @@ #include "options/m_option.h" struct priv { - struct mp_scaletempo2 data; + struct mp_scaletempo2 *data; struct mp_pin *in_pin; struct mp_aframe *cur_format; struct mp_aframe_pool *out_pool; bool sent_final; struct mp_aframe *pending; bool initialized; - double frame_delay; float speed; }; static bool init_scaletempo2(struct mp_filter *f); -static void reset(struct mp_filter *f); +static void af_scaletempo2_reset(struct mp_filter *f); -static void process(struct mp_filter *f) +static void af_scaletempo2_process(struct mp_filter *f) { struct priv *p = f->priv; @@ -30,7 +29,7 @@ static void process(struct mp_filter *f) return; while (!p->initialized || !p->pending || - !mp_scaletempo2_frames_available(&p->data)) + !mp_scaletempo2_frames_available(p->data, p->speed)) { bool eof = false; if (!p->pending || !mp_aframe_get_size(p->pending)) { @@ -65,14 +64,16 @@ static void process(struct mp_filter *f) if (p->pending && !format_change && !p->sent_final) { int frame_size = mp_aframe_get_size(p->pending); uint8_t **planes = mp_aframe_get_data_ro(p->pending); - int read = mp_scaletempo2_fill_input_buffer(&p->data, - planes, frame_size, final); - p->frame_delay += read; + int read = mp_scaletempo2_fill_input_buffer(p->data, + planes, frame_size, p->speed); mp_aframe_skip_samples(p->pending, read); } - p->sent_final |= final; + if (final && p->pending && !p->sent_final) { + mp_scaletempo2_set_final(p->data); + p->sent_final = true; + } - if (mp_scaletempo2_frames_available(&p->data)) { + if (mp_scaletempo2_frames_available(p->data, p->speed)) { if (eof) { mp_pin_out_repeat_eof(p->in_pin); // drain more next time } @@ -82,18 +83,15 @@ static void process(struct mp_filter *f) if (eof) { mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); return; - } else if (format_change) { - // go on with proper reinit on the next iteration - p->initialized = false; - p->sent_final = false; } + // for format change go on with proper reinit on the next iteration } } assert(p->pending); - if (mp_scaletempo2_frames_available(&p->data)) { + if (mp_scaletempo2_frames_available(p->data, p->speed)) { struct mp_aframe *out = mp_aframe_new_ref(p->cur_format); - int out_samples = p->data.ola_hop_size; + int out_samples = p->data->ola_hop_size; if (mp_aframe_pool_allocate(p->out_pool, out, out_samples) < 0) { talloc_free(out); goto error; @@ -103,17 +101,30 @@ static void process(struct mp_filter *f) uint8_t **planes = mp_aframe_get_data_rw(out); assert(planes); - assert(mp_aframe_get_planes(out) == p->data.channels); + assert(mp_aframe_get_planes(out) == p->data->channels); - out_samples = mp_scaletempo2_fill_buffer(&p->data, + out_samples = mp_scaletempo2_fill_buffer(p->data, (float**)planes, out_samples, p->speed); double pts = mp_aframe_get_pts(p->pending); - p->frame_delay -= out_samples * p->speed; - if (pts != MP_NOPTS_VALUE) { - double delay = p->frame_delay / mp_aframe_get_effective_rate(out); - mp_aframe_set_pts(out, pts - delay); + double frame_delay = mp_scaletempo2_get_latency(p->data, p->speed) + + out_samples * p->speed; + mp_aframe_set_pts(out, pts - frame_delay / mp_aframe_get_effective_rate(out)); + + if (p->sent_final) { + double remain_pts = pts - mp_aframe_get_pts(out); + double rate = mp_aframe_get_effective_rate(out) / p->speed; + int max_samples = MPMAX(0, (int) (remain_pts * rate)); + // truncate final packet to expected length + if (out_samples >= max_samples) { + out_samples = max_samples; + + // reset the filter to ensure it stops generating audio + // and mp_scaletempo2_frames_available returns false + mp_scaletempo2_reset(p->data); + } + } } mp_aframe_set_size(out, out_samples); @@ -137,16 +148,15 @@ static bool init_scaletempo2(struct mp_filter *f) mp_aframe_reset(p->cur_format); p->initialized = true; p->sent_final = false; - p->frame_delay = 0; mp_aframe_config_copy(p->cur_format, p->pending); - mp_scaletempo2_init(&p->data, mp_aframe_get_channels(p->pending), + mp_scaletempo2_init(p->data, mp_aframe_get_channels(p->pending), mp_aframe_get_rate(p->pending)); return true; } -static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +static bool af_scaletempo2_command(struct mp_filter *f, struct mp_filter_command *cmd) { struct priv *p = f->priv; @@ -159,29 +169,28 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd) return false; } -static void reset(struct mp_filter *f) +static void af_scaletempo2_reset(struct mp_filter *f) { struct priv *p = f->priv; - mp_scaletempo2_reset(&p->data); - p->frame_delay = 0; + mp_scaletempo2_reset(p->data); p->initialized = false; TA_FREEP(&p->pending); } -static void destroy(struct mp_filter *f) +static void af_scaletempo2_destroy(struct mp_filter *f) { struct priv *p = f->priv; - mp_scaletempo2_destroy(&p->data); - talloc_free(p->pending); + TA_FREEP(&p->data); + TA_FREEP(&p->pending); } static const struct mp_filter_info af_scaletempo2_filter = { .name = "scaletempo2", .priv_size = sizeof(struct priv), - .process = process, - .command = command, - .reset = reset, - .destroy = destroy, + .process = af_scaletempo2_process, + .command = af_scaletempo2_command, + .reset = af_scaletempo2_reset, + .destroy = af_scaletempo2_destroy, }; static struct mp_filter *af_scaletempo2_create( @@ -197,7 +206,8 @@ static struct mp_filter *af_scaletempo2_create( mp_filter_add_pin(f, MP_PIN_OUT, "out"); struct priv *p = f->priv; - p->data.opts = talloc_steal(p, options); + p->data = talloc_zero(p, struct mp_scaletempo2); + p->data->opts = talloc_steal(p, options); p->speed = 1.0; p->cur_format = talloc_steal(p, mp_aframe_create()); p->out_pool = mp_aframe_pool_create(p); @@ -225,9 +235,9 @@ const struct mp_user_filter_entry af_scaletempo2 = { .priv_size = sizeof(OPT_BASE_STRUCT), .priv_defaults = &(const OPT_BASE_STRUCT) { .min_playback_rate = 0.25, - .max_playback_rate = 4.0, - .ola_window_size_ms = 20, - .wsola_search_interval_ms = 30, + .max_playback_rate = 8.0, + .ola_window_size_ms = 12, + .wsola_search_interval_ms = 40, }, .options = (const struct m_option[]) { {"search-interval", diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c index d7c0677c45..7f3a99638f 100644 --- a/audio/filter/af_scaletempo2_internals.c +++ b/audio/filter/af_scaletempo2_internals.c @@ -41,19 +41,15 @@ static bool in_interval(int n, struct interval q) return n >= q.lo && n <= q.hi; } -static float **realloc_2d(float **p, int x, int y) +static void alloc_sample_buffer(struct mp_scaletempo2 *p, float ***ptr, size_t size) { - float **array = realloc(p, sizeof(float*) * x + sizeof(float) * x * y); - float* data = (float*) (array + x); - for (int i = 0; i < x; ++i) { - array[i] = data + i * y; - } - return array; -} + talloc_free(*ptr); -static void zero_2d(float **a, int x, int y) -{ - memset(a + x, 0, sizeof(float) * x * y); + float **buff = talloc_array(p, float*, p->channels); + for (int i = 0; i < p->channels; ++i) { + buff[i] = talloc_array(buff, float, size); + } + *ptr = buff; } static void zero_2d_partial(float **a, int x, int y) @@ -93,15 +89,15 @@ static void multi_channel_moving_block_energies( } static float multi_channel_similarity_measure( - const float* dot_prod_a_b, - const float* energy_a, const float* energy_b, + const float* dot_prod, + const float* energy_target, const float* energy_candidate, int channels) { const float epsilon = 1e-12f; float similarity_measure = 0.0f; for (int n = 0; n < channels; ++n) { - similarity_measure += dot_prod_a_b[n] - / sqrtf(energy_a[n] * energy_b[n] + epsilon); + similarity_measure += dot_prod[n] * energy_target[n] + / sqrtf(energy_target[n] * energy_candidate[n] + epsilon); } return similarity_measure; } @@ -421,18 +417,15 @@ |