summary | refs | log | tree | commit | diff | stats
path: root/audio/filter
diff options
context:
space:
mode:
Diffstat (limited to 'audio/filter')
-rw-r--r--  audio/filter/af_drop.c                    18
-rw-r--r--  audio/filter/af_format.c                   8
-rw-r--r--  audio/filter/af_lavcac3enc.c              38
-rw-r--r--  audio/filter/af_rubberband.c              18
-rw-r--r--  audio/filter/af_scaletempo.c             262
-rw-r--r--  audio/filter/af_scaletempo2.c             88
-rw-r--r--  audio/filter/af_scaletempo2_internals.c  237
-rw-r--r--  audio/filter/af_scaletempo2_internals.h   38
8 files changed, 413 insertions, 294 deletions
diff --git a/audio/filter/af_drop.c b/audio/filter/af_drop.c
index 724c482720..499389dd2b 100644
--- a/audio/filter/af_drop.c
+++ b/audio/filter/af_drop.c
@@ -11,7 +11,7 @@ struct priv {
struct mp_aframe *last; // for repeating
};
-static void process(struct mp_filter *f)
+static void af_drop_process(struct mp_filter *f)
{
struct priv *p = f->priv;
@@ -52,7 +52,7 @@ static void process(struct mp_filter *f)
mp_pin_in_write(f->ppins[1], frame);
}
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_drop_command(struct mp_filter *f, struct mp_filter_command *cmd)
{
struct priv *p = f->priv;
@@ -65,7 +65,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
return false;
}
-static void reset(struct mp_filter *f)
+static void af_drop_reset(struct mp_filter *f)
{
struct priv *p = f->priv;
@@ -73,18 +73,18 @@ static void reset(struct mp_filter *f)
p->diff = 0;
}
-static void destroy(struct mp_filter *f)
+static void af_drop_destroy(struct mp_filter *f)
{
- reset(f);
+ af_drop_reset(f);
}
static const struct mp_filter_info af_drop_filter = {
.name = "drop",
.priv_size = sizeof(struct priv),
- .process = process,
- .command = command,
- .reset = reset,
- .destroy = destroy,
+ .process = af_drop_process,
+ .command = af_drop_command,
+ .reset = af_drop_reset,
+ .destroy = af_drop_destroy,
};
static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options)
diff --git a/audio/filter/af_format.c b/audio/filter/af_format.c
index 88ae99ed56..eddce6422f 100644
--- a/audio/filter/af_format.c
+++ b/audio/filter/af_format.c
@@ -30,7 +30,7 @@ struct f_opts {
int out_srate;
struct m_channels out_channels;
- int fail;
+ bool fail;
};
struct priv {
@@ -38,7 +38,7 @@ struct priv {
struct mp_pin *in_pin;
};
-static void process(struct mp_filter *f)
+static void af_format_process(struct mp_filter *f)
{
struct priv *p = f->priv;
@@ -85,7 +85,7 @@ error:
static const struct mp_filter_info af_format_filter = {
.name = "format",
.priv_size = sizeof(struct priv),
- .process = process,
+ .process = af_format_process,
};
static struct mp_filter *af_format_create(struct mp_filter *parent,
@@ -135,7 +135,7 @@ const struct mp_user_filter_entry af_format = {
{"out-srate", OPT_INT(out_srate), M_RANGE(1000, 8*48000)},
{"out-channels", OPT_CHANNELS(out_channels),
.flags = M_OPT_CHANNELS_LIMITED},
- {"fail", OPT_FLAG(fail)},
+ {"fail", OPT_BOOL(fail)},
{0}
},
},
diff --git a/audio/filter/af_lavcac3enc.c b/audio/filter/af_lavcac3enc.c
index 86c34a1278..def9700d18 100644
--- a/audio/filter/af_lavcac3enc.c
+++ b/audio/filter/af_lavcac3enc.c
@@ -50,13 +50,13 @@
#define AC3_MAX_CHANNELS 6
#define AC3_MAX_CODED_FRAME_SIZE 3840
#define AC3_FRAME_SIZE (6 * 256)
-const static uint16_t ac3_bitrate_tab[19] = {
+static const uint16_t ac3_bitrate_tab[19] = {
32, 40, 48, 56, 64, 80, 96, 112, 128,
160, 192, 224, 256, 320, 384, 448, 512, 576, 640
};
struct f_opts {
- int add_iec61937_header;
+ bool add_iec61937_header;
int bit_rate;
int min_channel_num;
char *encoder;
@@ -103,7 +103,15 @@ static bool reinit(struct mp_filter *f)
if (!bit_rate && chmap.num < AC3_MAX_CHANNELS + 1)
bit_rate = default_bit_rate[chmap.num];
- avcodec_close(s->lavc_actx);
+ avcodec_free_context(&s->lavc_actx);
+ s->lavc_actx = avcodec_alloc_context3(s->lavc_acodec);
+ if (!s->lavc_actx) {
+ MP_ERR(f, "Audio LAVC, couldn't reallocate context!\n");
+ return false;
+ }
+
+ if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0)
+ return false;
// Put sample parameters
s->lavc_actx->sample_fmt = af_to_avformat(format);
@@ -131,18 +139,18 @@ static bool reinit(struct mp_filter *f)
return true;
}
-static void reset(struct mp_filter *f)
+static void af_lavcac3enc_reset(struct mp_filter *f)
{
struct priv *s = f->priv;
TA_FREEP(&s->in_frame);
}
-static void destroy(struct mp_filter *f)
+static void af_lavcac3enc_destroy(struct mp_filter *f)
{
struct priv *s = f->priv;
- reset(f);
+ af_lavcac3enc_reset(f);
av_packet_free(&s->lavc_pkt);
avcodec_free_context(&s->lavc_actx);
}
@@ -153,7 +161,7 @@ static void swap_16(uint16_t *ptr, size_t size)
ptr[n] = av_bswap16(ptr[n]);
}
-static void process(struct mp_filter *f)
+static void af_lavcac3enc_process(struct mp_filter *f)
{
struct priv *s = f->priv;
@@ -187,9 +195,6 @@ static void process(struct mp_filter *f)
case MP_FRAME_AUDIO:
TA_FREEP(&s->in_frame);
s->in_frame = input.data;
- frame = mp_frame_to_av(input, NULL);
- if (!frame)
- goto error;
if (mp_aframe_get_channels(s->in_frame) < s->opts->min_channel_num) {
// Just pass it through.
s->in_frame = NULL;
@@ -200,6 +205,9 @@ static void process(struct mp_filter *f)
if (!reinit(f))
goto error;
}
+ frame = mp_frame_to_av(input, NULL);
+ if (!frame)
+ goto error;
break;
default: goto error; // unexpected packet type
}
@@ -273,9 +281,9 @@ error:
static const struct mp_filter_info af_lavcac3enc_filter = {
.name = "lavcac3enc",
.priv_size = sizeof(struct priv),
- .process = process,
- .reset = reset,
- .destroy = destroy,
+ .process = af_lavcac3enc_process,
+ .reset = af_lavcac3enc_reset,
+ .destroy = af_lavcac3enc_destroy,
};
static void add_chmaps_to_autoconv(struct mp_filter *f,
@@ -418,13 +426,13 @@ const struct mp_user_filter_entry af_lavcac3enc = {
.name = "lavcac3enc",
.priv_size = sizeof(OPT_BASE_STRUCT),
.priv_defaults = &(const OPT_BASE_STRUCT) {
- .add_iec61937_header = 1,
+ .add_iec61937_header = true,
.bit_rate = 640,
.min_channel_num = 3,
.encoder = "ac3",
},
.options = (const struct m_option[]) {
- {"tospdif", OPT_FLAG(add_iec61937_header)},
+ {"tospdif", OPT_BOOL(add_iec61937_header)},
{"bitrate", OPT_CHOICE(bit_rate,
{"auto", 0}, {"default", 0}), M_RANGE(32, 640)},
{"minch", OPT_INT(min_channel_num), M_RANGE(2, 6)},
diff --git a/audio/filter/af_rubberband.c b/audio/filter/af_rubberband.c
index 40b0c7188b..e71937fcb2 100644
--- a/audio/filter/af_rubberband.c
+++ b/audio/filter/af_rubberband.c
@@ -80,7 +80,7 @@ static bool init_rubberband(struct mp_filter *f)
int opts = p->opts->transients | p->opts->detector | p->opts->phase |
p->opts->window | p->opts->smoothing | p->opts->formant |
- p->opts->pitch | p->opts->channels |
+ p->opts->pitch | p->opts->channels |
#if HAVE_RUBBERBAND_3
p->opts->engine |
#endif
@@ -105,7 +105,7 @@ static bool init_rubberband(struct mp_filter *f)
return true;
}
-static void process(struct mp_filter *f)
+static void af_rubberband_process(struct mp_filter *f)
{
struct priv *p = f->priv;
@@ -233,7 +233,7 @@ error:
mp_filter_internal_mark_failed(f);
}
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_rubberband_command(struct mp_filter *f, struct mp_filter_command *cmd)
{
struct priv *p = f->priv;
@@ -263,7 +263,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
return false;
}
-static void reset(struct mp_filter *f)
+static void af_rubberband_reset(struct mp_filter *f)
{
struct priv *p = f->priv;
@@ -274,7 +274,7 @@ static void reset(struct mp_filter *f)
TA_FREEP(&p->pending);
}
-static void destroy(struct mp_filter *f)
+static void af_rubberband_destroy(struct mp_filter *f)
{
struct priv *p = f->priv;
@@ -286,10 +286,10 @@ static void destroy(struct mp_filter *f)
static const struct mp_filter_info af_rubberband_filter = {
.name = "rubberband",
.priv_size = sizeof(struct priv),
- .process = process,
- .command = command,
- .reset = reset,
- .destroy = destroy,
+ .process = af_rubberband_process,
+ .command = af_rubberband_command,
+ .reset = af_rubberband_reset,
+ .destroy = af_rubberband_destroy,
};
static struct mp_filter *af_rubberband_create(struct mp_filter *parent,
diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c
index 8675c9a50d..482b91209e 100644
--- a/audio/filter/af_scaletempo.c
+++ b/audio/filter/af_scaletempo.c
@@ -2,7 +2,7 @@
* scaletempo audio filter
*
* scale tempo while maintaining pitch
- * (WSOLA technique with cross correlation)
+ * (WSOLA technique with taxicab distance)
* inspired by SoundTouch library by Olli Parviainen
*
* basic algorithm
@@ -35,6 +35,7 @@
#include <string.h>
#include <limits.h>
#include <assert.h>
+#include <math.h>
#include "audio/aframe.h"
#include "audio/format.h"
@@ -48,7 +49,7 @@ struct f_opts {
float scale_nominal;
float ms_stride;
float ms_search;
- float percent_overlap;
+ float factor_overlap;
#define SCALE_TEMPO 1
#define SCALE_PITCH 2
int speed_opt;
@@ -87,8 +88,6 @@ struct priv {
// best overlap
int frames_search;
int num_channels;
- void *buf_pre_corr;
- void *table_window;
int (*best_overlap_offset)(struct priv *s);
};
@@ -135,72 +134,144 @@ static bool fill_queue(struct priv *s)
return bytes_needed == 0;
}
-#define UNROLL_PADDING (4 * 4)
+// Fit the curve f(x) = a * x^2 + b * x + c such that
+// f(-1) = y[0]
+// f(0) = y[1]
+// f(1) = y[2]
+// and return the extremum position and value
+// assuming y[0] <= y[1] >= y[2] || y[0] >= y[1] <= y[2]
+static void quadratic_interpolation_float(
+ const float* y_values, float* x, float* value)
+{
+ const float b = (y_values[2] - y_values[0]) * 0.5f;
+ const float c = y_values[1];
+ const float a = y_values[0] + b - c;
+
+ if (a == 0.f) {
+ // it's a flat line
+ *x = 0;
+ *value = c;
+ } else {
+ const float pos = -b / (2.f * a);
+ *x = pos;
+ *value = a * pos * pos + b * pos + c;
+ }
+}
+
+static void quadratic_interpolation_s16(
+ const int32_t* y_values, float* x, int32_t* value)
+{
+ const float b = (y_values[2] - y_values[0]) * 0.5f;
+ const float c = y_values[1];
+ const float a = y_values[0] + b - c;
+
+ if (a == 0.f) {
+ // it's a flat line
+ *x = 0;
+ *value = c;
+ } else {
+ const float pos = -b / (2.f * a);
+ *x = pos;
+ *value = a * pos * pos + b * pos + c;
+ }
+}
static int best_overlap_offset_float(struct priv *s)
{
- float best_corr = INT_MIN;
- int best_off = 0;
-
- float *pw = s->table_window;
- float *po = s->buf_overlap;
- po += s->num_channels;
- float *ppc = s->buf_pre_corr;
- for (int i = s->num_channels; i < s->samples_overlap; i++)
- *ppc++ = *pw++ **po++;
-
- float *search_start = (float *)s->buf_queue + s->num_channels;
- for (int off = 0; off < s->frames_search; off++) {
- float corr = 0;
- float *ps = search_start;
- ppc = s->buf_pre_corr;
- for (int i = s->num_channels; i < s->samples_overlap; i++)
- corr += *ppc++ **ps++;
- if (corr > best_corr) {
- best_corr = corr;
- best_off = off;
+ int num_channels = s->num_channels, frames_search = s->frames_search;
+ float *source = (float *)s->buf_queue + num_channels;
+ float *target = (float *)s->buf_overlap + num_channels;
+ int num_samples = s->samples_overlap - num_channels;
+ int step_size = 3;
+ float history[3] = {};
+
+ float best_distance = FLT_MAX;
+ int best_offset_approx = 0;
+ for (int offset = 0; offset < frames_search; offset += step_size) {
+ float distance = 0;
+ for (int i = 0; i < num_samples; i++)
+ distance += fabsf(target[i] - source[offset * num_channels + i]);
+
+ int offset_approx = offset;
+ history[0] = history[1];
+ history[1] = history[2];
+ history[2] = distance;
+ if(offset >= 2 && history[0] >= history[1] && history[1] <= history[2]) {
+ float extremum;
+ quadratic_interpolation_float(history, &extremum, &distance);
+ offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f);
+ }
+
+ if (distance < best_distance) {
+ best_distance = distance;
+ best_offset_approx = offset_approx;
+ }
+ }
+
+ best_distance = FLT_MAX;
+ int best_offset = 0;
+ int min_offset = MPMAX(0, best_offset_approx - step_size + 1);
+ int max_offset = MPMIN(frames_search, best_offset_approx + step_size);
+ for (int offset = min_offset; offset < max_offset; offset++) {
+ float distance = 0;
+ for (int i = 0; i < num_samples; i++)
+ distance += fabsf(target[i] - source[offset * num_channels + i]);
+ if (distance < best_distance) {
+ best_distance = distance;
+ best_offset = offset;
}
- search_start += s->num_channels;
}
- return best_off * 4 * s->num_channels;
+ return best_offset * 4 * num_channels;
}
static int best_overlap_offset_s16(struct priv *s)
{
- int64_t best_corr = INT64_MIN;
- int best_off = 0;
-
- int32_t *pw = s->table_window;
- int16_t *po = s->buf_overlap;
- po += s->num_channels;
- int32_t *ppc = s->buf_pre_corr;
- for (long i = s->num_channels; i < s->samples_overlap; i++)
- *ppc++ = (*pw++ **po++) >> 15;
-
- int16_t *search_start = (int16_t *)s->buf_queue + s->num_channels;
- for (int off = 0; off < s->frames_search; off++) {
- int64_t corr = 0;
- int16_t *ps = search_start;
- ppc = s->buf_pre_corr;
- ppc += s->samples_overlap - s->num_channels;
- ps += s->samples_overlap - s->num_channels;
- long i = -(s->samples_overlap - s->num_channels);
- do {
- corr += ppc[i + 0] * (int64_t)ps[i + 0];
- corr += ppc[i + 1] * (int64_t)ps[i + 1];
- corr += ppc[i + 2] * (int64_t)ps[i + 2];
- corr += ppc[i + 3] * (int64_t)ps[i + 3];
- i += 4;
- } while (i < 0);
- if (corr > best_corr) {
- best_corr = corr;
- best_off = off;
+ int num_channels = s->num_channels, frames_search = s->frames_search;
+ int16_t *source = (int16_t *)s->buf_queue + num_channels;
+ int16_t *target = (int16_t *)s->buf_overlap + num_channels;
+ int num_samples = s->samples_overlap - num_channels;
+ int step_size = 3;
+ int32_t history[3] = {};
+
+ int32_t best_distance = INT32_MAX;
+ int best_offset_approx = 0;
+ for (int offset = 0; offset < frames_search; offset += step_size) {
+ int32_t distance = 0;
+ for (int i = 0; i < num_samples; i++)
+ distance += abs((int32_t)target[i] - source[offset * num_channels + i]);
+
+ int offset_approx = offset;
+ history[0] = history[1];
+ history[1] = history[2];
+ history[2] = distance;
+ if(offset >= 2 && history[0] >= history[1] && history[1] <= history[2]) {
+ float extremum;
+ quadratic_interpolation_s16(history, &extremum, &distance);
+ offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f);
+ }
+
+ if (distance < best_distance) {
+ best_distance = distance;
+ best_offset_approx = offset_approx;
}
- search_start += s->num_channels;
}
- return best_off * 2 * s->num_channels;
+ best_distance = INT32_MAX;
+ int best_offset = 0;
+ int min_offset = MPMAX(0, best_offset_approx - step_size + 1);
+ int max_offset = MPMIN(frames_search, best_offset_approx + step_size);
+ for (int offset = min_offset; offset < max_offset; offset++) {
+ int32_t distance = 0;
+ for (int i = 0; i < num_samples; i++)
+ distance += abs((int32_t)target[i] - source[offset * num_channels + i]);
+ if (distance < best_distance) {
+ best_distance = distance;
+ best_offset = offset;
+ }
+ }
+
+ return best_offset * 2 * s->num_channels;
}
static void output_overlap_float(struct priv *s, void *buf_out,
@@ -211,8 +282,9 @@ static void output_overlap_float(struct priv *s, void *buf_out,
float *po = s->buf_overlap;
float *pin = (float *)(s->buf_queue + bytes_off);
for (int i = 0; i < s->samples_overlap; i++) {
- *pout++ = *po - *pb++ *(*po - *pin++);
- po++;
+ // the math is equal to *po * (1 - *pb) + *pin * *pb
+ float o = *po++;
+ *pout++ = o - *pb++ * (o - *pin++);
}
}
@@ -224,12 +296,13 @@ static void output_overlap_s16(struct priv *s, void *buf_out,
int16_t *po = s->buf_overlap;
int16_t *pin = (int16_t *)(s->buf_queue + bytes_off);
for (int i = 0; i < s->samples_overlap; i++) {
- *pout++ = *po - ((*pb++ *(*po - *pin++)) >> 16);
- po++;
+ // the math is equal to *po * (1 - *pb) + *pin * *pb
+ int32_t o = *po++;
+ *pout++ = o - ((*pb++ *(o - *pin++)) >> 16);
}
}
-static void process(struct mp_filter *f)
+static void af_scaletempo_process(struct mp_filter *f)
{
struct priv *s = f->priv;
@@ -400,7 +473,7 @@ static bool reinit(struct mp_filter *f)
update_speed(s, s->speed);
- int frames_overlap = s->frames_stride * s->opts->percent_overlap;
+ int frames_overlap = s->frames_stride * s->opts->factor_overlap;
if (frames_overlap <= 0) {
s->bytes_standing = s->bytes_stride;
s->samples_standing = s->bytes_standing / bps;
@@ -420,18 +493,20 @@ static bool reinit(struct mp_filter *f)
memset(s->buf_overlap, 0, s->bytes_overlap);
if (use_int) {
int32_t *pb = s->table_blend;
- int64_t blend = 0;
+ const float scale = M_PI / frames_overlap;
for (int i = 0; i < frames_overlap; i++) {
- int32_t v = blend / frames_overlap;
+ // Hann function
+ const int32_t v = 0.5f * (1.0f - cosf(i * scale)) * 65536 + 0.5;
for (int j = 0; j < nch; j++)
*pb++ = v;
- blend += 65536; // 2^16
}
s->output_overlap = output_overlap_s16;
} else {
float *pb = s->table_blend;
+ const float scale = M_PI / frames_overlap;
for (int i = 0; i < frames_overlap; i++) {
- float v = i / (float)frames_overlap;
+ // Hann function
+ const float v = 0.5f * (1.0f - cosf(i * scale));
for (int j = 0; j < nch; j++)
*pb++ = v;
}
@@ -444,39 +519,8 @@ static bool reinit(struct mp_filter *f)
s->best_overlap_offset = NULL;
else {
if (use_int) {
- int64_t t = frames_overlap;
- int32_t n = 8589934588LL / (t * t); // 4 * (2^31 - 1) / t^2
- s->buf_pre_corr = realloc(s->buf_pre_corr,
- s->bytes_overlap * 2 + UNROLL_PADDING);
- s->table_window = realloc(s->table_window,
- s->bytes_overlap * 2 - nch * bps * 2);
- if (!s->buf_pre_corr || !s->table_window) {
- MP_FATAL(f, "Out of memory\n");
- return false;
- }
- memset((char *)s->buf_pre_corr + s->bytes_overlap * 2, 0,
- UNROLL_PADDING);
- int32_t *pw = s->table_window;
- for (int i = 1; i < frames_overlap; i++) {
- int32_t v = (i * (t - i) * n) >> 15;
- for (int j = 0; j < nch; j++)
- *pw++ = v;
- }
s->best_overlap_offset = best_overlap_offset_s16;
} else {
- s->buf_pre_corr = realloc(s->buf_pre_corr, s->bytes_overlap);
- s->table_window = realloc(s->table_window,
- s->bytes_overlap - nch * bps);
- if (!s->buf_pre_corr || !s->table_window) {
- MP_FATAL(f, "Out of memory\n");
- return false;
- }
- float *pw = s->table_window;
- for (int i = 1; i < frames_overlap; i++) {
- float v = i * (frames_overlap - i);
- for (int j = 0; j < nch; j++)
- *pw++ = v;
- }
s->best_overlap_offset = best_overlap_offset_float;
}
}
@@ -486,7 +530,7 @@ static bool reinit(struct mp_filter *f)
s->bytes_queue = (s->frames_search + s->frames_stride + frames_overlap)
* bps * nch;
- s->buf_queue = realloc(s->buf_queue, s->bytes_queue + UNROLL_PADDING);
+ s->buf_queue = realloc(s->buf_queue, s->bytes_queue);
if (!s->buf_queue) {
MP_FATAL(f, "Out of memory\n");
return false;
@@ -511,7 +555,7 @@ static bool reinit(struct mp_filter *f)
return true;
}
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_scaletempo_command(struct mp_filter *f, struct mp_filter_command *cmd)
{
struct priv *s = f->priv;
@@ -530,7 +574,7 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
return false;
}
-static void reset(struct mp_filter *f)
+static void af_scaletempo_reset(struct mp_filter *f)
{
struct priv *s = f->priv;
@@ -543,14 +587,12 @@ static void reset(struct mp_filter *f)
TA_FREEP(&s->in);
}
-static void destroy(struct mp_filter *f)
+static void af_scaletempo_destroy(struct mp_filter *f)
{
struct priv *s = f->priv;
free(s->buf_queue);
free(s->buf_overlap);
- free(s->buf_pre_corr);
free(s->table_blend);
- free(s->table_window);
TA_FREEP(&s->in);
mp_filter_free_children(f);
}
@@ -558,10 +600,10 @@ static void destroy(struct mp_filter *f)
static const struct mp_filter_info af_scaletempo_filter = {
.name = "scaletempo",
.priv_size = sizeof(struct priv),
- .process = process,
- .command = command,
- .reset = reset,
- .destroy = destroy,
+ .process = af_scaletempo_process,
+ .command = af_scaletempo_command,
+ .reset = af_scaletempo_reset,
+ .destroy = af_scaletempo_destroy,
};
static struct mp_filter *af_scaletempo_create(struct mp_filter *parent,
@@ -604,7 +646,7 @@ const struct mp_user_filter_entry af_scaletempo = {
.priv_size = sizeof(OPT_BASE_STRUCT),
.priv_defaults = &(const OPT_BASE_STRUCT) {
.ms_stride = 60,
- .percent_overlap = .20,
+ .factor_overlap = .20,
.ms_search = 14,
.speed_opt = SCALE_TEMPO,
.scale_nominal = 1.0,
@@ -612,7 +654,7 @@ const struct mp_user_filter_entry af_scaletempo = {
.options = (const struct m_option[]) {
{"scale", OPT_FLOAT(scale_nominal), M_RANGE(0.01, DBL_MAX)},
{"stride", OPT_FLOAT(ms_stride), M_RANGE(0.01, DBL_MAX)},
- {"overlap", OPT_FLOAT(percent_overlap), M_RANGE(0, 1)},
+ {"overlap", OPT_FLOAT(factor_overlap), M_RANGE(0, 1)},
{"search", OPT_FLOAT(ms_search), M_RANGE(0, DBL_MAX)},
{"speed", OPT_CHOICE(speed_opt,
{"pitch", SCALE_PITCH},
diff --git a/audio/filter/af_scaletempo2.c b/audio/filter/af_scaletempo2.c
index 1a822ecd50..749e219454 100644
--- a/audio/filter/af_scaletempo2.c
+++ b/audio/filter/af_scaletempo2.c
@@ -8,21 +8,20 @@
#include "options/m_option.h"
struct priv {
- struct mp_scaletempo2 data;
+ struct mp_scaletempo2 *data;
struct mp_pin *in_pin;
struct mp_aframe *cur_format;
struct mp_aframe_pool *out_pool;
bool sent_final;
struct mp_aframe *pending;
bool initialized;
- double frame_delay;
float speed;
};
static bool init_scaletempo2(struct mp_filter *f);
-static void reset(struct mp_filter *f);
+static void af_scaletempo2_reset(struct mp_filter *f);
-static void process(struct mp_filter *f)
+static void af_scaletempo2_process(struct mp_filter *f)
{
struct priv *p = f->priv;
@@ -30,7 +29,7 @@ static void process(struct mp_filter *f)
return;
while (!p->initialized || !p->pending ||
- !mp_scaletempo2_frames_available(&p->data))
+ !mp_scaletempo2_frames_available(p->data, p->speed))
{
bool eof = false;
if (!p->pending || !mp_aframe_get_size(p->pending)) {
@@ -65,14 +64,16 @@ static void process(struct mp_filter *f)
if (p->pending && !format_change && !p->sent_final) {
int frame_size = mp_aframe_get_size(p->pending);
uint8_t **planes = mp_aframe_get_data_ro(p->pending);
- int read = mp_scaletempo2_fill_input_buffer(&p->data,
- planes, frame_size, final);
- p->frame_delay += read;
+ int read = mp_scaletempo2_fill_input_buffer(p->data,
+ planes, frame_size, p->speed);
mp_aframe_skip_samples(p->pending, read);
}
- p->sent_final |= final;
+ if (final && p->pending && !p->sent_final) {
+ mp_scaletempo2_set_final(p->data);
+ p->sent_final = true;
+ }
- if (mp_scaletempo2_frames_available(&p->data)) {
+ if (mp_scaletempo2_frames_available(p->data, p->speed)) {
if (eof) {
mp_pin_out_repeat_eof(p->in_pin); // drain more next time
}
@@ -82,18 +83,15 @@ static void process(struct mp_filter *f)
if (eof) {
mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
return;
- } else if (format_change) {
- // go on with proper reinit on the next iteration
- p->initialized = false;
- p->sent_final = false;
}
+ // for format change go on with proper reinit on the next iteration
}
}
assert(p->pending);
- if (mp_scaletempo2_frames_available(&p->data)) {
+ if (mp_scaletempo2_frames_available(p->data, p->speed)) {
struct mp_aframe *out = mp_aframe_new_ref(p->cur_format);
- int out_samples = p->data.ola_hop_size;
+ int out_samples = p->data->ola_hop_size;
if (mp_aframe_pool_allocate(p->out_pool, out, out_samples) < 0) {
talloc_free(out);
goto error;
@@ -103,17 +101,30 @@ static void process(struct mp_filter *f)
uint8_t **planes = mp_aframe_get_data_rw(out);
assert(planes);
- assert(mp_aframe_get_planes(out) == p->data.channels);
+ assert(mp_aframe_get_planes(out) == p->data->channels);
- out_samples = mp_scaletempo2_fill_buffer(&p->data,
+ out_samples = mp_scaletempo2_fill_buffer(p->data,
(float**)planes, out_samples, p->speed);
double pts = mp_aframe_get_pts(p->pending);
- p->frame_delay -= out_samples * p->speed;
-
if (pts != MP_NOPTS_VALUE) {
- double delay = p->frame_delay / mp_aframe_get_effective_rate(out);
- mp_aframe_set_pts(out, pts - delay);
+ double frame_delay = mp_scaletempo2_get_latency(p->data, p->speed)
+ + out_samples * p->speed;
+ mp_aframe_set_pts(out, pts - frame_delay / mp_aframe_get_effective_rate(out));
+
+ if (p->sent_final) {
+ double remain_pts = pts - mp_aframe_get_pts(out);
+ double rate = mp_aframe_get_effective_rate(out) / p->speed;
+ int max_samples = MPMAX(0, (int) (remain_pts * rate));
+ // truncate final packet to expected length
+ if (out_samples >= max_samples) {
+ out_samples = max_samples;
+
+ // reset the filter to ensure it stops generating audio
+ // and mp_scaletempo2_frames_available returns false
+ mp_scaletempo2_reset(p->data);
+ }
+ }
}
mp_aframe_set_size(out, out_samples);
@@ -137,16 +148,15 @@ static bool init_scaletempo2(struct mp_filter *f)
mp_aframe_reset(p->cur_format);
p->initialized = true;
p->sent_final = false;
- p->frame_delay = 0;
mp_aframe_config_copy(p->cur_format, p->pending);
- mp_scaletempo2_init(&p->data, mp_aframe_get_channels(p->pending),
+ mp_scaletempo2_init(p->data, mp_aframe_get_channels(p->pending),
mp_aframe_get_rate(p->pending));
return true;
}
-static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+static bool af_scaletempo2_command(struct mp_filter *f, struct mp_filter_command *cmd)
{
struct priv *p = f->priv;
@@ -159,29 +169,28 @@ static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
return false;
}
-static void reset(struct mp_filter *f)
+static void af_scaletempo2_reset(struct mp_filter *f)
{
struct priv *p = f->priv;
- mp_scaletempo2_reset(&p->data);
- p->frame_delay = 0;
+ mp_scaletempo2_reset(p->data);
p->initialized = false;
TA_FREEP(&p->pending);
}
-static void destroy(struct mp_filter *f)
+static void af_scaletempo2_destroy(struct mp_filter *f)
{
struct priv *p = f->priv;
- mp_scaletempo2_destroy(&p->data);
- talloc_free(p->pending);
+ TA_FREEP(&p->data);
+ TA_FREEP(&p->pending);
}
static const struct mp_filter_info af_scaletempo2_filter = {
.name = "scaletempo2",
.priv_size = sizeof(struct priv),
- .process = process,
- .command = command,
- .reset = reset,
- .destroy = destroy,
+ .process = af_scaletempo2_process,
+ .command = af_scaletempo2_command,
+ .reset = af_scaletempo2_reset,
+ .destroy = af_scaletempo2_destroy,
};
static struct mp_filter *af_scaletempo2_create(
@@ -197,7 +206,8 @@ static struct mp_filter *af_scaletempo2_create(
mp_filter_add_pin(f, MP_PIN_OUT, "out");
struct priv *p = f->priv;
- p->data.opts = talloc_steal(p, options);
+ p->data = talloc_zero(p, struct mp_scaletempo2);
+ p->data->opts = talloc_steal(p, options);
p->speed = 1.0;
p->cur_format = talloc_steal(p, mp_aframe_create());
p->out_pool = mp_aframe_pool_create(p);
@@ -225,9 +235,9 @@ const struct mp_user_filter_entry af_scaletempo2 = {
.priv_size = sizeof(OPT_BASE_STRUCT),
.priv_defaults = &(const OPT_BASE_STRUCT) {
.min_playback_rate = 0.25,
- .max_playback_rate = 4.0,
- .ola_window_size_ms = 20,
- .wsola_search_interval_ms = 30,
+ .max_playback_rate = 8.0,
+ .ola_window_size_ms = 12,
+ .wsola_search_interval_ms = 40,
},
.options = (const struct m_option[]) {
{"search-interval",
diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c
index d7c0677c45..7f3a99638f 100644
--- a/audio/filter/af_scaletempo2_internals.c
+++ b/audio/filter/af_scaletempo2_internals.c
@@ -41,19 +41,15 @@ static bool in_interval(int n, struct interval q)
return n >= q.lo && n <= q.hi;
}
-static float **realloc_2d(float **p, int x, int y)
+static void alloc_sample_buffer(struct mp_scaletempo2 *p, float ***ptr, size_t size)
{
- float **array = realloc(p, sizeof(float*) * x + sizeof(float) * x * y);
- float* data = (float*) (array + x);
- for (int i = 0; i < x; ++i) {
- array[i] = data + i * y;
- }
- return array;
-}
+ talloc_free(*ptr);
-static void zero_2d(float **a, int x, int y)
-{
- memset(a + x, 0, sizeof(float) * x * y);
+ float **buff = talloc_array(p, float*, p->channels);
+ for (int i = 0; i < p->channels; ++i) {
+ buff[i] = talloc_array(buff, float, size);
+ }
+ *ptr = buff;
}
static void zero_2d_partial(float **a, int x, int y)
@@ -93,15 +89,15 @@ static void multi_channel_moving_block_energies(
}
static float multi_channel_similarity_measure(
- const float* dot_prod_a_b,
- const float* energy_a, const float* energy_b,
+ const float* dot_prod,
+ const float* energy_target, const float* energy_candidate,
int channels)
{
const float epsilon = 1e-12f;
float similarity_measure = 0.0f;
for (int n = 0; n < channels; ++n) {
- similarity_measure += dot_prod_a_b[n]
- / sqrtf(energy_a[n] * energy_b[n] + epsilon);
+ similarity_measure += dot_prod[n] * energy_target[n]
+ / sqrtf(energy_target[n] * energy_candidate[n] + epsilon);
}
return similarity_measure;
}
@@ -421,18 +417,15 @@