diff options
author | Christoph Heinrich <christoph.heinrich@student.tugraz.at> | 2023-09-29 21:00:30 +0200 |
---|---|---|
committer | Kacper Michajłow <kasper93@gmail.com> | 2024-05-07 23:32:55 +0200 |
commit | e166ae0ed8980ac090097d81b218239723f007a0 (patch) | |
tree | d2889d0703209ee19b87bd0be9d0bf77699eb012 | |
parent | 18ed9e105a1fa20f45e3ccdcb51d5939aee4a6ee (diff) | |
download | mpv-e166ae0ed8980ac090097d81b218239723f007a0.tar.bz2 mpv-e166ae0ed8980ac090097d81b218239723f007a0.tar.xz |
af_scaletempo: optimize overlap search
scaletempo2 has this optimization where it first uses a step size of 5
together with a quadratic interpolation to quickly get the approximate
position of the best overlap and then does a more thorough search aroun
that area.
Doing the same thing in scaletempo brought a 4.8x performance
improvement, however in my measurements a step size of 3 more
consistently finds good overlaps and it's still a 2.9x improvement for
this function.
I should note that while a step size of 3 produced better numbers,
I was not actually able to hear any difference in my test.
A step size of 3 was chosen just in case it actually makes an audible
difference in some cases and the cpu usage isn't really a problem
anymore, but that can be revisited in the future.
scaletempo2 is still faster then scaletempo with a step size of 5,
which I suspect is mostly because it uses some vectorized functions and
scaletempo does not.
-rw-r--r-- | audio/filter/af_scaletempo.c | 138 |
1 files changed, 116 insertions, 22 deletions
diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c index 77ca00f453..482b91209e 100644 --- a/audio/filter/af_scaletempo.c +++ b/audio/filter/af_scaletempo.c @@ -134,50 +134,144 @@ static bool fill_queue(struct priv *s) return bytes_needed == 0; } +// Fit the curve f(x) = a * x^2 + b * x + c such that +// f(-1) = y[0] +// f(0) = y[1] +// f(1) = y[2] +// and return the extremum position and value +// assuming y[0] <= y[1] >= y[2] || y[0] >= y[1] <= y[2] +static void quadratic_interpolation_float( + const float* y_values, float* x, float* value) +{ + const float b = (y_values[2] - y_values[0]) * 0.5f; + const float c = y_values[1]; + const float a = y_values[0] + b - c; + + if (a == 0.f) { + // it's a flat line + *x = 0; + *value = c; + } else { + const float pos = -b / (2.f * a); + *x = pos; + *value = a * pos * pos + b * pos + c; + } +} + +static void quadratic_interpolation_s16( + const int32_t* y_values, float* x, int32_t* value) +{ + const float b = (y_values[2] - y_values[0]) * 0.5f; + const float c = y_values[1]; + const float a = y_values[0] + b - c; + + if (a == 0.f) { + // it's a flat line + *x = 0; + *value = c; + } else { + const float pos = -b / (2.f * a); + *x = pos; + *value = a * pos * pos + b * pos + c; + } +} + static int best_overlap_offset_float(struct priv *s) { + int num_channels = s->num_channels, frames_search = s->frames_search; + float *source = (float *)s->buf_queue + num_channels; + float *target = (float *)s->buf_overlap + num_channels; + int num_samples = s->samples_overlap - num_channels; + int step_size = 3; + float history[3] = {}; + float best_distance = FLT_MAX; - int best_off = 0; + int best_offset_approx = 0; + for (int offset = 0; offset < frames_search; offset += step_size) { + float distance = 0; + for (int i = 0; i < num_samples; i++) + distance += fabsf(target[i] - source[offset * num_channels + i]); + + int offset_approx = offset; + history[0] = history[1]; + history[1] = history[2]; + history[2] = distance; + if(offset >= 2 && history[0] >= history[1] && history[1] <= history[2]) { + float extremum; + quadratic_interpolation_float(history, &extremum, &distance); + offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f); + } - float *search_start = (float *)s->buf_queue + s->num_channels; - for (int off = 0; off < s->frames_search; off++) { + if (distance < best_distance) { + best_distance = distance; + best_offset_approx = offset_approx; + } + } + + best_distance = FLT_MAX; + int best_offset = 0; + int min_offset = MPMAX(0, best_offset_approx - step_size + 1); + int max_offset = MPMIN(frames_search, best_offset_approx + step_size); + for (int offset = min_offset; offset < max_offset; offset++) { float distance = 0; - float *ps = search_start; - float *po = s->buf_overlap; - po += s->num_channels; - for (int i = s->num_channels; i < s->samples_overlap; i++) - distance += fabsf(*po++ - *ps++); + for (int i = 0; i < num_samples; i++) + distance += fabsf(target[i] - source[offset * num_channels + i]); if (distance < best_distance) { best_distance = distance; - best_off = off; + best_offset = offset; } - search_start += s->num_channels; } - return best_off * 4 * s->num_channels; + return best_offset * 4 * num_channels; } static int best_overlap_offset_s16(struct priv *s) { + int num_channels = s->num_channels, frames_search = s->frames_search; + int16_t *source = (int16_t *)s->buf_queue + num_channels; + int16_t *target = (int16_t *)s->buf_overlap + num_channels; + int num_samples = s->samples_overlap - num_channels; + int step_size = 3; + int32_t history[3] = {}; + int32_t best_distance = INT32_MAX; - int best_off = 0; + int best_offset_approx = 0; + for (int offset = 0; offset < frames_search; offset += step_size) { + int32_t distance = 0; + for (int i = 0; i < num_samples; i++) + distance += abs((int32_t)target[i] - source[offset * num_channels + i]); + + int offset_approx = offset; + history[0] = history[1]; + history[1] = history[2]; + history[2] = distance; + if(offset >= 2 && history[0] >= history[1] && history[1] <= history[2]) { + float extremum; + quadratic_interpolation_s16(history, &extremum, &distance); + offset_approx = offset - step_size + (int)(extremum * step_size + 0.5f); + } + + if (distance < best_distance) { + best_distance = distance; + best_offset_approx = offset_approx; + } + } - int16_t *search_start = (int16_t *)s->buf_queue + s->num_channels; - for (int off = 0; off < s->frames_search; off++) { + best_distance = INT32_MAX; + int best_offset = 0; + int min_offset = MPMAX(0, best_offset_approx - step_size + 1); + int max_offset = MPMIN(frames_search, best_offset_approx + step_size); + for (int offset = min_offset; offset < max_offset; offset++) { int32_t distance = 0; - int16_t *ps = search_start; - int16_t *po = s->buf_overlap; - po += s->num_channels; - for (int i = s->num_channels; i < s->samples_overlap; i++) - distance += abs((int32_t)*po++ - (int32_t)*ps++); + for (int i = 0; i < num_samples; i++) + distance += abs((int32_t)target[i] - source[offset * num_channels + i]); if (distance < best_distance) { best_distance = distance; - best_off = off; + best_offset = offset; } - search_start += s->num_channels; } - return best_off * 2 * s->num_channels; + return best_offset * 2 * s->num_channels; } static void output_overlap_float(struct priv *s, void *buf_out, |