summary | refs | log | tree | commit | diff | stats
path: root/audio/filter
diff options
context:
space:
mode:
author: ferreum <code@ferreum.de> 2023-08-08 12:50:39 +0200
committer: Niklas Haas <github-daiK1o@haasn.dev> 2023-09-20 14:36:23 +0200
commit: 33d6d0f311b410a5a5a6acb1838a41ec3e91c25b (patch)
tree: 92299e69ba7eaa963228b6f43aa5f8b3ef840e46 /audio/filter
parent: c3bceb324343afe423d24428a56047aeb45d5f67 (diff)
download: mpv-33d6d0f311b410a5a5a6acb1838a41ec3e91c25b.tar.bz2
mpv-33d6d0f311b410a5a5a6acb1838a41ec3e91c25b.tar.xz
af_scaletempo2: fix audio artifact on initial WSOLA iteration
The first WSOLA iteration overlapped audio with whatever was in the `wsola_output` buffer. This was either silence (if not run before), or old frames (if switching to 1x and back to a different speed). Track the state of the output buffer and memcpy the whole window for the first iteration instead.
Diffstat (limited to 'audio/filter')
-rw-r--r-- audio/filter/af_scaletempo2_internals.c | 24
-rw-r--r-- audio/filter/af_scaletempo2_internals.h | 3
2 files changed, 20 insertions, 7 deletions
diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c
index 168914de28..a4ef710319 100644
--- a/audio/filter/af_scaletempo2_internals.c
+++ b/audio/filter/af_scaletempo2_internals.c
@@ -620,17 +620,24 @@ static bool run_one_wsola_iteration(struct mp_scaletempo2 *p, float playback_rat
for (int k = 0; k < p->channels; ++k) {
float* ch_opt_frame = p->optimal_block[k];
float* ch_output = p->wsola_output[k] + p->num_complete_frames;
- for (int n = 0; n < p->ola_hop_size; ++n) {
- ch_output[n] = ch_output[n] * p->ola_window[p->ola_hop_size + n] +
- ch_opt_frame[n] * p->ola_window[n];
- }
+ if (p->wsola_output_started) {
+ for (int n = 0; n < p->ola_hop_size; ++n) {
+ ch_output[n] = ch_output[n] * p->ola_window[p->ola_hop_size + n] +
+ ch_opt_frame[n] * p->ola_window[n];
+ }
- // Copy the second half to the output.
- memcpy(&ch_output[p->ola_hop_size], &ch_opt_frame[p->ola_hop_size],
- sizeof(*ch_opt_frame) * p->ola_hop_size);
+ // Copy the second half to the output.
+ memcpy(&ch_output[p->ola_hop_size], &ch_opt_frame[p->ola_hop_size],
+ sizeof(*ch_opt_frame) * p->ola_hop_size);
+ } else {
+ // No overlap for the first iteration.
+ memcpy(ch_output, ch_opt_frame,
+ sizeof(*ch_opt_frame) * p->ola_window_size);
+ }
}
p->num_complete_frames += p->ola_hop_size;
+ p->wsola_output_started = true;
update_output_time(p, playback_rate, p->ola_hop_size);
remove_old_input_frames(p, playback_rate);
return true;
@@ -686,6 +693,7 @@ int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
// Optimize the most common |playback_rate| ~= 1 case to use a single copy
// instead of copying frame by frame.
if (p->ola_window_size <= faster_step && slower_step >= p->ola_window_size) {
+ p->wsola_output_started = false;
return read_input_buffer(p, dest_size, dest);
}
@@ -730,6 +738,7 @@ void mp_scaletempo2_reset(struct mp_scaletempo2 *p)
// Clear the queue of decoded packets.
zero_2d(p->wsola_output, p->channels, p->wsola_output_size);
p->num_complete_frames = 0;
+ p->wsola_output_started = false;
}
// Return a "periodic" Hann window. This is the first L samples of an L+1
@@ -748,6 +757,7 @@ void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate)
p->search_block_center_offset = 0;
p->search_block_index = 0;
p->num_complete_frames = 0;
+ p->wsola_output_started = false;
p->channels = channels;
p->samples_per_second = rate;
diff --git a/audio/filter/af_scaletempo2_internals.h b/audio/filter/af_scaletempo2_internals.h
index b062159966..64f4104019 100644
--- a/audio/filter/af_scaletempo2_internals.h
+++ b/audio/filter/af_scaletempo2_internals.h
@@ -80,6 +80,9 @@ struct mp_scaletempo2 {
// them and can be copied to output if fill_buffer() is called. It also
// specifies the index where the next WSOLA window has to overlap-and-add.
int num_complete_frames;
+ // Whether |wsola_output| contains an additional |ola_hop_size| of overlap
+ // frames for the next iteration.
+ bool wsola_output_started;
// Overlap-and-add window.
float *ola_window;
// Transition window, used to update |optimal_block| by a weighted sum of