af_scaletempo2: fix audio artifact on initial WSOLA iteration

The first WSOLA iteration overlap-added its window onto whatever was already in the `wsola_output` buffer. That was either silence (if the filter had not run before) or old frames (after switching to 1x and then back to a different speed). Track whether the output buffer has been started, and on the first iteration memcpy the whole window instead of overlapping.
author: ferreum <code@ferreum.de> 2023-08-08 12:50:39 +0200
committer: Niklas Haas <github-daiK1o@haasn.dev> 2023-09-20 14:36:23 +0200
commit: 33d6d0f311b410a5a5a6acb1838a41ec3e91c25b (patch)
tree: 92299e69ba7eaa963228b6f43aa5f8b3ef840e46 /audio/filter
parent: c3bceb324343afe423d24428a56047aeb45d5f67 (diff)
download: mpv-33d6d0f311b410a5a5a6acb1838a41ec3e91c25b.tar.bz2
mpv-33d6d0f311b410a5a5a6acb1838a41ec3e91c25b.tar.xz
2 files changed, 20 insertions, 7 deletions
diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c
index 168914de28..a4ef710319 100644
--- a/audio/filter/af_scaletempo2_internals.c
+++ b/audio/filter/af_scaletempo2_internals.c
@@ -620,17 +620,24 @@ static bool run_one_wsola_iteration(struct mp_scaletempo2 *p, float playback_rat
     for (int k = 0; k < p->channels; ++k) {
         float* ch_opt_frame = p->optimal_block[k];
         float* ch_output = p->wsola_output[k] + p->num_complete_frames;
-        for (int n = 0; n < p->ola_hop_size; ++n) {
-            ch_output[n] = ch_output[n] * p->ola_window[p->ola_hop_size + n] +
-                ch_opt_frame[n] * p->ola_window[n];
-        }
+        if (p->wsola_output_started) {
+            for (int n = 0; n < p->ola_hop_size; ++n) {
+                ch_output[n] = ch_output[n] * p->ola_window[p->ola_hop_size + n] +
+                    ch_opt_frame[n] * p->ola_window[n];
+            }
 
-        // Copy the second half to the output.
-        memcpy(&ch_output[p->ola_hop_size], &ch_opt_frame[p->ola_hop_size],
-               sizeof(*ch_opt_frame) * p->ola_hop_size);
+            // Copy the second half to the output.
+            memcpy(&ch_output[p->ola_hop_size], &ch_opt_frame[p->ola_hop_size],
+                   sizeof(*ch_opt_frame) * p->ola_hop_size);
+        } else {
+            // No overlap for the first iteration.
+            memcpy(ch_output, ch_opt_frame,
+                   sizeof(*ch_opt_frame) * p->ola_window_size);
+        }
     }
 
     p->num_complete_frames += p->ola_hop_size;
+    p->wsola_output_started = true;
     update_output_time(p, playback_rate, p->ola_hop_size);
     remove_old_input_frames(p, playback_rate);
     return true;
@@ -686,6 +693,7 @@ int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
     // Optimize the most common |playback_rate| ~= 1 case to use a single copy
     // instead of copying frame by frame.
     if (p->ola_window_size <= faster_step && slower_step >= p->ola_window_size) {
+        p->wsola_output_started = false;
         return read_input_buffer(p, dest_size, dest);
     }
 
@@ -730,6 +738,7 @@ void mp_scaletempo2_reset(struct mp_scaletempo2 *p)
     // Clear the queue of decoded packets.
     zero_2d(p->wsola_output, p->channels, p->wsola_output_size);
     p->num_complete_frames = 0;
+    p->wsola_output_started = false;
 }
 
 // Return a "periodic" Hann window. This is the first L samples of an L+1
@@ -748,6 +757,7 @@ void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate)
     p->search_block_center_offset = 0;
     p->search_block_index = 0;
     p->num_complete_frames = 0;
+    p->wsola_output_started = false;
     p->channels = channels;
 
     p->samples_per_second = rate;
diff --git a/audio/filter/af_scaletempo2_internals.h b/audio/filter/af_scaletempo2_internals.h
index b062159966..64f4104019 100644
--- a/audio/filter/af_scaletempo2_internals.h
+++ b/audio/filter/af_scaletempo2_internals.h
@@ -80,6 +80,9 @@ struct mp_scaletempo2 {
     // them and can be copied to output if fill_buffer() is called. It also
     // specifies the index where the next WSOLA window has to overlap-and-add.
     int num_complete_frames;
+    // Whether |wsola_output| contains an additional |ola_hop_size| of overlap
+    // frames for the next iteration.
+    bool wsola_output_started;
     // Overlap-and-add window.
     float *ola_window;
     // Transition window, used to update |optimal_block| by a weighted sum of
author	ferreum <code@ferreum.de>	2023-08-08 12:50:39 +0200
committer	Niklas Haas <github-daiK1o@haasn.dev>	2023-09-20 14:36:23 +0200
commit	33d6d0f311b410a5a5a6acb1838a41ec3e91c25b (patch)
tree	92299e69ba7eaa963228b6f43aa5f8b3ef840e46 /audio/filter
parent	c3bceb324343afe423d24428a56047aeb45d5f67 (diff)
download	mpv-33d6d0f311b410a5a5a6acb1838a41ec3e91c25b.tar.bz2 mpv-33d6d0f311b410a5a5a6acb1838a41ec3e91c25b.tar.xz