af_scaletempo2: fix processing of final packet

After the final input packet, the filter padded the input with silence to allow one more WSOLA iteration. That was not enough to process the final frames. Continue padding the end of `input_buffer` with silence until the final frames have been processed. Implementation: Instead of padding when the final samples are added, pad before running a WSOLA iteration. Count the number of added silent frames and the remaining input frames for timekeeping.
author: ferreum <code@ferreum.de> 2023-08-13 13:10:58 +0200
committer: Niklas Haas <github-daiK1o@haasn.dev> 2023-09-20 14:36:23 +0200
commit: 8080d00d7f31a0e1ba25418e0f08474f1a2f1f61 (patch)
tree: 3c0841b6ae303a9d94feba57df321682e17ca9fd /audio/filter
parent: cf8b7ff0d666a4c13b32d8d9e973435df419b5ef (diff)
download: mpv-8080d00d7f31a0e1ba25418e0f08474f1a2f1f61.tar.bz2 mpv-8080d00d7f31a0e1ba25418e0f08474f1a2f1f61.tar.xz
3 files changed, 64 insertions, 16 deletions
diff --git a/audio/filter/af_scaletempo2.c b/audio/filter/af_scaletempo2.c
index 0dc0a974d4..5258fe3204 100644
--- a/audio/filter/af_scaletempo2.c
+++ b/audio/filter/af_scaletempo2.c
@@ -65,10 +65,13 @@ static void process(struct mp_filter *f)
             int frame_size = mp_aframe_get_size(p->pending);
             uint8_t **planes = mp_aframe_get_data_ro(p->pending);
             int read = mp_scaletempo2_fill_input_buffer(&p->data,
-                planes, frame_size, final, p->speed);
+                planes, frame_size, p->speed);
             mp_aframe_skip_samples(p->pending, read);
         }
-        p->sent_final |= final;
+        if (final && p->pending && !p->sent_final) {
+            mp_scaletempo2_set_final(&p->data);
+            p->sent_final = true;
+        }
 
         if (mp_scaletempo2_frames_available(&p->data, p->speed)) {
             if (eof) {
@@ -80,11 +83,8 @@ static void process(struct mp_filter *f)
             if (eof) {
                 mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
                 return;
-            } else if (format_change) {
-                // go on with proper reinit on the next iteration
-                p->initialized = false;
-                p->sent_final = false;
             }
+            // for format change go on with proper reinit on the next iteration
         }
     }
 
diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c
index a9d0fba136..4b68fae6c4 100644
--- a/audio/filter/af_scaletempo2_internals.c
+++ b/audio/filter/af_scaletempo2_internals.c
@@ -421,6 +421,9 @@ static void seek_buffer(struct mp_scaletempo2 *p, int frames)
 {
     assert(p->input_buffer_frames >= frames);
     p->input_buffer_frames -= frames;
+    if (p->input_buffer_final_frames > 0) {
+        p->input_buffer_final_frames = MPMAX(0, p->input_buffer_final_frames - frames);
+    }
     for (int i = 0; i < p->channels; ++i) {
         memmove(p->input_buffer[i], p->input_buffer[i] + frames,
             p->input_buffer_frames * sizeof(float));
@@ -483,27 +486,53 @@ static void resize_input_buffer(struct mp_scaletempo2 *p, int size)
     p->input_buffer = realloc_2d(p->input_buffer, p->channels, size);
 }
 
+// pad end with silence until a wsola iteration can be performed
+static void add_input_buffer_final_silence(struct mp_scaletempo2 *p, double playback_rate)
+{
+    int needed = frames_needed(p, playback_rate);
+    if (needed <= 0)
+        return; // no silence needed for iteration
+
+    int required_size = needed + p->input_buffer_frames;
+    if (required_size > p->input_buffer_size)
+        resize_input_buffer(p, required_size);
+
+    for (int i = 0; i < p->channels; ++i) {
+        float *ch_input = p->input_buffer[i];
+        for (int j = 0; j < needed; ++j) {
+            ch_input[p->input_buffer_frames + j] = 0.0f;
+        }
+    }
+
+    p->input_buffer_added_silence += needed;
+    p->input_buffer_frames += needed;
+}
+
+void mp_scaletempo2_set_final(struct mp_scaletempo2 *p)
+{
+    if (p->input_buffer_final_frames <= 0) {
+        p->input_buffer_final_frames = p->input_buffer_frames;
+    }
+}
+
 int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
-    uint8_t **planes, int frame_size, bool final, double playback_rate)
+    uint8_t **planes, int frame_size, double playback_rate)
 {
     int needed = frames_needed(p, playback_rate);
     int read = MPMIN(needed, frame_size);
-    int total_fill = final ? needed : read;
-    if (total_fill == 0) return 0;
+    if (read == 0)
+        return 0;
 
-    int required_size = total_fill + p->input_buffer_frames;
+    int required_size = read + p->input_buffer_frames;
     if (required_size > p->input_buffer_size)
         resize_input_buffer(p, required_size);
 
     for (int i = 0; i < p->channels; ++i) {
         memcpy(p->input_buffer[i] + p->input_buffer_frames,
             planes[i], read * sizeof(float));
-        for (int j = read; j < total_fill; ++j) {
-            p->input_buffer[i][p->input_buffer_frames + j] = 0.0f;
-        }
     }
 
-    p->input_buffer_frames += total_fill;
+    p->input_buffer_frames += read;
     return read;
 }
 
@@ -669,6 +698,10 @@ int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
 {
     if (playback_rate == 0) return 0;
 
+    if (p->input_buffer_final_frames > 0) {
+        add_input_buffer_final_silence(p, playback_rate);
+    }
+
     // Optimize the muted case to issue a single clear instead of performing
     // the full crossfade and clearing each crossfaded frame.
     if (playback_rate < p->opts->min_playback_rate
@@ -726,12 +759,15 @@ int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
 double mp_scaletempo2_get_latency(struct mp_scaletempo2 *p, double playback_rate)
 {
     return p->input_buffer_frames - p->output_time
+        - p->input_buffer_added_silence
         + p->num_complete_frames * playback_rate;
 }
 
 bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p, double playback_rate)
 {
-    return can_perform_wsola(p, playback_rate) || p->num_complete_frames > 0;
+    return p->input_buffer_final_frames > p->target_block_index
+        || can_perform_wsola(p, playback_rate)
+        || p->num_complete_frames > 0;
 }
 
 void mp_scaletempo2_destroy(struct mp_scaletempo2 *p)
@@ -749,6 +785,8 @@ void mp_scaletempo2_destroy(struct mp_scaletempo2 *p)
 void mp_scaletempo2_reset(struct mp_scaletempo2 *p)
 {
     p->input_buffer_frames = 0;
+    p->input_buffer_final_frames = 0;
+    p->input_buffer_added_silence = 0;
     p->output_time = 0.0;
     p->search_block_index = 0;
     p->target_block_index = 0;
@@ -827,6 +865,8 @@ void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate)
 
     resize_input_buffer(p, 4 * MPMAX(p->ola_window_size, p->search_block_size));
     p->input_buffer_frames = 0;
+    p->input_buffer_final_frames = 0;
+    p->input_buffer_added_silence = 0;
 
     p->energy_candidate_blocks = realloc(p->energy_candidate_blocks,
         sizeof(float) * p->channels * p->num_candidate_blocks);
diff --git a/audio/filter/af_scaletempo2_internals.h b/audio/filter/af_scaletempo2_internals.h
index 622d6e20da..6c3c94c0a9 100644
--- a/audio/filter/af_scaletempo2_internals.h
+++ b/audio/filter/af_scaletempo2_internals.h
@@ -112,6 +112,13 @@ struct mp_scaletempo2 {
     float **input_buffer;
     int input_buffer_size;
     int input_buffer_frames;
+    // How many frames in |input_buffer| need to be flushed by padding with
+    // silence to process the final packet. While this is nonzero, the filter
+    // appends silence to |input_buffer| until these frames are processed.
+    int input_buffer_final_frames;
+    // How many additional frames of silence have been added to |input_buffer|
+    // for padding after the final packet.
+    int input_buffer_added_silence;
     float *energy_candidate_blocks;
 };
 
@@ -120,7 +127,8 @@ void mp_scaletempo2_reset(struct mp_scaletempo2 *p);
 void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate);
 double mp_scaletempo2_get_latency(struct mp_scaletempo2 *p, double playback_rate);
 int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
-    uint8_t **planes, int frame_size, bool final, double playback_rate);
+    uint8_t **planes, int frame_size, double playback_rate);
+void mp_scaletempo2_set_final(struct mp_scaletempo2 *p);
 int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
     float **dest, int dest_size, double playback_rate);
 bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p, double playback_rate);
author	ferreum <code@ferreum.de>	2023-08-13 13:10:58 +0200
committer	Niklas Haas <github-daiK1o@haasn.dev>	2023-09-20 14:36:23 +0200
commit	8080d00d7f31a0e1ba25418e0f08474f1a2f1f61 (patch)
tree	3c0841b6ae303a9d94feba57df321682e17ca9fd /audio/filter
parent	cf8b7ff0d666a4c13b32d8d9e973435df419b5ef (diff)
download	mpv-8080d00d7f31a0e1ba25418e0f08474f1a2f1f61.tar.bz2 mpv-8080d00d7f31a0e1ba25418e0f08474f1a2f1f61.tar.xz