audio: add scaletempo2 filter based on chromium

scaletempo2 is a new audio filter for playing back audio at modified speed and is based on chromium commit 51ed77e3f37a9a9b80d6d0a8259e84a8ca635259. It sounds subjectively better than the existing implementions scaletempo and rubberband.
author: Dorian Rudolph <dorianrudo97@googlemail.com> 2020-07-25 18:02:58 +0200
committer: wm4 <1387750+wm4@users.noreply.github.com> 2020-07-27 00:57:22 +0200
commit: 785a2b126152d50ca43f6ce390b3a2114a469dad (patch)
tree: 15726491c518cf8222f6cac2cef40b04f6734190 /audio/filter/af_scaletempo2_internals.h
parent: b5368980a83a4308c710c2258a66623a7f63aabd (diff)
download: mpv-785a2b126152d50ca43f6ce390b3a2114a469dad.tar.bz2
mpv-785a2b126152d50ca43f6ce390b3a2114a469dad.tar.xz
1 files changed, 121 insertions, 0 deletions
diff --git a/audio/filter/af_scaletempo2_internals.h b/audio/filter/af_scaletempo2_internals.h
new file mode 100644
index 0000000000..3557cd3bd1
--- /dev/null
+++ b/audio/filter/af_scaletempo2_internals.h
@@ -0,0 +1,121 @@
+// This filter was ported from Chromium 
+// (https://chromium.googlesource.com/chromium/chromium/+/51ed77e3f37a9a9b80d6d0a8259e84a8ca635259/media/filters/audio_renderer_algorithm.cc)
+//
+// Copyright 2015 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "common/common.h"
+
+struct mp_scaletempo2_opts {
+    // Max/min supported playback rates for fast/slow audio. Audio outside of these
+    // ranges are muted.
+    // Audio at these speeds would sound better under a frequency domain algorithm.
+    float min_playback_rate;
+    float max_playback_rate;
+    // Overlap-and-add window size in milliseconds.
+    float ola_window_size_ms;
+    // Size of search interval in milliseconds. The search interval is
+    // [-delta delta] around |output_index| * |playback_rate|. So the search
+    // interval is 2 * delta.
+    float wsola_search_interval_ms;
+};
+
+struct mp_scaletempo2 {
+    struct mp_scaletempo2_opts *opts;
+    // Number of channels in audio stream.
+    int channels;
+    // Sample rate of audio stream.
+    int samples_per_second;
+    // If muted, keep track of partial frames that should have been skipped over.
+    double muted_partial_frame;
+    // Book keeping of the current time of generated audio, in frames. This
+    // should be appropriately updated when out samples are generated, regardless
+    // of whether we push samples out when fill_buffer() is called or we store
+    // audio in |wsola_output| for the subsequent calls to fill_buffer().
+    // Furthermore, if samples from |audio_buffer| are evicted then this
+    // member variable should be updated based on |playback_rate|.
+    // Note that this member should be updated ONLY by calling update_output_time(),
+    // so that |search_block_index| is update accordingly.
+    double output_time;
+    // The offset of the center frame of |search_block| w.r.t. its first frame.
+    int search_block_center_offset;
+    // Index of the beginning of the |search_block|, in frames.
+    int search_block_index;
+    // Number of Blocks to search to find the most similar one to the target
+    // frame.
+    int num_candidate_blocks;
+    // Index of the beginning of the target block, counted in frames.
+    int target_block_index;
+    // Overlap-and-add window size in frames.
+    int ola_window_size;
+    // The hop size of overlap-and-add in frames. This implementation assumes 50%
+    // overlap-and-add.
+    int ola_hop_size;
+    // Number of frames in |wsola_output| that overlap-and-add is completed for
+    // them and can be copied to output if fill_buffer() is called. It also
+    // specifies the index where the next WSOLA window has to overlap-and-add.
+    int num_complete_frames;
+    // Overlap-and-add window.
+    float *ola_window;
+    // Transition window, used to update |optimal_block| by a weighted sum of
+    // |optimal_block| and |target_block|.
+    float *transition_window;
+    // This stores a part of the output that is created but couldn't be rendered.
+    // Output is generated frame-by-frame which at some point might exceed the
+    // number of requested samples. Furthermore, due to overlap-and-add,
+    // the last half-window of the output is incomplete, which is stored in this
+    // buffer.
+    float **wsola_output;
+    int wsola_output_size;
+    // Auxiliary variables to avoid allocation in every iteration.
+    // Stores the optimal block in every iteration. This is the most
+    // similar block to |target_block| within |search_block| and it is
+    // overlap-and-added to |wsola_output|.
+    float **optimal_block;
+    // A block of data that search is performed over to find the |optimal_block|.
+    float **search_block;
+    int search_block_size;
+    // Stores the target block, denoted as |target| above. |search_block| is
+    // searched for a block (|optimal_block|) that is most similar to
+    // |target_block|.
+    float **target_block;
+    // Buffered audio data.
+    float **input_buffer;
+    int input_buffer_size;
+    int input_buffer_frames;
+    float *energy_candidate_blocks;
+};
+
+void mp_scaletempo2_destroy(struct mp_scaletempo2 *p);
+void mp_scaletempo2_reset(struct mp_scaletempo2 *p);
+void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate);
+int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
+    uint8_t **planes, int frame_size, bool final);
+int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
+    float **dest, int dest_size, float playback_rate);
+bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p);
+\ No newline at end of file
author	Dorian Rudolph <dorianrudo97@googlemail.com>	2020-07-25 18:02:58 +0200
committer	wm4 <1387750+wm4@users.noreply.github.com>	2020-07-27 00:57:22 +0200
commit	785a2b126152d50ca43f6ce390b3a2114a469dad (patch)
tree	15726491c518cf8222f6cac2cef40b04f6734190 /audio/filter/af_scaletempo2_internals.h
parent	b5368980a83a4308c710c2258a66623a7f63aabd (diff)
download	mpv-785a2b126152d50ca43f6ce390b3a2114a469dad.tar.bz2 mpv-785a2b126152d50ca43f6ce390b3a2114a469dad.tar.xz