summaryrefslogtreecommitdiffstats
path: root/audio/filter/af_scaletempo2_internals.h
diff options
context:
space:
mode:
authorDorian Rudolph <dorianrudo97@googlemail.com>2020-07-25 18:02:58 +0200
committerwm4 <1387750+wm4@users.noreply.github.com>2020-07-27 00:57:22 +0200
commit785a2b126152d50ca43f6ce390b3a2114a469dad (patch)
tree15726491c518cf8222f6cac2cef40b04f6734190 /audio/filter/af_scaletempo2_internals.h
parentb5368980a83a4308c710c2258a66623a7f63aabd (diff)
downloadmpv-785a2b126152d50ca43f6ce390b3a2114a469dad.tar.bz2
mpv-785a2b126152d50ca43f6ce390b3a2114a469dad.tar.xz
audio: add scaletempo2 filter based on chromium
scaletempo2 is a new audio filter for playing back audio at modified speed and is based on chromium commit 51ed77e3f37a9a9b80d6d0a8259e84a8ca635259. It sounds subjectively better than the existing implementations scaletempo and rubberband.
Diffstat (limited to 'audio/filter/af_scaletempo2_internals.h')
-rw-r--r--audio/filter/af_scaletempo2_internals.h121
1 files changed, 121 insertions, 0 deletions
diff --git a/audio/filter/af_scaletempo2_internals.h b/audio/filter/af_scaletempo2_internals.h
new file mode 100644
index 0000000000..3557cd3bd1
--- /dev/null
+++ b/audio/filter/af_scaletempo2_internals.h
@@ -0,0 +1,121 @@
+// This filter was ported from Chromium
+// (https://chromium.googlesource.com/chromium/chromium/+/51ed77e3f37a9a9b80d6d0a8259e84a8ca635259/media/filters/audio_renderer_algorithm.cc)
+//
+// Copyright 2015 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "common/common.h"
+
+struct mp_scaletempo2_opts {
+ // Min/max supported playback rates for slow/fast audio. Audio outside of
+ // this range is muted.
+ // Audio at these speeds would sound better under a frequency domain algorithm.
+ float min_playback_rate;
+ float max_playback_rate;
+ // Overlap-and-add window size in milliseconds.
+ float ola_window_size_ms;
+ // Size of search interval in milliseconds. The search interval is
+ // [-delta, delta] around |output_index| * |playback_rate|. So the search
+ // interval is 2 * delta.
+ float wsola_search_interval_ms;
+};
+
+struct mp_scaletempo2 {
+ struct mp_scaletempo2_opts *opts;
+ // Number of channels in audio stream.
+ int channels;
+ // Sample rate of audio stream.
+ int samples_per_second;
+ // If muted, keep track of partial frames that should have been skipped over.
+ double muted_partial_frame;
+ // Bookkeeping of the current time of generated audio, in frames. This
+ // should be appropriately updated when output samples are generated, regardless
+ // of whether we push samples out when fill_buffer() is called or we store
+ // audio in |wsola_output| for the subsequent calls to fill_buffer().
+ // Furthermore, if samples from |audio_buffer| are evicted then this
+ // member variable should be updated based on |playback_rate|.
+ // Note that this member should be updated ONLY by calling update_output_time(),
+ // so that |search_block_index| is updated accordingly.
+ double output_time;
+ // The offset of the center frame of |search_block| w.r.t. its first frame.
+ int search_block_center_offset;
+ // Index of the beginning of the |search_block|, in frames.
+ int search_block_index;
+ // Number of blocks to search to find the most similar one to the target
+ // frame.
+ int num_candidate_blocks;
+ // Index of the beginning of the target block, counted in frames.
+ int target_block_index;
+ // Overlap-and-add window size in frames.
+ int ola_window_size;
+ // The hop size of overlap-and-add in frames. This implementation assumes 50%
+ // overlap-and-add.
+ int ola_hop_size;
+ // Number of frames in |wsola_output| for which overlap-and-add is complete
+ // and which can be copied to output if fill_buffer() is called. It also
+ // specifies the index where the next WSOLA window has to overlap-and-add.
+ int num_complete_frames;
+ // Overlap-and-add window.
+ float *ola_window;
+ // Transition window, used to update |optimal_block| by a weighted sum of
+ // |optimal_block| and |target_block|.
+ float *transition_window;
+ // This stores a part of the output that is created but couldn't be rendered.
+ // Output is generated frame-by-frame which at some point might exceed the
+ // number of requested samples. Furthermore, due to overlap-and-add,
+ // the last half-window of the output is incomplete, which is stored in this
+ // buffer.
+ float **wsola_output;
+ int wsola_output_size;
+ // Auxiliary variables to avoid allocation in every iteration.
+ // Stores the optimal block in every iteration. This is the most
+ // similar block to |target_block| within |search_block| and it is
+ // overlap-and-added to |wsola_output|.
+ float **optimal_block;
+ // A block of data that search is performed over to find the |optimal_block|.
+ float **search_block;
+ int search_block_size;
+ // Stores the target block. |search_block| is searched for a block
+ // (|optimal_block|) that is most similar to
+ // |target_block|.
+ float **target_block;
+ // Buffered audio data.
+ float **input_buffer;
+ int input_buffer_size;
+ int input_buffer_frames;
+ float *energy_candidate_blocks;
+};
+
+void mp_scaletempo2_destroy(struct mp_scaletempo2 *p);
+void mp_scaletempo2_reset(struct mp_scaletempo2 *p);
+void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate); // NOTE(review): rate is presumably the sample rate (cf. samples_per_second) - confirm
+int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
+ uint8_t **planes, int frame_size, bool final); // NOTE(review): unit of frame_size and meaning of the return value are not visible here - confirm in the .c file
+int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
+ float **dest, int dest_size, float playback_rate); // NOTE(review): dest_size is presumably counted in frames - confirm in the .c file
+bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p);
\ No newline at end of file