From 785a2b126152d50ca43f6ce390b3a2114a469dad Mon Sep 17 00:00:00 2001 From: Dorian Rudolph Date: Sat, 25 Jul 2020 18:02:58 +0200 Subject: audio: add scaletempo2 filter based on chromium scaletempo2 is a new audio filter for playing back audio at modified speed and is based on chromium commit 51ed77e3f37a9a9b80d6d0a8259e84a8ca635259. It sounds subjectively better than the existing implementions scaletempo and rubberband. --- audio/filter/af_scaletempo2_internals.h | 121 ++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 audio/filter/af_scaletempo2_internals.h (limited to 'audio/filter/af_scaletempo2_internals.h') diff --git a/audio/filter/af_scaletempo2_internals.h b/audio/filter/af_scaletempo2_internals.h new file mode 100644 index 0000000000..3557cd3bd1 --- /dev/null +++ b/audio/filter/af_scaletempo2_internals.h @@ -0,0 +1,121 @@ +// This filter was ported from Chromium +// (https://chromium.googlesource.com/chromium/chromium/+/51ed77e3f37a9a9b80d6d0a8259e84a8ca635259/media/filters/audio_renderer_algorithm.cc) +// +// Copyright 2015 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "common/common.h" + +struct mp_scaletempo2_opts { + // Max/min supported playback rates for fast/slow audio. Audio outside of these + // ranges are muted. + // Audio at these speeds would sound better under a frequency domain algorithm. + float min_playback_rate; + float max_playback_rate; + // Overlap-and-add window size in milliseconds. + float ola_window_size_ms; + // Size of search interval in milliseconds. The search interval is + // [-delta delta] around |output_index| * |playback_rate|. So the search + // interval is 2 * delta. + float wsola_search_interval_ms; +}; + +struct mp_scaletempo2 { + struct mp_scaletempo2_opts *opts; + // Number of channels in audio stream. + int channels; + // Sample rate of audio stream. + int samples_per_second; + // If muted, keep track of partial frames that should have been skipped over. + double muted_partial_frame; + // Book keeping of the current time of generated audio, in frames. This + // should be appropriately updated when out samples are generated, regardless + // of whether we push samples out when fill_buffer() is called or we store + // audio in |wsola_output| for the subsequent calls to fill_buffer(). + // Furthermore, if samples from |audio_buffer| are evicted then this + // member variable should be updated based on |playback_rate|. + // Note that this member should be updated ONLY by calling update_output_time(), + // so that |search_block_index| is update accordingly. + double output_time; + // The offset of the center frame of |search_block| w.r.t. its first frame. + int search_block_center_offset; + // Index of the beginning of the |search_block|, in frames. + int search_block_index; + // Number of Blocks to search to find the most similar one to the target + // frame. + int num_candidate_blocks; + // Index of the beginning of the target block, counted in frames. + int target_block_index; + // Overlap-and-add window size in frames. + int ola_window_size; + // The hop size of overlap-and-add in frames. This implementation assumes 50% + // overlap-and-add. + int ola_hop_size; + // Number of frames in |wsola_output| that overlap-and-add is completed for + // them and can be copied to output if fill_buffer() is called. It also + // specifies the index where the next WSOLA window has to overlap-and-add. + int num_complete_frames; + // Overlap-and-add window. + float *ola_window; + // Transition window, used to update |optimal_block| by a weighted sum of + // |optimal_block| and |target_block|. + float *transition_window; + // This stores a part of the output that is created but couldn't be rendered. + // Output is generated frame-by-frame which at some point might exceed the + // number of requested samples. Furthermore, due to overlap-and-add, + // the last half-window of the output is incomplete, which is stored in this + // buffer. + float **wsola_output; + int wsola_output_size; + // Auxiliary variables to avoid allocation in every iteration. + // Stores the optimal block in every iteration. This is the most + // similar block to |target_block| within |search_block| and it is + // overlap-and-added to |wsola_output|. + float **optimal_block; + // A block of data that search is performed over to find the |optimal_block|. + float **search_block; + int search_block_size; + // Stores the target block, denoted as |target| above. |search_block| is + // searched for a block (|optimal_block|) that is most similar to + // |target_block|. + float **target_block; + // Buffered audio data. + float **input_buffer; + int input_buffer_size; + int input_buffer_frames; + float *energy_candidate_blocks; +}; + +void mp_scaletempo2_destroy(struct mp_scaletempo2 *p); +void mp_scaletempo2_reset(struct mp_scaletempo2 *p); +void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate); +int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p, + uint8_t **planes, int frame_size, bool final); +int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p, + float **dest, int dest_size, float playback_rate); +bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p); \ No newline at end of file -- cgit v1.2.3