summaryrefslogtreecommitdiffstats
path: root/audio/filter/af_scaletempo2_internals.h
blob: c95256b1990dce9748f4b12378e6a8bd0cfb3038 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// This filter was ported from Chromium
// (https://chromium.googlesource.com/chromium/chromium/+/51ed77e3f37a9a9b80d6d0a8259e84a8ca635259/media/filters/audio_renderer_algorithm.cc)
//
// Copyright 2015 The Chromium Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//    * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//    * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "common/common.h"

struct mp_scaletempo2_opts {
    // Max/min supported playback rates for fast/slow audio. Audio outside of these
    // ranges are muted.
    // Audio at these speeds would sound better under a frequency domain algorithm.
    float min_playback_rate;
    float max_playback_rate;
    // Overlap-and-add window size in milliseconds.
    float ola_window_size_ms;
    // Size of search interval in milliseconds. The search interval is
    // [-delta delta] around |output_index| * |playback_rate|. So the search
    // interval is 2 * delta.
    float wsola_search_interval_ms;
};

struct mp_scaletempo2 {
    struct mp_scaletempo2_opts *opts;
    // Number of channels in audio stream.
    int channels;
    // Sample rate of audio stream.
    int samples_per_second;
    // If muted, keep track of partial frames that should have been skipped over.
    double muted_partial_frame;
    // Book keeping of the current time of generated audio, in frames. This
    // should be appropriately updated when out samples are generated, regardless
    // of whether we push samples out when fill_buffer() is called or we store
    // audio in |wsola_output| for the subsequent calls to fill_buffer().
    // Furthermore, if samples from |audio_buffer| are evicted then this
    // member variable should be updated based on |playback_rate|.
    // Note that this member should be updated ONLY by calling update_output_time(),
    // so that |search_block_index| is update accordingly.
    double output_time;
    // The offset of the center frame of |search_block| w.r.t. its first frame.
    int search_block_center_offset;
    // Index of the beginning of the |search_block|, in frames.
    int search_block_index;
    // Number of Blocks to search to find the most similar one to the target
    // frame.
    int num_candidate_blocks;
    // Index of the beginning of the target block, counted in frames.
    int target_block_index;
    // Overlap-and-add window size in frames.
    int ola_window_size;
    // The hop size of overlap-and-add in frames. This implementation assumes 50%
    // overlap-and-add.
    int ola_hop_size;
    // Number of frames in |wsola_output| that overlap-and-add is completed for
    // them and can be copied to output if fill_buffer() is called. It also
    // specifies the index where the next WSOLA window has to overlap-and-add.
    int num_complete_frames;
    // Overlap-and-add window.
    float *ola_window;
    // Transition window, used to update |optimal_block| by a weighted sum of
    // |optimal_block| and |target_block|.
    float *transition_window;
    // This stores a part of the output that is created but couldn't be rendered.
    // Output is generated frame-by-frame which at some point might exceed the
    // number of requested samples. Furthermore, due to overlap-and-add,
    // the last half-window of the output is incomplete, which is stored in this
    // buffer.
    float **wsola_output;
    int wsola_output_size;
    // Auxiliary variables to avoid allocation in every iteration.
    // Stores the optimal block in every iteration. This is the most
    // similar block to |target_block| within |search_block| and it is
    // overlap-and-added to |wsola_output|.
    float **optimal_block;
    // A block of data that search is performed over to find the |optimal_block|.
    float **search_block;
    int search_block_size;
    // Stores the target block, denoted as |target| above. |search_block| is
    // searched for a block (|optimal_block|) that is most similar to
    // |target_block|.
    float **target_block;
    // Buffered audio data.
    float **input_buffer;
    int input_buffer_size;
    int input_buffer_frames;
    float *energy_candidate_blocks;
};

void mp_scaletempo2_destroy(struct mp_scaletempo2 *p);
void mp_scaletempo2_reset(struct mp_scaletempo2 *p);
void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate);
int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
    uint8_t **planes, int frame_size, bool final);
int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
    float **dest, int dest_size, float playback_rate);
bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p);