From b9f804b566c4c528714e4ec5e63675ad7ba5fefd Mon Sep 17 00:00:00 2001 From: wm4 Date: Thu, 18 Jan 2018 14:44:20 +0100 Subject: audio: rewrite filtering glue code Use the new filtering code for audio too. --- filters/f_auto_filters.c | 90 ++++++ filters/f_auto_filters.h | 3 + filters/f_autoconvert.c | 159 +++++++++++ filters/f_autoconvert.h | 11 + filters/f_lavfi.c | 101 +++++-- filters/f_output_chain.c | 355 ++++++++++++++++++++++- filters/f_output_chain.h | 27 ++ filters/f_swresample.c | 717 +++++++++++++++++++++++++++++++++++++++++++++++ filters/f_swresample.h | 42 +++ filters/f_utils.c | 118 ++++++++ filters/f_utils.h | 6 + filters/filter.h | 5 + filters/user_filters.c | 29 ++ filters/user_filters.h | 7 + 14 files changed, 1649 insertions(+), 21 deletions(-) create mode 100644 filters/f_swresample.c create mode 100644 filters/f_swresample.h (limited to 'filters') diff --git a/filters/f_auto_filters.c b/filters/f_auto_filters.c index eac6f745ca..b9f32026d5 100644 --- a/filters/f_auto_filters.c +++ b/filters/f_auto_filters.c @@ -242,3 +242,93 @@ struct mp_filter *mp_autorotate_create(struct mp_filter *parent) return f; } + +struct aspeed_priv { + struct mp_subfilter sub; + double cur_speed; +}; + +static void aspeed_process(struct mp_filter *f) +{ + struct aspeed_priv *p = f->priv; + + if (!mp_subfilter_read(&p->sub)) + return; + + if (fabs(p->cur_speed - 1.0) < 1e-8) { + if (p->sub.filter) + MP_VERBOSE(f, "removing scaletempo\n"); + if (!mp_subfilter_drain_destroy(&p->sub)) + return; + } else if (!p->sub.filter) { + MP_VERBOSE(f, "adding scaletempo\n"); + p->sub.filter = + mp_create_user_filter(f, MP_OUTPUT_CHAIN_AUDIO, "scaletempo", NULL); + if (!p->sub.filter) { + MP_ERR(f, "could not create scaletempo filter\n"); + mp_subfilter_continue(&p->sub); + return; + } + } + + if (p->sub.filter) { + struct mp_filter_command cmd = { + .type = MP_FILTER_COMMAND_SET_SPEED, + .speed = p->cur_speed, + }; + mp_filter_command(p->sub.filter, &cmd); + } + + 
mp_subfilter_continue(&p->sub); +} + +static bool aspeed_command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct aspeed_priv *p = f->priv; + + if (cmd->type == MP_FILTER_COMMAND_SET_SPEED) { + p->cur_speed = cmd->speed; + return true; + } + + return false; +} + +static void aspeed_reset(struct mp_filter *f) +{ + struct aspeed_priv *p = f->priv; + + mp_subfilter_reset(&p->sub); +} + +static void aspeed_destroy(struct mp_filter *f) +{ + struct aspeed_priv *p = f->priv; + + mp_subfilter_reset(&p->sub); + TA_FREEP(&p->sub.filter); +} + +static const struct mp_filter_info aspeed_filter = { + .name = "autoaspeed", + .priv_size = sizeof(struct aspeed_priv), + .command = aspeed_command, + .process = aspeed_process, + .reset = aspeed_reset, + .destroy = aspeed_destroy, +}; + +struct mp_filter *mp_autoaspeed_create(struct mp_filter *parent) +{ + struct mp_filter *f = mp_filter_create(parent, &aspeed_filter); + if (!f) + return NULL; + + struct aspeed_priv *p = f->priv; + p->cur_speed = 1.0; + + p->sub.in = mp_filter_add_pin(f, MP_PIN_IN, "in"); + p->sub.out = mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + return f; +} diff --git a/filters/f_auto_filters.h b/filters/f_auto_filters.h index 5f1a99f636..98043c9301 100644 --- a/filters/f_auto_filters.h +++ b/filters/f_auto_filters.h @@ -8,3 +8,6 @@ struct mp_filter *mp_deint_create(struct mp_filter *parent); // Rotate according to mp_image.rotate and VO capabilities. struct mp_filter *mp_autorotate_create(struct mp_filter *parent); + +// Insert a filter that inserts scaletempo depending on speed settings. 
+struct mp_filter *mp_autoaspeed_create(struct mp_filter *parent); diff --git a/filters/f_autoconvert.c b/filters/f_autoconvert.c index 687a846ae5..ce9d82cbc2 100644 --- a/filters/f_autoconvert.c +++ b/filters/f_autoconvert.c @@ -1,5 +1,8 @@ #include "config.h" +#include "audio/aframe.h" +#include "audio/chmap_sel.h" +#include "audio/format.h" #include "common/common.h" #include "common/msg.h" #include "video/hwdec.h" @@ -7,6 +10,7 @@ #include "f_autoconvert.h" #include "f_hwtransfer.h" +#include "f_swresample.h" #include "f_swscale.h" #include "f_utils.h" #include "filter.h" @@ -29,6 +33,18 @@ struct priv { // sws state int in_imgfmt, in_subfmt; + int *afmts; + int num_afmts; + int *srates; + int num_srates; + struct mp_chmap_sel chmaps; + + int in_afmt, in_srate; + struct mp_chmap in_chmap; + + double audio_speed; + bool resampling_forced; + struct mp_autoconvert public; }; @@ -56,6 +72,10 @@ void mp_autoconvert_clear(struct mp_autoconvert *c) struct priv *p = c->f->priv; p->num_imgfmts = 0; + p->num_afmts = 0; + p->num_srates = 0; + p->chmaps = (struct mp_chmap_sel){0}; + p->force_update = true; } void mp_autoconvert_add_imgfmt(struct mp_autoconvert *c, int imgfmt, int subfmt) @@ -110,6 +130,33 @@ void mp_autoconvert_add_vo_hwdec_subfmts(struct mp_autoconvert *c, p->vo_convert = true; } +void mp_autoconvert_add_afmt(struct mp_autoconvert *c, int afmt) +{ + struct priv *p = c->f->priv; + + MP_TARRAY_APPEND(p, p->afmts, p->num_afmts, afmt); + p->force_update = true; +} + +void mp_autoconvert_add_chmap(struct mp_autoconvert *c, struct mp_chmap *chmap) +{ + struct priv *p = c->f->priv; + + mp_chmap_sel_add_map(&p->chmaps, chmap); + p->force_update = true; +} + +void mp_autoconvert_add_srate(struct mp_autoconvert *c, int rate) +{ + struct priv *p = c->f->priv; + + MP_TARRAY_APPEND(p, p->srates, p->num_srates, rate); + // Some other API we call expects a 0-terminated sample rates array. 
+ MP_TARRAY_GROW(p, p->srates, p->num_srates); + p->srates[p->num_srates] = 0; + p->force_update = true; +} + static void handle_video_frame(struct mp_filter *f) { struct priv *p = f->priv; @@ -227,6 +274,94 @@ static void handle_video_frame(struct mp_filter *f) mp_subfilter_continue(&p->sub); } +static void handle_audio_frame(struct mp_filter *f) +{ + struct priv *p = f->priv; + + struct mp_frame frame = p->sub.frame; + if (frame.type != MP_FRAME_AUDIO) { + MP_ERR(p, "audio input required!\n"); + mp_filter_internal_mark_failed(f); + return; + } + + struct mp_aframe *aframe = frame.data; + + int afmt = mp_aframe_get_format(aframe); + int srate = mp_aframe_get_rate(aframe); + struct mp_chmap chmap = {0}; + mp_aframe_get_chmap(aframe, &chmap); + + if (afmt == p->in_afmt && srate == p->in_srate && + mp_chmap_equals(&chmap, &p->in_chmap) && + (!p->resampling_forced || p->sub.filter) && + !p->force_update) + { + goto cont; + } + + if (!mp_subfilter_drain_destroy(&p->sub)) + return; + + p->in_afmt = afmt; + p->in_srate = srate; + p->in_chmap = chmap; + p->force_update = false; + + int out_afmt = 0; + int best_score = 0; + for (int n = 0; n < p->num_afmts; n++) { + int score = af_format_conversion_score(p->afmts[n], afmt); + if (!out_afmt || score > best_score) { + best_score = score; + out_afmt = p->afmts[n]; + } + } + if (!out_afmt) + out_afmt = afmt; + + // (The p->srates array is 0-terminated already.) + int out_srate = af_select_best_samplerate(srate, p->srates); + if (out_srate <= 0) + out_srate = p->num_srates ? 
p->srates[0] : srate; + + struct mp_chmap out_chmap = chmap; + if (p->chmaps.num_chmaps) { + if (!mp_chmap_sel_adjust(&p->chmaps, &out_chmap)) + out_chmap = p->chmaps.chmaps[0]; // violently force fallback + } + + if (out_afmt == p->in_afmt && out_srate == p->in_srate && + mp_chmap_equals(&out_chmap, &p->in_chmap) && !p->resampling_forced) + { + goto cont; + } + + MP_VERBOSE(p, "inserting resampler\n"); + + struct mp_swresample *s = mp_swresample_create(f, NULL); + if (!s) + abort(); + + s->out_format = out_afmt; + s->out_rate = out_srate; + s->out_channels = out_chmap; + + p->sub.filter = s->f; + +cont: + + if (p->sub.filter) { + struct mp_filter_command cmd = { + .type = MP_FILTER_COMMAND_SET_SPEED_RESAMPLE, + .speed = p->audio_speed, + }; + mp_filter_command(p->sub.filter, &cmd); + } + + mp_subfilter_continue(&p->sub); +} + static void process(struct mp_filter *f) { struct priv *p = f->priv; @@ -241,11 +376,33 @@ static void process(struct mp_filter *f) handle_video_frame(f); return; } + if (p->num_afmts || p->num_srates || p->chmaps.num_chmaps || + p->resampling_forced) + { + handle_audio_frame(f); + return; + } } mp_subfilter_continue(&p->sub); } +static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct priv *p = f->priv; + + if (cmd->type == MP_FILTER_COMMAND_SET_SPEED_RESAMPLE) { + p->audio_speed = cmd->speed; + // If we needed resampling once, keep forcing resampling, as it might be + // quickly changing between 1.0 and other values for A/V compensation. 
+ if (p->audio_speed != 1.0) + p->resampling_forced = true; + return true; + } + + return false; +} + static void reset(struct mp_filter *f) { struct priv *p = f->priv; @@ -265,6 +422,7 @@ static const struct mp_filter_info autoconvert_filter = { .name = "autoconvert", .priv_size = sizeof(struct priv), .process = process, + .command = command, .reset = reset, .destroy = destroy, }; @@ -281,6 +439,7 @@ struct mp_autoconvert *mp_autoconvert_create(struct mp_filter *parent) struct priv *p = f->priv; p->public.f = f; p->log = f->log; + p->audio_speed = 1.0; p->sub.in = f->ppins[0]; p->sub.out = f->ppins[1]; diff --git a/filters/f_autoconvert.h b/filters/f_autoconvert.h index 72af21a0df..77e07aecf1 100644 --- a/filters/f_autoconvert.h +++ b/filters/f_autoconvert.h @@ -29,6 +29,17 @@ struct mp_hwdec_devices; void mp_autoconvert_add_vo_hwdec_subfmts(struct mp_autoconvert *c, struct mp_hwdec_devices *devs); +// Add afmt (an AF_FORMAT_* value) as allowed audio format. +// See mp_autoconvert_add_imgfmt() for other remarks. +void mp_autoconvert_add_afmt(struct mp_autoconvert *c, int afmt); + +// Add allowed audio channel configuration. +struct mp_chmap; +void mp_autoconvert_add_chmap(struct mp_autoconvert *c, struct mp_chmap *chmap); + +// Add allowed audio sample rate. +void mp_autoconvert_add_srate(struct mp_autoconvert *c, int rate); + // Reset set of allowed formats back to initial state. (This does not flush // any frames or remove currently active filters, although to get reasonable // behavior, you need to readd all previously allowed formats, or reset the diff --git a/filters/f_lavfi.c b/filters/f_lavfi.c index a97f126efb..b3f74b508b 100644 --- a/filters/f_lavfi.c +++ b/filters/f_lavfi.c @@ -71,6 +71,8 @@ struct lavfi { // linked. 
bool initialized; + bool warned_nospeed; + // Graph is draining to either handle format changes (if input format // changes for one pad, recreate the graph after draining all buffered // frames), or undo previously sent EOF (libavfilter does not accept @@ -597,6 +599,15 @@ static bool feed_input_pads(struct lavfi *c) continue; } + if (pad->pending.type == MP_FRAME_AUDIO && !c->warned_nospeed) { + struct mp_aframe *aframe = pad->pending.data; + if (mp_aframe_get_speed(aframe) != 1.0) { + MP_ERR(c, "speed changing filters before libavfilter are not " + "supported and can cause desyncs\n"); + c->warned_nospeed = true; + } + } + AVFrame *frame = mp_frame_to_av(pad->pending, &pad->timebase); bool eof = pad->pending.type == MP_FRAME_EOF; @@ -853,6 +864,7 @@ struct mp_lavfi *mp_lavfi_create_filter(struct mp_filter *parent, struct lavfi_user_opts { bool is_bridge; + enum mp_frame_type type; char *graph; char **avopts; @@ -861,62 +873,109 @@ struct lavfi_user_opts { char **filter_opts; }; -static struct mp_filter *vf_lavfi_create(struct mp_filter *parent, void *options) +static struct mp_filter *lavfi_create(struct mp_filter *parent, void *options) { struct lavfi_user_opts *opts = options; struct mp_lavfi *l; if (opts->is_bridge) { - l = mp_lavfi_create_filter(parent, MP_FRAME_VIDEO, true, - opts->avopts, opts->filter_name, - opts->filter_opts); + l = mp_lavfi_create_filter(parent, opts->type, true, opts->avopts, + opts->filter_name, opts->filter_opts); } else { - l = mp_lavfi_create_graph(parent, MP_FRAME_VIDEO, true, + l = mp_lavfi_create_graph(parent, opts->type, true, opts->avopts, opts->graph); } talloc_free(opts); return l ? 
l->f : NULL; } -static bool is_single_video_only(const AVFilterPad *pads) +static bool is_single_media_only(const AVFilterPad *pads, int media_type) { int count = avfilter_pad_count(pads); if (count != 1) return false; - return avfilter_pad_get_type(pads, 0) == AVMEDIA_TYPE_VIDEO; + return avfilter_pad_get_type(pads, 0) == media_type; } // Does it have exactly one video input and one video output? -static bool is_usable(const AVFilter *filter) +static bool is_usable(const AVFilter *filter, int media_type) { - return is_single_video_only(filter->inputs) && - is_single_video_only(filter->outputs); + return is_single_media_only(filter->inputs, media_type) && + is_single_media_only(filter->outputs, media_type); } -static void print_help(struct mp_log *log) +static void print_help(struct mp_log *log, int mediatype, char *name, char *ex) { mp_info(log, "List of libavfilter filters:\n"); for (const AVFilter *filter = avfilter_next(NULL); filter; filter = avfilter_next(filter)) { - if (is_usable(filter)) + if (is_usable(filter, mediatype)) mp_info(log, " %-16s %s\n", filter->name, filter->description); } mp_info(log, "\n" - "This lists video->video filters only. Refer to\n" + "This lists %s->%s filters only. 
Refer to\n" "\n" " https://ffmpeg.org/ffmpeg-filters.html\n" "\n" "to see how to use each filter and what arguments each filter takes.\n" "Also, be sure to quote the FFmpeg filter string properly, e.g.:\n" "\n" - " \"--vf=lavfi=[gradfun=20:30]\"\n" + " \"%s\"\n" "\n" "Otherwise, mpv and libavfilter syntax will conflict.\n" - "\n"); + "\n", name, name, ex); +} + +static void print_help_v(struct mp_log *log) +{ + print_help(log, AVMEDIA_TYPE_VIDEO, "video", "--vf=lavfi=[gradfun=20:30]"); +} + +static void print_help_a(struct mp_log *log) +{ + print_help(log, AVMEDIA_TYPE_AUDIO, "audio", "--af=lavfi=[volume=0.5]"); } #define OPT_BASE_STRUCT struct lavfi_user_opts +const struct mp_user_filter_entry af_lavfi = { + .desc = { + .description = "libavfilter bridge", + .name = "lavfi", + .priv_size = sizeof(OPT_BASE_STRUCT), + .options = (const m_option_t[]){ + OPT_STRING("graph", graph, M_OPT_MIN, .min = 1), + OPT_KEYVALUELIST("o", avopts, 0), + {0} + }, + .priv_defaults = &(const OPT_BASE_STRUCT){ + .type = MP_FRAME_AUDIO, + }, + .print_help = print_help_a, + }, + .create = lavfi_create, +}; + +const struct mp_user_filter_entry af_lavfi_bridge = { + .desc = { + .description = "libavfilter bridge (explicit options)", + .name = "lavfi-bridge", + .priv_size = sizeof(OPT_BASE_STRUCT), + .options = (const m_option_t[]){ + OPT_STRING("name", filter_name, M_OPT_MIN, .min = 1), + OPT_KEYVALUELIST("opts", filter_opts, 0), + OPT_KEYVALUELIST("o", avopts, 0), + {0} + }, + .priv_defaults = &(const OPT_BASE_STRUCT){ + .is_bridge = true, + .type = MP_FRAME_AUDIO, + }, + .print_help = print_help_a, + }, + .create = lavfi_create, +}; + const struct mp_user_filter_entry vf_lavfi = { .desc = { .description = "libavfilter bridge", @@ -927,9 +986,12 @@ const struct mp_user_filter_entry vf_lavfi = { OPT_KEYVALUELIST("o", avopts, 0), {0} }, - .print_help = print_help, + .priv_defaults = &(const OPT_BASE_STRUCT){ + .type = MP_FRAME_VIDEO, + }, + .print_help = print_help_v, }, - .create = 
vf_lavfi_create, + .create = lavfi_create, }; const struct mp_user_filter_entry vf_lavfi_bridge = { @@ -945,8 +1007,9 @@ const struct mp_user_filter_entry vf_lavfi_bridge = { }, .priv_defaults = &(const OPT_BASE_STRUCT){ .is_bridge = true, + .type = MP_FRAME_VIDEO, }, - .print_help = print_help, + .print_help = print_help_v, }, - .create = vf_lavfi_create, + .create = lavfi_create, }; diff --git a/filters/f_output_chain.c b/filters/f_output_chain.c index d98c7ca4b3..3cbbaa2ccb 100644 --- a/filters/f_output_chain.c +++ b/filters/f_output_chain.c @@ -1,3 +1,5 @@ +#include "audio/aframe.h" +#include "audio/out/ao.h" #include "common/global.h" #include "options/m_config.h" #include "options/m_option.h" @@ -41,6 +43,22 @@ struct chain { struct mp_autoconvert *convert; struct vo *vo; + struct ao *ao; + + struct mp_frame pending_input; + + // Some chain types (MP_OUTPUT_CHAIN_AUDIO) require draining the entire + // filter chain on format changes and further complex actions: + // 0: normal filtering + // 1: input changed, flushing out remaining frames from current filters + // 2: flushing finished + // 3: sent new frame through chain for format probing + // 4: sent EOF through chain for format probing + // 5: received format probing frame; now waiting for API user to call + // mp_output_chain_set_ao(). + int format_change_phase; + // True if it's a second run trying to see if downmix can be moved up. 
+ bool format_change_second_try; struct mp_output_chain public; }; @@ -60,13 +78,20 @@ struct mp_user_filter { char *name; bool is_output_converter; bool is_input; + bool is_channelremix; struct mp_image_params last_out_params; + struct mp_aframe *last_out_aformat; + + int64_t last_in_pts, last_out_pts; bool failed; bool error_eof_sent; }; +static void recheck_channelremix_filter(struct chain *p); +static void remove_channelremix_filter(struct chain *p); + static void update_output_caps(struct chain *p) { if (p->type != MP_OUTPUT_CHAIN_VIDEO) @@ -119,6 +144,25 @@ static bool check_out_format_change(struct mp_user_filter *u, } } + if (frame.type == MP_FRAME_AUDIO) { + struct mp_aframe *aframe = frame.data; + + if (!mp_aframe_config_equals(aframe, u->last_out_aformat)) { + MP_VERBOSE(p, "[%s] %s\n", u->name, + mp_aframe_format_str(aframe)); + mp_aframe_config_copy(u->last_out_aformat, aframe); + + if (u->is_input) { + mp_aframe_config_copy(p->public.input_aformat, aframe); + } else if (u->is_output_converter) { + mp_aframe_config_copy(p->public.output_aformat, aframe); + } + + p->public.reconfig_happened = true; + changed = true; + } + } + return changed; } @@ -137,12 +181,15 @@ static void process_user(struct mp_filter *f) MP_FATAL(p, "Cannot convert decoder/filter output to any format " "supported by the output.\n"); p->public.failed_output_conversion = true; + p->format_change_phase = 0; mp_filter_wakeup(p->f); } else { MP_ERR(p, "Disabling filter %s because it has failed.\n", name); mp_filter_reset(u->f); // clear out staled buffered data } u->failed = true; + if (p->format_change_phase) + p->format_change_phase = 2; // redo without it } if (u->failed) { @@ -159,12 +206,35 @@ static void process_user(struct mp_filter *f) return; } - mp_pin_transfer_data(u->f->pins[0], f->ppins[0]); + if (mp_pin_can_transfer_data(u->f->pins[0], f->ppins[0])) { + struct mp_frame frame = mp_pin_out_read(f->ppins[0]); + + double pts = mp_frame_get_pts(frame); + if (pts != 
MP_NOPTS_VALUE) + u->last_in_pts = pts; + + mp_pin_in_write(u->f->pins[0], frame); + } if (mp_pin_can_transfer_data(f->ppins[1], u->f->pins[1])) { struct mp_frame frame = mp_pin_out_read(u->f->pins[1]); - check_out_format_change(u, frame); + bool changed = check_out_format_change(u, frame); + if (p->type == MP_OUTPUT_CHAIN_AUDIO && (!p->ao || changed) && + u->is_input && !p->format_change_phase) + { + // Format changed -> block filtering, start draining current filters. + MP_VERBOSE(p, "format changed, draining filter chain\n"); + mp_frame_unref(&p->pending_input); + p->pending_input = frame; + p->format_change_phase = 1; + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + return; + } + + double pts = mp_frame_get_pts(frame); + if (pts != MP_NOPTS_VALUE) + u->last_out_pts = pts; mp_pin_in_write(f->ppins[1], frame); } @@ -175,6 +245,7 @@ static void reset_user(struct mp_filter *f) struct mp_user_filter *u = f->priv; u->error_eof_sent = false; + u->last_in_pts = u->last_out_pts = MP_NOPTS_VALUE; } static void destroy_user(struct mp_filter *f) @@ -203,6 +274,7 @@ static struct mp_user_filter *create_wrapper_filter(struct chain *p) struct mp_user_filter *wrapper = f->priv; wrapper->wrapper = f; wrapper->p = p; + wrapper->last_out_aformat = talloc_steal(wrapper, mp_aframe_create()); mp_filter_add_pin(f, MP_PIN_IN, "in"); mp_filter_add_pin(f, MP_PIN_OUT, "out"); return wrapper; @@ -237,10 +309,100 @@ static void relink_filter_list(struct chain *p) } } +// Special logic for draining on format changes (for audio). Never used or +// initiated video. +static void process_format_change(struct mp_filter *f) +{ + struct chain *p = f->priv; + + if (mp_pin_in_needs_data(p->filters_in)) { + if (p->format_change_phase == 2) { + MP_VERBOSE(p, "probing new format\n"); + // Clear any old state. 
+ if (!p->format_change_second_try) { + mp_autoconvert_clear(p->convert); + remove_channelremix_filter(p); + } + for (int n = 0; n < p->num_all_filters; n++) + mp_filter_reset(p->all_filters[n]->f); + // Filter a copy of the new input frame to see what comes out. + struct mp_frame frame = mp_frame_ref(p->pending_input); + if (!frame.type) + abort(); + mp_pin_in_write(p->filters_in, frame); + mp_pin_out_request_data(p->filters_out); + p->format_change_phase = 3; + } else if (p->format_change_phase == 3) { + MP_VERBOSE(p, "probing new format (drain)\n"); + mp_pin_in_write(p->filters_in, MP_EOF_FRAME); + p->format_change_phase = 4; + } + } + + if (mp_pin_can_transfer_data(f->ppins[1], p->filters_out)) { + struct mp_frame frame = mp_pin_out_read(p->filters_out); + + if (frame.type == MP_FRAME_EOF) { + // We're apparently draining for a format change, and we got EOF + // from the chain, which means we're done draining. + if (p->format_change_phase == 1) { + MP_VERBOSE(p, "done format change draining\n"); + // Then we need to start probing the new format. + p->format_change_phase = 2; + mp_pin_out_request_data(p->filters_out); + } else if (!p->public.failed_output_conversion) { + MP_ERR(p, "we didn't get an output frame? (broken filter?)\n"); + } + mp_filter_internal_mark_progress(f); + return; + } + + if (p->format_change_phase >= 2) { + // We were filtering a "test" frame to probe the format. Now + // that we have it (apparently), just discard it, and make the + // user aware of the previously grabbed format. 
+ MP_VERBOSE(p, "got output format from probing\n"); + mp_frame_unref(&frame); + for (int n = 0; n < p->num_all_filters; n++) + mp_filter_reset(p->all_filters[n]->f); + if (p->format_change_second_try) { + p->format_change_second_try = false; + p->format_change_phase = 0; + recheck_channelremix_filter(p); + } else { + p->ao = NULL; + p->public.ao_needs_update = true; + p->format_change_phase = 5; + } + // Do something silly to ensure the f_output_chain user gets + // notified properly. + mp_filter_wakeup(f); + return; + } + + // Draining remaining data. + mp_pin_in_write(f->ppins[1], frame); + } +} + + static void process(struct mp_filter *f) { struct chain *p = f->priv; + if (p->format_change_phase) { + process_format_change(f); + return; + } + + // Send remaining input from previous format change. + if (p->pending_input.type) { + if (mp_pin_in_needs_data(p->filters_in)) { + mp_pin_in_write(p->filters_in, p->pending_input); + p->pending_input = MP_NO_FRAME; + } + } + if (mp_pin_can_transfer_data(p->filters_in, f->ppins[0])) { struct mp_frame frame = mp_pin_out_read(f->ppins[0]); @@ -268,6 +430,14 @@ static void reset(struct mp_filter *f) { struct chain *p = f->priv; + // (if format initialization was in progress, this can be repeated next time) + mp_frame_unref(&p->pending_input); + p->format_change_phase = 0; + if (p->format_change_second_try) + remove_channelremix_filter(p); + p->format_change_second_try = false; + p->public.ao_needs_update = false; + p->public.got_input_eof = false; p->public.got_output_eof = false; } @@ -322,6 +492,120 @@ void mp_output_chain_set_vo(struct mp_output_chain *c, struct vo *vo) update_output_caps(p); } +// If there are any user filters, and they don't affect the channel config, +// then move upmix/downmix to the start of the chain. 
+static void maybe_move_up_channelremix(struct chain *p, struct mp_chmap *final) +{ + assert(p->num_all_filters >= 2); // at least in/convert filters + struct mp_user_filter *first = p->all_filters[0]; // "in" pseudo filter + struct mp_chmap in = {0}; + mp_aframe_get_chmap(first->last_out_aformat, &in); + if (mp_chmap_is_unknown(&in)) + return; + mp_chmap_reorder_to_lavc(&in); + if (!mp_chmap_is_valid(&in) || mp_chmap_equals_reordered(&in, final)) + return; + for (int n = 0; n < p->num_all_filters; n++) { + struct mp_user_filter *u = p->all_filters[n]; + struct mp_chmap chmap = {0}; + mp_aframe_get_chmap(u->last_out_aformat, &chmap); + if (!mp_chmap_equals_reordered(&in, &chmap)) + return; // some remix going in + } + + if (!p->num_user_filters) + return; // would be a NOP + + MP_VERBOSE(p, "trying with channel remixing moved to start of chain\n"); + + struct mp_user_filter *remix = create_wrapper_filter(p); + struct mp_autoconvert *convert = mp_autoconvert_create(remix->wrapper); + if (!convert) + abort(); + mp_autoconvert_add_chmap(convert, final); + remix->name = "channelremix"; + remix->f = convert->f; + remix->is_channelremix = true; + MP_TARRAY_APPEND(p, p->pre_filters, p->num_pre_filters, remix); + relink_filter_list(p); + + // now run the scary state machine again in order to see what filters do + // with the remixed channel data and if it was a good idea + p->format_change_phase = 2; + p->format_change_second_try = true; +} + +static void remove_channelremix_filter(struct chain *p) +{ + for (int n = 0; n < p->num_pre_filters; n++) { + struct mp_user_filter *u = p->pre_filters[n]; + if (u->is_channelremix) { + MP_TARRAY_REMOVE_AT(p->pre_filters, p->num_pre_filters, n); + talloc_free(u->wrapper); + relink_filter_list(p); + break; + } + } +} + +static void recheck_channelremix_filter(struct chain *p) +{ + struct mp_chmap in = {0}; + int start = -1; + for (int n = 0; n < p->num_all_filters; n++) { + struct mp_user_filter *u = p->all_filters[n]; + if 
(u->is_channelremix) { + mp_aframe_get_chmap(u->last_out_aformat, &in); + start = n; + break; + } + } + + if (start < 0 || !mp_chmap_is_valid(&in)) + goto remove; + + for (int n = start; n < p->num_all_filters; n++) { + struct mp_user_filter *u = p->all_filters[n]; + struct mp_chmap chmap = {0}; + mp_aframe_get_chmap(u->last_out_aformat, &chmap); + if (!mp_chmap_equals_reordered(&in, &chmap)) + goto remove; + } + + return; +remove: + MP_VERBOSE(p, "reverting moved up channel remixing\n"); + remove_channelremix_filter(p); +} + +void mp_output_chain_set_ao(struct mp_output_chain *c, struct ao *ao) +{ + struct chain *p = c->f->priv; + + assert(p->public.ao_needs_update); // can't just call it any time + assert(p->format_change_phase == 5); + assert(!p->format_change_second_try); + + p->public.ao_needs_update = false; + p->format_change_phase = 0; + + p->ao = ao; + + int out_format = 0; + int out_rate = 0; + struct mp_chmap out_channels = {0}; + ao_get_format(p->ao, &out_rate, &out_format, &out_channels); + + mp_autoconvert_clear(p->convert); + mp_autoconvert_add_afmt(p->convert, out_format); + mp_autoconvert_add_srate(p->convert, out_rate); + mp_autoconvert_add_chmap(p->convert, &out_channels); + + maybe_move_up_channelremix(p, &out_channels); + + mp_filter_wakeup(p->f); +} + static struct mp_user_filter *find_by_label(struct chain *p, const char *label) { for (int n = 0; n < p->num_user_filters; n++) { @@ -354,6 +638,57 @@ bool mp_output_chain_command(struct mp_output_chain *c, const char *target, return mp_filter_command(f->f, cmd); } +// Set the speed on the last filter in the chain that supports it. If a filter +// supports it, reset *speed, then keep setting the speed on the other filters. +// The purpose of this is to make sure only 1 filter changes speed. 
+static void set_speed_any(struct mp_user_filter **filters, int num_filters, + bool resample, double *speed) +{ + for (int n = num_filters - 1; n >= 0; n--) { + assert(*speed); + struct mp_filter_command cmd = { + .type = resample ? MP_FILTER_COMMAND_SET_SPEED_RESAMPLE + : MP_FILTER_COMMAND_SET_SPEED, + .speed = *speed, + }; + if (mp_filter_command(filters[n]->f, &cmd)) + *speed = 1.0; + } +} + +void mp_output_chain_set_audio_speed(struct mp_output_chain *c, + double speed, double resample) +{ + struct chain *p = c->f->priv; + + // We always resample with the final libavresample instance. + set_speed_any(p->post_filters, p->num_post_filters, true, &resample); + + // If users have filters like "scaletempo" insert anywhere, use that, + // otherwise use the builtin ones. + set_speed_any(p->user_filters, p->num_user_filters, false, &speed); + set_speed_any(p->post_filters, p->num_post_filters, false, &speed); +} + +double mp_output_get_measured_total_delay(struct mp_output_chain *c) +{ + struct chain *p = c->f->priv; + + double delay = 0; + + for (int n = 0; n < p->num_all_filters; n++) { + struct mp_user_filter *u = p->all_filters[n]; + + if (u->last_in_pts != MP_NOPTS_VALUE && + u->last_out_pts != MP_NOPTS_VALUE) + { + delay += u->last_in_pts - u->last_out_pts; + } + } + + return delay; +} + static bool compare_filter(struct m_obj_settings *a, struct m_obj_settings *b) { if (a == b || !a || !b) @@ -509,6 +844,18 @@ static void create_video_things(struct chain *p) MP_TARRAY_APPEND(p, p->post_filters, p->num_post_filters, f); } +static void create_audio_things(struct chain *p) +{ + p->frame_type = MP_FRAME_AUDIO; + + struct mp_user_filter *f = create_wrapper_filter(p); + f->name = "userspeed"; + f->f = mp_autoaspeed_create(f->wrapper); + if (!f->f) + abort(); + MP_TARRAY_APPEND(p, p->post_filters, p->num_post_filters, f); +} + struct mp_output_chain *mp_output_chain_create(struct mp_filter *parent, enum mp_output_chain_type type) { @@ -522,6 +869,7 @@ struct 
mp_output_chain *mp_output_chain_create(struct mp_filter *parent, const char *log_name = NULL; switch (type) { case MP_OUTPUT_CHAIN_VIDEO: log_name = "!vf"; break; + case MP_OUTPUT_CHAIN_AUDIO: log_name = "!af"; break; } if (log_name) f->log = mp_log_new(f, parent->global->log, log_name); @@ -533,6 +881,8 @@ struct mp_output_chain *mp_output_chain_create(struct mp_filter *parent, struct mp_output_chain *c = &p->public; c->f = f; + c->input_aformat = talloc_steal(p, mp_aframe_create()); + c->output_aformat = talloc_steal(p, mp_aframe_create()); // Dummy filter for reporting and logging the input format. p->input = create_wrapper_filter(p); @@ -545,6 +895,7 @@ struct mp_output_chain *mp_output_chain_create(struct mp_filter *parent, switch (type) { case MP_OUTPUT_CHAIN_VIDEO: create_video_things(p); break; + case MP_OUTPUT_CHAIN_AUDIO: create_audio_things(p); break; } p->output = create_wrapper_filter(p); diff --git a/filters/f_output_chain.h b/filters/f_output_chain.h index 64667ed1bd..246f84181f 100644 --- a/filters/f_output_chain.h +++ b/filters/f_output_chain.h @@ -7,6 +7,7 @@ enum mp_output_chain_type { MP_OUTPUT_CHAIN_VIDEO = 1, // --vf + MP_OUTPUT_CHAIN_AUDIO, // --af }; // A classic single-media filter chain, which reflects --vf and --af. @@ -31,6 +32,15 @@ struct mp_output_chain { struct mp_image_params input_params; struct mp_image_params output_params; double container_fps; + + // --- for type==MP_OUTPUT_CHAIN_AUDIO + struct mp_aframe *input_aformat; + struct mp_aframe *output_aformat; + // If true, there was a format change. output_aformat might have changed, + // and the implementation drained the filter chain and unset the internal ao + // reference. The API user needs to call mp_output_chain_set_ao() again. + // Until this is done, the filter chain will not output new data. 
+ bool ao_needs_update; }; // (free by freeing mp_output_chain.f) @@ -43,6 +53,13 @@ struct mp_output_chain *mp_output_chain_create(struct mp_filter *parent, struct vo; void mp_output_chain_set_vo(struct mp_output_chain *p, struct vo *vo); +// Set the AO. The AO format will be used to determine the filter chain output. +// The API user may be asked to update the AO midstream if ao_needs_update is +// set. +// For type==MP_OUTPUT_CHAIN_AUDIO only. +struct ao; +void mp_output_chain_set_ao(struct mp_output_chain *p, struct ao *ao); + // Send a command to the filter with the target label. bool mp_output_chain_command(struct mp_output_chain *p, const char *target, struct mp_filter_command *cmd); @@ -57,3 +74,13 @@ struct m_obj_settings; bool mp_output_chain_update_filters(struct mp_output_chain *p, struct m_obj_settings *list); +// Desired audio speed, with resample being strict resampling. +void mp_output_chain_set_audio_speed(struct mp_output_chain *p, + double speed, double resample); + +// Total delay incurred by the filter chain, as measured by the recent filtered +// frames. The intention is that this sums the measured delays for each filter, +// so if a filter is removed, the caller can estimate how much audio is missing +// due to the change. +// Makes sense for audio only. +double mp_output_get_measured_total_delay(struct mp_output_chain *p); diff --git a/filters/f_swresample.c b/filters/f_swresample.c new file mode 100644 index 0000000000..48bd08d847 --- /dev/null +++ b/filters/f_swresample.c @@ -0,0 +1,717 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version.
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <libavutil/opt.h> +#include <libavutil/common.h> +#include <libavutil/samplefmt.h> +#include <libavutil/channel_layout.h> +#include <libavutil/mathematics.h> + +#include "config.h" + +#include "audio/aframe.h" +#include "audio/fmt-conversion.h" +#include "audio/format.h" +#include "common/common.h" +#include "common/av_common.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "options/m_option.h" + +#include "f_swresample.h" +#include "filter_internal.h" + +#define HAVE_LIBSWRESAMPLE (!HAVE_LIBAV) +#define HAVE_LIBAVRESAMPLE HAVE_LIBAV + +#if HAVE_LIBAVRESAMPLE +#include <libavresample/avresample.h> +#elif HAVE_LIBSWRESAMPLE +#include <libswresample/swresample.h> +#define AVAudioResampleContext SwrContext +#define avresample_alloc_context swr_alloc +#define avresample_open swr_init +#define avresample_close(x) do { } while(0) +#define avresample_free swr_free +#define avresample_available(x) 0 +#define avresample_convert(ctx, out, out_planesize, out_samples, in, in_planesize, in_samples) \ + swr_convert(ctx, out, out_samples, (const uint8_t**)(in), in_samples) +#define avresample_set_channel_mapping swr_set_channel_mapping +#define avresample_set_compensation swr_set_compensation +#else +#error "config.h broken or no resampler found" +#endif + +struct priv { + struct mp_log *log; + bool is_resampling; + struct AVAudioResampleContext *avrctx; + struct mp_aframe *avrctx_fmt; // output format of avrctx + struct mp_aframe *pool_fmt; // format used to allocate frames for avrctx output + struct mp_aframe *pre_out_fmt; // format before final conversion + struct AVAudioResampleContext *avrctx_out; // for output channel reordering + struct mp_resample_opts *opts; // opts requested by the user + // At least libswresample keeps a pointer around for this:
+ int reorder_in[MP_NUM_CHANNELS]; + int reorder_out[MP_NUM_CHANNELS]; + struct mp_aframe_pool *reorder_buffer; + struct mp_aframe_pool *out_pool; + + int in_rate_user; // user input sample rate + int in_rate; // actual rate (used by lavr), adjusted for playback speed + int in_format; + struct mp_chmap in_channels; + int out_rate; + int out_format; + struct mp_chmap out_channels; + + double current_pts; + + double cmd_speed; + double speed; + + struct mp_swresample public; +}; + +#define OPT_BASE_STRUCT struct mp_resample_opts +const struct m_sub_options resample_conf = { + .opts = (const m_option_t[]) { + OPT_INTRANGE("audio-resample-filter-size", filter_size, 0, 0, 32), + OPT_INTRANGE("audio-resample-phase-shift", phase_shift, 0, 0, 30), + OPT_FLAG("audio-resample-linear", linear, 0), + OPT_DOUBLE("audio-resample-cutoff", cutoff, M_OPT_RANGE, + .min = 0, .max = 1), + OPT_FLAG("audio-normalize-downmix", normalize, 0), + OPT_KEYVALUELIST("audio-swresample-o", avopts, 0), + {0} + }, + .size = sizeof(struct mp_resample_opts), + .defaults = &(const struct mp_resample_opts)MP_RESAMPLE_OPTS_DEF, + .change_flags = UPDATE_AUDIO, +}; + +#if HAVE_LIBAVRESAMPLE +static double get_delay(struct priv *p) +{ + return avresample_get_delay(p->avrctx) / (double)p->in_rate + + avresample_available(p->avrctx) / (double)p->out_rate; +} +static int get_out_samples(struct priv *p, int in_samples) +{ + return avresample_get_out_samples(p->avrctx, in_samples); +} +#else +static double get_delay(struct priv *p) +{ + int64_t base = p->in_rate * (int64_t)p->out_rate; + return swr_get_delay(p->avrctx, base) / (double)base; +} +static int get_out_samples(struct priv *p, int in_samples) +{ + return swr_get_out_samples(p->avrctx, in_samples); +} +#endif + +static void close_lavrr(struct priv *p) +{ + if (p->avrctx) + avresample_close(p->avrctx); + avresample_free(&p->avrctx); + if (p->avrctx_out) + avresample_close(p->avrctx_out); + avresample_free(&p->avrctx_out); + + TA_FREEP(&p->pre_out_fmt); 
+ TA_FREEP(&p->avrctx_fmt); + TA_FREEP(&p->pool_fmt); +} + +static int rate_from_speed(int rate, double speed) +{ + return lrint(rate * speed); +} + +static struct mp_chmap fudge_pairs[][2] = { + {MP_CHMAP2(BL, BR), MP_CHMAP2(SL, SR)}, + {MP_CHMAP2(SL, SR), MP_CHMAP2(BL, BR)}, + {MP_CHMAP2(SDL, SDR), MP_CHMAP2(SL, SR)}, + {MP_CHMAP2(SL, SR), MP_CHMAP2(SDL, SDR)}, +}; + +// Modify out_layout and return the new value. The intention is reducing the +// loss libswresample's rematrixing will cause by exchanging similar, but +// strictly speaking incompatible channel pairs. For example, 7.1 should be +// changed to 7.1(wide) without dropping the SL/SR channels. (We still leave +// it to libswresample to create the remix matrix.) +static uint64_t fudge_layout_conversion(struct priv *p, + uint64_t in, uint64_t out) +{ + for (int n = 0; n < MP_ARRAY_SIZE(fudge_pairs); n++) { + uint64_t a = mp_chmap_to_lavc(&fudge_pairs[n][0]); + uint64_t b = mp_chmap_to_lavc(&fudge_pairs[n][1]); + if ((in & a) == a && (in & b) == 0 && + (out & a) == 0 && (out & b) == b) + { + out = (out & ~b) | a; + + MP_VERBOSE(p, "Fudge: %s -> %s\n", + mp_chmap_to_str(&fudge_pairs[n][0]), + mp_chmap_to_str(&fudge_pairs[n][1])); + } + } + return out; +} + +// mp_chmap_get_reorder() performs: +// to->speaker[n] = from->speaker[src[n]] +// but libavresample does: +// to->speaker[dst[n]] = from->speaker[n] +static void transpose_order(int *map, int num) +{ + int nmap[MP_NUM_CHANNELS] = {0}; + for (int n = 0; n < num; n++) { + for (int i = 0; i < num; i++) { + if (map[n] == i) + nmap[i] = n; + } + } + memcpy(map, nmap, sizeof(nmap)); +} + +static bool configure_lavrr(struct priv *p, bool verbose) +{ + close_lavrr(p); + + p->in_rate = rate_from_speed(p->in_rate_user, p->speed); + + MP_VERBOSE(p, "%dHz %s %s -> %dHz %s %s\n", + p->in_rate, mp_chmap_to_str(&p->in_channels), + af_fmt_to_str(p->in_format), + p->out_rate, mp_chmap_to_str(&p->out_channels), + af_fmt_to_str(p->out_format)); + + p->avrctx = 
avresample_alloc_context(); + p->avrctx_out = avresample_alloc_context(); + if (!p->avrctx || !p->avrctx_out) + goto error; + + enum AVSampleFormat in_samplefmt = af_to_avformat(p->in_format); + enum AVSampleFormat out_samplefmt = af_to_avformat(p->out_format); + enum AVSampleFormat out_samplefmtp = av_get_planar_sample_fmt(out_samplefmt); + + if (in_samplefmt == AV_SAMPLE_FMT_NONE || + out_samplefmt == AV_SAMPLE_FMT_NONE || + out_samplefmtp == AV_SAMPLE_FMT_NONE) + { + MP_ERR(p, "unsupported conversion: %s -> %s\n", + af_fmt_to_str(p->in_format), af_fmt_to_str(p->out_format)); + goto error; + } + + av_opt_set_int(p->avrctx, "filter_size", p->opts->filter_size, 0); + av_opt_set_int(p->avrctx, "phase_shift", p->opts->phase_shift, 0); + av_opt_set_int(p->avrctx, "linear_interp", p->opts->linear, 0); + + double cutoff = p->opts->cutoff; + if (cutoff <= 0.0) + cutoff = MPMAX(1.0 - 6.5 / (p->opts->filter_size + 8), 0.80); + av_opt_set_double(p->avrctx, "cutoff", cutoff, 0); + + int normalize = p->opts->normalize; +#if HAVE_LIBSWRESAMPLE + av_opt_set_double(p->avrctx, "rematrix_maxval", normalize ? 1 : 1000, 0); +#else + av_opt_set_int(p->avrctx, "normalize_mix_level", !!normalize, 0); +#endif + + if (mp_set_avopts(p->log, p->avrctx, p->opts->avopts) < 0) + goto error; + + struct mp_chmap map_in = p->in_channels; + struct mp_chmap map_out = p->out_channels; + + // Try not to do any remixing if at least one is "unknown". Some corner + // cases also benefit from disabling all channel handling logic if the + // src/dst layouts are the same (like fl-fr-na -> fl-fr-na). 
+ if (mp_chmap_is_unknown(&map_in) || mp_chmap_is_unknown(&map_out) || + mp_chmap_equals(&map_in, &map_out)) + { + mp_chmap_set_unknown(&map_in, map_in.num); + mp_chmap_set_unknown(&map_out, map_out.num); + } + + // unchecked: don't take any channel reordering into account + uint64_t in_ch_layout = mp_chmap_to_lavc_unchecked(&map_in); + uint64_t out_ch_layout = mp_chmap_to_lavc_unchecked(&map_out); + + struct mp_chmap in_lavc, out_lavc; + mp_chmap_from_lavc(&in_lavc, in_ch_layout); + mp_chmap_from_lavc(&out_lavc, out_ch_layout); + + if (verbose && !mp_chmap_equals(&in_lavc, &out_lavc)) { + MP_VERBOSE(p, "Remix: %s -> %s\n", mp_chmap_to_str(&in_lavc), + mp_chmap_to_str(&out_lavc)); + } + + if (in_lavc.num != map_in.num) { + // For handling NA channels, we would have to add a planarization step. + MP_FATAL(p, "Unsupported input channel layout %s.\n", + mp_chmap_to_str(&map_in)); + goto error; + } + + mp_chmap_get_reorder(p->reorder_in, &map_in, &in_lavc); + transpose_order(p->reorder_in, map_in.num); + + if (mp_chmap_equals(&out_lavc, &map_out)) { + // No intermediate step required - output new format directly. + out_samplefmtp = out_samplefmt; + } else { + // Verify that we really just reorder and/or insert NA channels. + struct mp_chmap withna = out_lavc; + mp_chmap_fill_na(&withna, map_out.num); + if (withna.num != map_out.num) + goto error; + } + mp_chmap_get_reorder(p->reorder_out, &out_lavc, &map_out); + + p->pre_out_fmt = mp_aframe_create(); + mp_aframe_set_rate(p->pre_out_fmt, p->out_rate); + mp_aframe_set_chmap(p->pre_out_fmt, &p->out_channels); + mp_aframe_set_format(p->pre_out_fmt, p->out_format); + + p->avrctx_fmt = mp_aframe_create(); + mp_aframe_config_copy(p->avrctx_fmt, p->pre_out_fmt); + mp_aframe_set_chmap(p->avrctx_fmt, &out_lavc); + mp_aframe_set_format(p->avrctx_fmt, af_from_avformat(out_samplefmtp)); + + // If there are NA channels, the final output will have more channels than + // the avrctx output. 
Also, avrctx will output planar (out_samplefmtp was + // not overwritten). Allocate the output frame with more channels, so the + // NA channels can be trivially added. + p->pool_fmt = mp_aframe_create(); + mp_aframe_config_copy(p->pool_fmt, p->avrctx_fmt); + if (map_out.num > out_lavc.num) + mp_aframe_set_chmap(p->pool_fmt, &map_out); + + out_ch_layout = fudge_layout_conversion(p, in_ch_layout, out_ch_layout); + + // Real conversion; output is input to avrctx_out. + av_opt_set_int(p->avrctx, "in_channel_layout", in_ch_layout, 0); + av_opt_set_int(p->avrctx, "out_channel_layout", out_ch_layout, 0); + av_opt_set_int(p->avrctx, "in_sample_rate", p->in_rate, 0); + av_opt_set_int(p->avrctx, "out_sample_rate", p->out_rate, 0); + av_opt_set_int(p->avrctx, "in_sample_fmt", in_samplefmt, 0); + av_opt_set_int(p->avrctx, "out_sample_fmt", out_samplefmtp, 0); + + // Just needs the correct number of channels for deplanarization. + struct mp_chmap fake_chmap; + mp_chmap_set_unknown(&fake_chmap, map_out.num); + uint64_t fake_out_ch_layout = mp_chmap_to_lavc_unchecked(&fake_chmap); + if (!fake_out_ch_layout) + goto error; + av_opt_set_int(p->avrctx_out, "in_channel_layout", fake_out_ch_layout, 0); + av_opt_set_int(p->avrctx_out, "out_channel_layout", fake_out_ch_layout, 0); + + av_opt_set_int(p->avrctx_out, "in_sample_fmt", out_samplefmtp, 0); + av_opt_set_int(p->avrctx_out, "out_sample_fmt", out_samplefmt, 0); + av_opt_set_int(p->avrctx_out, "in_sample_rate", p->out_rate, 0); + av_opt_set_int(p->avrctx_out, "out_sample_rate", p->out_rate, 0); + + // API has weird requirements, quoting avresample.h: + // * This function can only be called when the allocated context is not open. + // * Also, the input channel layout must have already been set. 
+ avresample_set_channel_mapping(p->avrctx, p->reorder_in); + + p->is_resampling = false; + + if (avresample_open(p->avrctx) < 0 || avresample_open(p->avrctx_out) < 0) { + MP_ERR(p, "Cannot open Libavresample context.\n"); + goto error; + } + return true; + +error: + close_lavrr(p); + mp_filter_internal_mark_failed(p->public.f); + MP_FATAL(p, "libswresample failed to initialize.\n"); + return false; +} + +static void reset(struct mp_filter *f) +{ + struct priv *p = f->priv; + + p->current_pts = MP_NOPTS_VALUE; + + if (!p->avrctx) + return; +#if HAVE_LIBSWRESAMPLE + swr_close(p->avrctx); + if (swr_init(p->avrctx) < 0) + close_lavrr(p); +#else + while (avresample_read(p->avrctx, NULL, 1000) > 0) {} +#endif +} + +static void extra_output_conversion(struct mp_aframe *mpa) +{ + int format = af_fmt_from_planar(mp_aframe_get_format(mpa)); + int num_planes = mp_aframe_get_planes(mpa); + uint8_t **planes = mp_aframe_get_data_rw(mpa); + if (!planes) + return; + for (int p = 0; p < num_planes; p++) { + void *ptr = planes[p]; + int total = mp_aframe_get_total_plane_samples(mpa); + if (format == AF_FORMAT_FLOAT) { + for (int s = 0; s < total; s++) + ((float *)ptr)[s] = av_clipf(((float *)ptr)[s], -1.0f, 1.0f); + } else if (format == AF_FORMAT_DOUBLE) { + for (int s = 0; s < total; s++) + ((double *)ptr)[s] = MPCLAMP(((double *)ptr)[s], -1.0, 1.0); + } + } +} + +// This relies on the tricky way mpa was allocated. 
+static bool reorder_planes(struct mp_aframe *mpa, int *reorder, + struct mp_chmap *newmap) +{ + if (!mp_aframe_set_chmap(mpa, newmap)) + return false; + + int num_planes = newmap->num; + uint8_t **planes = mp_aframe_get_data_rw(mpa); + uint8_t *old_planes[MP_NUM_CHANNELS]; + assert(num_planes <= MP_NUM_CHANNELS); + for (int n = 0; n < num_planes; n++) + old_planes[n] = planes[n]; + + int next_na = 0; + for (int n = 0; n < num_planes; n++) + next_na += newmap->speaker[n] != MP_SPEAKER_ID_NA; + + for (int n = 0; n < num_planes; n++) { + int src = reorder[n]; + assert(src >= -1 && src < num_planes); + if (src >= 0) { + planes[n] = old_planes[src]; + } else { + assert(next_na < num_planes); + planes[n] = old_planes[next_na++]; + // The NA planes were never written by avrctx, so clear them. + af_fill_silence(planes[n], + mp_aframe_get_sstride(mpa) * mp_aframe_get_size(mpa), + mp_aframe_get_format(mpa)); + } + } + + return true; +} + +static int resample_frame(struct AVAudioResampleContext *r, + struct mp_aframe *out, struct mp_aframe *in) +{ + // Be aware that the channel layout and count can be different for in and + // out frames. In some situations the caller will fix up the frames before + // or after conversion. The sample rates can also be different. + AVFrame *av_i = in ? mp_aframe_get_raw_avframe(in) : NULL; + AVFrame *av_o = out ? mp_aframe_get_raw_avframe(out) : NULL; + return avresample_convert(r, + av_o ? av_o->extended_data : NULL, + av_o ? av_o->linesize[0] : 0, + av_o ? av_o->nb_samples : 0, + av_i ? av_i->extended_data : NULL, + av_i ? av_i->linesize[0] : 0, + av_i ? av_i->nb_samples : 0); +} + +static struct mp_frame filter_resample_output(struct priv *p, + struct mp_aframe *in) +{ + struct mp_aframe *out = NULL; + + if (!p->avrctx) + goto error; + + int samples = get_out_samples(p, in ? 
mp_aframe_get_size(in) : 0); + out = mp_aframe_create(); + mp_aframe_config_copy(out, p->pool_fmt); + if (mp_aframe_pool_allocate(p->out_pool, out, samples) < 0) + goto error; + + int out_samples = 0; + if (samples) { + out_samples = resample_frame(p->avrctx, out, in); + if (out_samples < 0 || out_samples > samples) + goto error; + mp_aframe_set_size(out, out_samples); + } + + struct mp_chmap out_chmap; + if (!mp_aframe_get_chmap(p->pool_fmt, &out_chmap)) + goto error; + if (!reorder_planes(out, p->reorder_out, &out_chmap)) + goto error; + + if (!mp_aframe_config_equals(out, p->pre_out_fmt)) { + struct mp_aframe *new = mp_aframe_create(); + mp_aframe_config_copy(new, p->pre_out_fmt); + if (mp_aframe_pool_allocate(p->reorder_buffer, new, out_samples) < 0) { + talloc_free(new); + goto error; + } + int got = 0; + if (out_samples) + got = resample_frame(p->avrctx_out, new, out); + talloc_free(out); + out = new; + if (got != out_samples) + goto error; + } + + extra_output_conversion(out); + + if (in) { + mp_aframe_copy_attributes(out, in); + p->current_pts = mp_aframe_end_pts(in); + } + + if (out_samples) { + if (p->current_pts != MP_NOPTS_VALUE) { + double delay = get_delay(p) * mp_aframe_get_speed(out) + + mp_aframe_duration(out); + mp_aframe_set_pts(out, p->current_pts - delay); + mp_aframe_mul_speed(out, p->speed); + } + } else { + TA_FREEP(&out); + } + + return out ? 
MAKE_FRAME(MP_FRAME_AUDIO, out) : MP_NO_FRAME; +error: + talloc_free(out); + MP_ERR(p, "Error on resampling.\n"); + mp_filter_internal_mark_failed(p->public.f); + return MP_NO_FRAME; +} + +static void process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0])) + return; + + p->speed = p->cmd_speed * p->public.speed; + + struct mp_frame frame = mp_pin_out_read(f->ppins[0]); + + struct mp_aframe *input = NULL; + if (frame.type == MP_FRAME_AUDIO) { + input = frame.data; + } else if (frame.type != MP_FRAME_EOF) { + MP_ERR(p, "Unsupported frame type.\n"); + mp_frame_unref(&frame); + mp_filter_internal_mark_failed(f); + return; + } + + if (!input && !p->avrctx) { + // Obviously no draining needed. + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + return; + } + + if (input) { + struct mp_swresample *s = &p->public; + + int in_rate = mp_aframe_get_rate(input); + int in_format = mp_aframe_get_format(input); + struct mp_chmap in_channels = {0}; + mp_aframe_get_chmap(input, &in_channels); + + if (!in_rate || !in_format || !in_channels.num) { + MP_ERR(p, "Frame with invalid format unsupported\n"); + mp_frame_unref(&frame); + mp_filter_internal_mark_failed(f); + return; + } + + int out_rate = s->out_rate ? s->out_rate : in_rate; + int out_format = s->out_format ? s->out_format : in_format; + struct mp_chmap out_channels = + s->out_channels.num ? s->out_channels : in_channels; + + if (p->in_rate_user != in_rate || + p->in_format != in_format || + !mp_chmap_equals(&p->in_channels, &in_channels) || + p->out_rate != out_rate || + p->out_format != out_format || + !mp_chmap_equals(&p->out_channels, &out_channels) || + !p->avrctx) + { + if (p->avrctx) { + // drain remaining audio + struct mp_frame out = filter_resample_output(p, NULL); + if (out.type) { + mp_pin_in_write(f->ppins[1], out); + // continue filtering next time. 
+ mp_pin_out_unread(f->ppins[0], frame); + input = NULL; + } + } + + MP_VERBOSE(p, "format change, reinitializing resampler\n"); + + p->in_rate_user = in_rate; + p->in_format = in_format; + p->in_channels = in_channels; + p->out_rate = out_rate; + p->out_format = out_format; + p->out_channels = out_channels; + + if (!configure_lavrr(p, true)) { + talloc_free(input); + return; + } + + if (!input) { + // continue filtering next time + mp_filter_internal_mark_progress(f); + return; + } + } + } + + int new_rate = rate_from_speed(p->in_rate_user, p->speed); + if (p->avrctx && !(!p->is_resampling && new_rate == p->in_rate)) { + AVRational r = + av_d2q(p->speed * p->in_rate_user / p->in_rate, INT_MAX / 2); + // Essentially, swr/avresample_set_compensation() does 2 things: + // - adjust output sample rate by sample_delta/compensation_distance + // - reset the adjustment after compensation_distance output samples + // Increase the compensation_distance to avoid undesired reset + // semantics - we want to keep the ratio for the whole frame we're + // feeding it, until the next filter() call. + int mult = INT_MAX / 2 / MPMAX(MPMAX(abs(r.num), abs(r.den)), 1); + r = (AVRational){ r.num * mult, r.den * mult }; + if (avresample_set_compensation(p->avrctx, r.den - r.num, r.den) >= 0) { + new_rate = p->in_rate; + p->is_resampling = true; + } + } + + bool need_reinit = fabs(new_rate / (double)p->in_rate - 1) > 0.01; + if (need_reinit && new_rate != p->in_rate) { + // Before reconfiguring, drain the audio that is still buffered + // in the resampler. + struct mp_frame out = filter_resample_output(p, NULL); + bool need_drain = !!out.type; + if (need_drain) { + mp_pin_in_write(f->ppins[1], out); + // Drain; continue filtering next time. + mp_pin_out_unread(f->ppins[0], frame); + } + // Reinitialize resampler. 
+ configure_lavrr(p, false); + if (need_drain) { + mp_filter_internal_mark_progress(f); + return; + } + } + + struct mp_frame out = filter_resample_output(p, input); + + if (input && out.type) { + mp_pin_in_write(f->ppins[1], out); + mp_pin_out_request_data(f->ppins[0]); + } else if (!input && out.type) { + mp_pin_in_write(f->ppins[1], out); + mp_pin_out_repeat_eof(f->ppins[0]); + } else if (!input) { + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + } + + talloc_free(input); +} + +double mp_swresample_get_delay(struct mp_swresample *s) +{ + struct priv *p = s->f->priv; + + return get_delay(p); +} + +static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct priv *p = f->priv; + + if (cmd->type == MP_FILTER_COMMAND_SET_SPEED_RESAMPLE) { + p->cmd_speed = cmd->speed; + return true; + } + + return false; +} + +static void destroy(struct mp_filter *f) +{ + struct priv *p = f->priv; + + close_lavrr(p); +} + +static const struct mp_filter_info swresample_filter = { + .name = "swresample", + .priv_size = sizeof(struct priv), + .process = process, + .command = command, + .reset = reset, + .destroy = destroy, +}; + +struct mp_swresample *mp_swresample_create(struct mp_filter *parent, + struct mp_resample_opts *opts) +{ + struct mp_filter *f = mp_filter_create(parent, &swresample_filter); + if (!f) + return NULL; + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->public.f = f; + p->public.speed = 1.0; + p->cmd_speed = 1.0; + p->log = f->log; + + if (opts) { + p->opts = talloc_dup(p, opts); + p->opts->avopts = mp_dup_str_array(p, p->opts->avopts); + } else { + p->opts = mp_get_config_group(p, f->global, &resample_conf); + } + + p->reorder_buffer = mp_aframe_pool_create(p); + p->out_pool = mp_aframe_pool_create(p); + + return &p->public; +} diff --git a/filters/f_swresample.h b/filters/f_swresample.h new file mode 100644 index 0000000000..44b2e35d08 --- /dev/null +++ 
b/filters/f_swresample.h @@ -0,0 +1,42 @@ +#pragma once + +#include + +#include "audio/chmap.h" +#include "filter.h" + +// Resampler filter, wrapping libswresample or libavresample. +struct mp_swresample { + struct mp_filter *f; + // Desired output parameters. For unset parameters, passes through the + // format. + int out_rate; + int out_format; + struct mp_chmap out_channels; + double speed; +}; + +struct mp_resample_opts { + int filter_size; + int phase_shift; + int linear; + double cutoff; + int normalize; + int allow_passthrough; + char **avopts; +}; + +#define MP_RESAMPLE_OPTS_DEF { \ + .filter_size = 16, \ + .cutoff = 0.0, \ + .phase_shift = 10, \ + .normalize = 0, \ + } + +// Create the filter. If opts==NULL, use the global options as defaults. +// Free with talloc_free(mp_swresample.f). +struct mp_swresample *mp_swresample_create(struct mp_filter *parent, + struct mp_resample_opts *opts); + +// Internal resampler delay. Does not include data buffered in mp_pins and such. +double mp_swresample_get_delay(struct mp_swresample *s); diff --git a/filters/f_utils.c b/filters/f_utils.c index f984a3b33a..d15d063879 100644 --- a/filters/f_utils.c +++ b/filters/f_utils.c @@ -1,3 +1,4 @@ +#include "audio/aframe.h" #include "video/mp_image.h" #include "f_utils.h" @@ -173,3 +174,120 @@ struct mp_filter *mp_bidir_nop_filter_create(struct mp_filter *parent) return f; } + +struct fixed_aframe_size_priv { + int samples; + bool pad_silence; + struct mp_aframe *in; + struct mp_aframe *out; + int out_written; // valid samples in out + struct mp_aframe_pool *pool; +}; + +static void fixed_aframe_size_process(struct mp_filter *f) +{ + struct fixed_aframe_size_priv *p = f->priv; + + if (!mp_pin_in_needs_data(f->ppins[1])) + return; + + if (p->in && !mp_aframe_get_size(p->in)) + TA_FREEP(&p->in); + + if (!p->in) { + struct mp_frame frame = mp_pin_out_read(f->ppins[0]); + if (frame.type == MP_FRAME_EOF) { + if (!p->out) { + mp_pin_in_write(f->ppins[1], frame); + return; + } + 
mp_pin_out_repeat_eof(f->ppins[0]); + } else if (frame.type == MP_FRAME_AUDIO) { +