From 2f69e8f93c468242c866f82b51d4725bcf7d18f2 Mon Sep 17 00:00:00 2001 From: wm4 Date: Sat, 3 Jan 2015 03:01:58 +0100 Subject: video: better pipelining with vf_vapoursynth Most of this is explained in the code comments. This change should improve performance with vapoursynth, especially if concurrent requests are used. This should change nothing if vf_vapoursynth is not in the filter chain, since non-threaded filters obviously cannot asynchronously finish filtering of frames. --- player/core.h | 1 + player/main.c | 2 +- player/video.c | 27 +++++++++++++++++++--- video/filter/vf.c | 54 ++++++++++++++++++++++++++++++++++++------- video/filter/vf.h | 13 +++++++++++ video/filter/vf_vapoursynth.c | 14 +++++++++++ 6 files changed, 99 insertions(+), 12 deletions(-) diff --git a/player/core.h b/player/core.h index 4c8b067bb7..c663a046c7 100644 --- a/player/core.h +++ b/player/core.h @@ -398,6 +398,7 @@ int mp_initialize(struct MPContext *mpctx); struct MPContext *mp_create(void); void mp_destroy(struct MPContext *mpctx); void mp_print_version(struct mp_log *log, int always); +void wakeup_playloop(void *ctx); // misc.c double get_start_time(struct MPContext *mpctx); diff --git a/player/main.c b/player/main.c index e9820b2afe..c94c53225b 100644 --- a/player/main.c +++ b/player/main.c @@ -368,7 +368,7 @@ struct MPContext *mp_create(void) return mpctx; } -static void wakeup_playloop(void *ctx) +void wakeup_playloop(void *ctx) { struct MPContext *mpctx = ctx; mp_input_wakeup(mpctx->input); diff --git a/player/video.c b/player/video.c index 55daade37a..49476b7235 100644 --- a/player/video.c +++ b/player/video.c @@ -178,6 +178,8 @@ static void recreate_video_filters(struct MPContext *mpctx) vf_destroy(d_video->vfilter); d_video->vfilter = vf_new(mpctx->global); d_video->vfilter->hwdec = d_video->hwdec_info; + d_video->vfilter->wakeup_callback = wakeup_playloop; + d_video->vfilter->wakeup_callback_ctx = mpctx; vf_append_filter_list(d_video->vfilter, opts->vf_settings); @@ -432,7 
+434,8 @@ static int video_filter(struct MPContext *mpctx, bool eof) return VD_ERROR; // There is already a filtered frame available. - if (vf_output_frame(vf, eof) > 0) + // If vf_needs_input() returns > 0, the filter wants input anyway. + if (vf_output_frame(vf, eof) > 0 && vf_needs_input(vf) < 1) return VD_PROGRESS; // Decoder output is different from filter input? @@ -493,6 +496,20 @@ static int video_decode_and_filter(struct MPContext *mpctx) return r; } +static int video_feed_async_filter(struct MPContext *mpctx) +{ + struct dec_video *d_video = mpctx->d_video; + struct vf_chain *vf = d_video->vfilter; + + if (vf->initialized < 0) + return VD_ERROR; + + if (vf_needs_input(vf) < 1) + return 0; + mpctx->sleeptime = 0; // retry until done + return video_decode_and_filter(mpctx); +} + /* Modify video timing to match the audio timeline. There are two main * reasons this is needed. First, video and audio can start from different * positions at beginning of file or after a seek (MPlayer starts both @@ -779,8 +796,12 @@ void write_video(struct MPContext *mpctx, double endpts) double time_frame = MPMAX(mpctx->time_frame, -1); int64_t pts = mp_time_us() + (int64_t)(time_frame * 1e6); - if (!vo_is_ready_for_frame(vo, pts)) - return; // wait until VO wakes us up to get more frames + // wait until VO wakes us up to get more frames + if (!vo_is_ready_for_frame(vo, pts)) { + if (video_feed_async_filter(mpctx) < 0) + goto error; + return; + } int64_t duration = -1; double diff = -1; diff --git a/video/filter/vf.c b/video/filter/vf.c index 6a7c98aef2..874a03bed8 100644 --- a/video/filter/vf.c +++ b/video/filter/vf.c @@ -419,14 +419,12 @@ int vf_filter_frame(struct vf_chain *c, struct mp_image *img) return vf_do_filter(c->first, img); } -// Output the next queued image (if any) from the full filter chain. -// The frame can be retrieved with vf_read_output_frame(). -// eof: if set, assume there's no more input i.e. 
vf_filter_frame() will -// not be called (until reset) - flush all internally delayed frames -// returns: -1: error, 0: no output, 1: output available -int vf_output_frame(struct vf_chain *c, bool eof) +// Similar to vf_output_frame(), but only ensure that the filter "until" has +// output, instead of the end of the filter chain. +static int vf_output_frame_until(struct vf_chain *c, struct vf_instance *until, + bool eof) { - if (c->last->num_out_queued) + if (until->num_out_queued) return 1; if (c->initialized < 1) return -1; @@ -442,10 +440,12 @@ int vf_output_frame(struct vf_chain *c, bool eof) } if (vf_has_output_frame(cur)) last = cur; + if (cur == until) + break; } if (!last) return 0; - if (!last->next) + if (last == until) return 1; int r = vf_do_filter(last->next, vf_dequeue_output_frame(last)); if (r < 0) @@ -453,6 +453,16 @@ int vf_output_frame(struct vf_chain *c, bool eof) } } +// Output the next queued image (if any) from the full filter chain. +// The frame can be retrieved with vf_read_output_frame(). +// eof: if set, assume there's no more input i.e. vf_filter_frame() will +// not be called (until reset) - flush all internally delayed frames +// returns: -1: error, 0: no output, 1: output available +int vf_output_frame(struct vf_chain *c, bool eof) +{ + return vf_output_frame_until(c, c->last, eof); +} + struct mp_image *vf_read_output_frame(struct vf_chain *c) { if (!c->last->num_out_queued) @@ -460,6 +470,34 @@ struct mp_image *vf_read_output_frame(struct vf_chain *c) return vf_dequeue_output_frame(c->last); } +// Some filters (vf_vapoursynth) filter on separate threads, and may need new +// input from the decoder, even though the core does not need a new output image +// yet (this is required to get proper pipelining in the filter). If the filter +// needs new data, it will call c->wakeup_callback, which in turn causes the +// core to recheck the filter chain, calling this function. 
Each filter is asked +// whether it needs a frame (with vf->needs_input), and if so, it will try to +// feed it a new frame. If this fails, it will request a new frame from the +// core by returning 1. +// returns -1: error, 0: nothing needed, 1: add new frame with vf_filter_frame() +int vf_needs_input(struct vf_chain *c) +{ + struct vf_instance *prev = c->first; + for (struct vf_instance *cur = c->first; cur; cur = cur->next) { + while (cur->needs_input && cur->needs_input(cur)) { + // Get frames from preceding filters, or if there are none, + // request new frames from decoder. + int r = vf_output_frame_until(c, prev, false); + if (r < 1) + return r < 0 ? -1 : 1; + r = vf_do_filter(cur, vf_dequeue_output_frame(prev)); + if (r < 0) + return r; + } + prev = cur; + } + return 0; +} + static void vf_forget_frames(struct vf_instance *vf) { for (int n = 0; n < vf->num_out_queued; n++) diff --git a/video/filter/vf.h b/video/filter/vf.h index e7e53bc3ce..4ff8398229 100644 --- a/video/filter/vf.h +++ b/video/filter/vf.h @@ -84,6 +84,12 @@ typedef struct vf_instance { // May be called multiple times, even if the filter gives no output. int (*filter_out)(struct vf_instance *vf); + // Optional function that checks whether the filter needs additional + // input. This is for filters with asynchronous behavior: they filter + // frames in the background, and to get good pipelining behavior, new + // data should be fed, even if the playback core doesn't need any yet. + bool (*needs_input)(struct vf_instance *vf); + void (*uninit)(struct vf_instance *vf); char *label; @@ -121,6 +127,12 @@ struct vf_chain { struct MPOpts *opts; struct mpv_global *global; struct mp_hwdec_info *hwdec; + + // Call when the filter chain wants new processing (for filters with + // asynchronous behavior) - must be immutable once filters are created, + // since they are supposed to call it from foreign threads. 
+ void (*wakeup_callback)(void *ctx); + void *wakeup_callback_ctx; }; typedef struct vf_seteq { @@ -150,6 +162,7 @@ int vf_control_any(struct vf_chain *c, int cmd, void *arg); int vf_control_by_label(struct vf_chain *c, int cmd, void *arg, bstr label); int vf_filter_frame(struct vf_chain *c, struct mp_image *img); int vf_output_frame(struct vf_chain *c, bool eof); +int vf_needs_input(struct vf_chain *c); struct mp_image *vf_read_output_frame(struct vf_chain *c); void vf_seek_reset(struct vf_chain *c); struct vf_instance *vf_append_filter(struct vf_chain *c, const char *name, diff --git a/video/filter/vf_vapoursynth.c b/video/filter/vf_vapoursynth.c index ba630f8df1..30fd992071 100644 --- a/video/filter/vf_vapoursynth.c +++ b/video/filter/vf_vapoursynth.c @@ -381,6 +381,17 @@ static int filter_out(struct vf_instance *vf) return ret; } +static bool needs_input(struct vf_instance *vf) +{ + struct vf_priv_s *p = vf->priv; + bool r = false; + pthread_mutex_lock(&p->lock); + locked_read_output(vf); + r = vf->num_out_queued < p->max_requests && locked_need_input(vf); + pthread_mutex_unlock(&p->lock); + return r; +} + static void VS_CC infiltInit(VSMap *in, VSMap *out, void **instanceData, VSNode *node, VSCore *core, const VSAPI *vsapi) { @@ -451,6 +462,8 @@ static const VSFrameRef *VS_CC infiltGetFrame(int frameno, int activationReason, if (p->num_buffered) { drain_oldest_buffered_frame(p); pthread_cond_broadcast(&p->wakeup); + if (vf->chain->wakeup_callback) + vf->chain->wakeup_callback(vf->chain->wakeup_callback_ctx); continue; } } @@ -682,6 +695,7 @@ static int vf_open(vf_instance_t *vf) vf->config = config; vf->filter_ext = filter_ext; vf->filter_out = filter_out; + vf->needs_input = needs_input; vf->query_format = query_format; vf->control = control; vf->uninit = uninit; -- cgit v1.2.3