From 2f69e8f93c468242c866f82b51d4725bcf7d18f2 Mon Sep 17 00:00:00 2001
From: wm4 <wm4@nowhere>
Date: Sat, 3 Jan 2015 03:01:58 +0100
Subject: video: better pipelining with vf_vapoursynth

Most of this is explained in the code comments. This change should
improve performance with vapoursynth, especially if concurrent requests
are used.

This should change nothing if vf_vapoursynth is not in the filter chain,
since non-threaded filters obviously cannot finish filtering frames
asynchronously.
---
 player/core.h                 |  1 +
 player/main.c                 |  2 +-
 player/video.c                | 27 +++++++++++++++++++---
 video/filter/vf.c             | 54 ++++++++++++++++++++++++++++++++++++-------
 video/filter/vf.h             | 13 +++++++++++
 video/filter/vf_vapoursynth.c | 14 +++++++++++
 6 files changed, 99 insertions(+), 12 deletions(-)

diff --git a/player/core.h b/player/core.h
index 4c8b067bb7..c663a046c7 100644
--- a/player/core.h
+++ b/player/core.h
@@ -398,6 +398,7 @@ int mp_initialize(struct MPContext *mpctx);
 struct MPContext *mp_create(void);
 void mp_destroy(struct MPContext *mpctx);
 void mp_print_version(struct mp_log *log, int always);
+void wakeup_playloop(void *ctx);
 
 // misc.c
 double get_start_time(struct MPContext *mpctx);
diff --git a/player/main.c b/player/main.c
index e9820b2afe..c94c53225b 100644
--- a/player/main.c
+++ b/player/main.c
@@ -368,7 +368,7 @@ struct MPContext *mp_create(void)
     return mpctx;
 }
 
-static void wakeup_playloop(void *ctx)
+void wakeup_playloop(void *ctx)
 {
     struct MPContext *mpctx = ctx;
     mp_input_wakeup(mpctx->input);
diff --git a/player/video.c b/player/video.c
index 55daade37a..49476b7235 100644
--- a/player/video.c
+++ b/player/video.c
@@ -178,6 +178,8 @@ static void recreate_video_filters(struct MPContext *mpctx)
     vf_destroy(d_video->vfilter);
     d_video->vfilter = vf_new(mpctx->global);
     d_video->vfilter->hwdec = d_video->hwdec_info;
+    d_video->vfilter->wakeup_callback = wakeup_playloop;
+    d_video->vfilter->wakeup_callback_ctx = mpctx;
 
     vf_append_filter_list(d_video->vfilter, opts->vf_settings);
 
@@ -432,7 +434,8 @@ static int video_filter(struct MPContext *mpctx, bool eof)
         return VD_ERROR;
 
     // There is already a filtered frame available.
-    if (vf_output_frame(vf, eof) > 0)
+    // If vf_needs_input() returns > 0, the filter wants input anyway.
+    if (vf_output_frame(vf, eof) > 0 && vf_needs_input(vf) < 1)
         return VD_PROGRESS;
 
     // Decoder output is different from filter input?
@@ -493,6 +496,20 @@ static int video_decode_and_filter(struct MPContext *mpctx)
     return r;
 }
 
+static int video_feed_async_filter(struct MPContext *mpctx)
+{
+    struct dec_video *d_video = mpctx->d_video;
+    struct vf_chain *vf = d_video->vfilter;
+    // Feeds filters that request input. A negative init state is an error.
+    if (vf->initialized < 0)
+        return VD_ERROR;
+    // Nothing to do unless vf_needs_input() asks for a new frame (returns 1).
+    if (vf_needs_input(vf) < 1)
+        return 0;
+    mpctx->sleeptime = 0; // retry until done
+    return video_decode_and_filter(mpctx);
+}
+
 /* Modify video timing to match the audio timeline. There are two main
  * reasons this is needed. First, video and audio can start from different
  * positions at beginning of file or after a seek (MPlayer starts both
@@ -779,8 +796,12 @@ void write_video(struct MPContext *mpctx, double endpts)
     double time_frame = MPMAX(mpctx->time_frame, -1);
     int64_t pts = mp_time_us() + (int64_t)(time_frame * 1e6);
 
-    if (!vo_is_ready_for_frame(vo, pts))
-        return; // wait until VO wakes us up to get more frames
+    // wait until VO wakes us up to get more frames
+    if (!vo_is_ready_for_frame(vo, pts)) {
+        if (video_feed_async_filter(mpctx) < 0)
+            goto error;
+        return;
+    }
 
     int64_t duration = -1;
     double diff = -1;
diff --git a/video/filter/vf.c b/video/filter/vf.c
index 6a7c98aef2..874a03bed8 100644
--- a/video/filter/vf.c
+++ b/video/filter/vf.c
@@ -419,14 +419,12 @@ int vf_filter_frame(struct vf_chain *c, struct mp_image *img)
     return vf_do_filter(c->first, img);
 }
 
-// Output the next queued image (if any) from the full filter chain.
-// The frame can be retrieved with vf_read_output_frame().
-//  eof: if set, assume there's no more input i.e. vf_filter_frame() will
-//       not be called (until reset) - flush all internally delayed frames
-//  returns: -1: error, 0: no output, 1: output available
-int vf_output_frame(struct vf_chain *c, bool eof)
+// Similar to vf_output_frame(), but only ensure that the filter "until" has
+// output, instead of the end of the filter chain.
+static int vf_output_frame_until(struct vf_chain *c, struct vf_instance *until,
+                                 bool eof)
 {
-    if (c->last->num_out_queued)
+    if (until->num_out_queued)
         return 1;
     if (c->initialized < 1)
         return -1;
@@ -442,10 +440,12 @@ int vf_output_frame(struct vf_chain *c, bool eof)
             }
             if (vf_has_output_frame(cur))
                 last = cur;
+            if (cur == until)
+                break;
         }
         if (!last)
             return 0;
-        if (!last->next)
+        if (last == until)
             return 1;
         int r = vf_do_filter(last->next, vf_dequeue_output_frame(last));
         if (r < 0)
@@ -453,6 +453,16 @@ int vf_output_frame(struct vf_chain *c, bool eof)
     }
 }
 
+// Output the next queued image (if any) from the full filter chain, i.e.
+// up to c->last. The frame can be retrieved with vf_read_output_frame().
+//  eof: if set, assume there's no more input i.e. vf_filter_frame() will
+//       not be called (until reset) - flush all internally delayed frames
+//  returns: -1: error, 0: no output, 1: output available
+int vf_output_frame(struct vf_chain *c, bool eof)
+{
+    return vf_output_frame_until(c, c->last, eof);
+}
+
 struct mp_image *vf_read_output_frame(struct vf_chain *c)
 {
     if (!c->last->num_out_queued)
@@ -460,6 +470,34 @@ struct mp_image *vf_read_output_frame(struct vf_chain *c)
     return vf_dequeue_output_frame(c->last);
 }
 
+// Some filters (vf_vapoursynth) filter on separate threads, and may need new
+// input from the decoder, even though the core does not need a new output image
+// yet (this is required to get proper pipelining in the filter). If the filter
+// needs new data, it will call c->wakeup_callback, which in turn causes the
+// core to recheck the filter chain, calling this function. Each filter is asked
+// whether it needs a frame (with vf->needs_input), and if so, this function
+// tries to feed it a frame produced by the preceding filters. If none is
+// available, a new frame is requested from the core by returning 1.
+// returns -1: error, 0: nothing needed, 1: add new frame with vf_filter_frame()
+int vf_needs_input(struct vf_chain *c)
+{
+    struct vf_instance *prev = c->first;
+    for (struct vf_instance *cur = c->first; cur; cur = cur->next) {
+        while (cur->needs_input && cur->needs_input(cur)) { // optional hook
+            // Get frames from preceding filters, or if there are none,
+            // request new frames from decoder.
+            int r = vf_output_frame_until(c, prev, false);
+            if (r < 1)
+                return r < 0 ? -1 : 1; // r == 0: no frame, ask the core
+            r = vf_do_filter(cur, vf_dequeue_output_frame(prev));
+            if (r < 0)
+                return r;
+        }
+        prev = cur; // cur becomes the frame source for the next filter
+    }
+    return 0;
+}
+
 static void vf_forget_frames(struct vf_instance *vf)
 {
     for (int n = 0; n < vf->num_out_queued; n++)
diff --git a/video/filter/vf.h b/video/filter/vf.h
index e7e53bc3ce..4ff8398229 100644
--- a/video/filter/vf.h
+++ b/video/filter/vf.h
@@ -84,6 +84,12 @@ typedef struct vf_instance {
     // May be called multiple times, even if the filter gives no output.
     int (*filter_out)(struct vf_instance *vf);
 
+    // Optional function that checks whether the filter needs additional
+    // input. This is for filters with asynchronous behavior: they filter
+    // frames in the background, and to get good pipelining behavior, new
+    // data should be fed, even if the playback core doesn't need any yet.
+    bool (*needs_input)(struct vf_instance *vf);
+
     void (*uninit)(struct vf_instance *vf);
 
     char *label;
@@ -121,6 +127,12 @@ struct vf_chain {
     struct MPOpts *opts;
     struct mpv_global *global;
     struct mp_hwdec_info *hwdec;
+
+    // Call when the filter chain wants new processing (for filters with
+    // asynchronous behavior) - must be immutable once filters are created,
+    // since they are supposed to call it from foreign threads.
+    void (*wakeup_callback)(void *ctx);
+    void *wakeup_callback_ctx;
 };
 
 typedef struct vf_seteq {
@@ -150,6 +162,7 @@ int vf_control_any(struct vf_chain *c, int cmd, void *arg);
 int vf_control_by_label(struct vf_chain *c, int cmd, void *arg, bstr label);
 int vf_filter_frame(struct vf_chain *c, struct mp_image *img);
 int vf_output_frame(struct vf_chain *c, bool eof);
+int vf_needs_input(struct vf_chain *c);
 struct mp_image *vf_read_output_frame(struct vf_chain *c);
 void vf_seek_reset(struct vf_chain *c);
 struct vf_instance *vf_append_filter(struct vf_chain *c, const char *name,
diff --git a/video/filter/vf_vapoursynth.c b/video/filter/vf_vapoursynth.c
index ba630f8df1..30fd992071 100644
--- a/video/filter/vf_vapoursynth.c
+++ b/video/filter/vf_vapoursynth.c
@@ -381,6 +381,17 @@ static int filter_out(struct vf_instance *vf)
     return ret;
 }
 
+static bool needs_input(struct vf_instance *vf) // vf->needs_input callback
+{
+    struct vf_priv_s *p = vf->priv;
+    bool r = false; // set if < max_requests outputs queued and input needed
+    pthread_mutex_lock(&p->lock); // both locked_*() helpers run under p->lock
+    locked_read_output(vf); // presumably collects finished frames first
+    r = vf->num_out_queued < p->max_requests && locked_need_input(vf);
+    pthread_mutex_unlock(&p->lock);
+    return r;
+}
+
 static void VS_CC infiltInit(VSMap *in, VSMap *out, void **instanceData,
                              VSNode *node, VSCore *core, const VSAPI *vsapi)
 {
@@ -451,6 +462,8 @@ static const VSFrameRef *VS_CC infiltGetFrame(int frameno, int activationReason,
             if (p->num_buffered) {
                 drain_oldest_buffered_frame(p);
                 pthread_cond_broadcast(&p->wakeup);
+                if (vf->chain->wakeup_callback)
+                    vf->chain->wakeup_callback(vf->chain->wakeup_callback_ctx);
                 continue;
             }
         }
@@ -682,6 +695,7 @@ static int vf_open(vf_instance_t *vf)
     vf->config = config;
     vf->filter_ext = filter_ext;
     vf->filter_out = filter_out;
+    vf->needs_input = needs_input;
     vf->query_format = query_format;
     vf->control = control;
     vf->uninit = uninit;
-- 
cgit v1.2.3