summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Rudolf Polzer <divverent@xonotic.org> 2012-09-10 13:03:32 +0200
committer wm4 <wm4@nowhere> 2012-09-18 21:08:20 +0200
commit 9343c1d3abe335a843fe30d9d9271da7c14cde05 (patch)
tree 95c94c822d8b4d1fd1828cbaef53146c1428f21e
parent c32082a1a7634e8a5e0bb2551056fe64e357d395 (diff)
download mpv-9343c1d3abe335a843fe30d9d9271da7c14cde05.tar.bz2
mpv-9343c1d3abe335a843fe30d9d9271da7c14cde05.tar.xz
vf_dlopen: improve performance for 1-frame-for-1-frame filters
This is done by requesting a buffer from the next filter in the chain, instead of always allocating our own. This allows the next filter to e.g. ensure its own preferred memory layout.
-rw-r--r-- TOOLS/vf_dlopen/Makefile 7
-rw-r--r-- libmpcodecs/vf_dlopen.c 74
2 files changed, 59 insertions, 22 deletions
diff --git a/TOOLS/vf_dlopen/Makefile b/TOOLS/vf_dlopen/Makefile
index 2fa4e740c2..c3bd4000c4 100644
--- a/TOOLS/vf_dlopen/Makefile
+++ b/TOOLS/vf_dlopen/Makefile
@@ -5,12 +5,17 @@ OBJECTS = $(patsubst %,%.o,$(FILTERS)) $(COMMON)
HEADERS = $(wildcard *.h)
OUT = $(patsubst %,%.so,$(FILTERS))
-CFLAGS ?= -Wall -Wextra -O3 -march=native -mtune=native
+CFLAGS ?= -Wall -Wextra -O3 -march=native -mtune=native -ffast-math
CPPFLAGS += -I../../libmpcodecs
CFLAGS += -fPIC
LDFLAGS += -shared -fPIC
+ifneq ($(LTO),)
+CFLAGS += -flto
+LDFLAGS += $(CFLAGS) -flto
+endif
+
all: $(OUT)
clean:
diff --git a/libmpcodecs/vf_dlopen.c b/libmpcodecs/vf_dlopen.c
index dc55b50d52..71da4f1636 100644
--- a/libmpcodecs/vf_dlopen.c
+++ b/libmpcodecs/vf_dlopen.c
@@ -54,6 +54,9 @@ static struct vf_priv_s {
// output mp_image_t stuff
mp_image_t *outpic[FILTER_MAX_OUTCNT];
+ // generic
+ unsigned int out_cnt, out_width, out_height;
+
// multi frame output
unsigned int outbufferpos;
unsigned int outbufferlen;
@@ -111,6 +114,11 @@ static int config(struct vf_instance *vf,
return 0;
}
+ // copy away stuff to sanity island
+ vf->priv->out_cnt = vf->priv->filter.out_cnt;
+ vf->priv->out_width = vf->priv->filter.out_width;
+ vf->priv->out_height = vf->priv->filter.out_height;
+
if (vf->priv->filter.out_fmt)
vf->priv->outfmt = mp_imgfmt_from_name(bstr0(vf->priv->filter.out_fmt),
false);
@@ -135,23 +143,24 @@ static int config(struct vf_instance *vf,
"filter config wants an unsupported output format\n");
return 0;
}
- if (!vf->priv->filter.out_cnt || vf->priv->filter.out_cnt >
- FILTER_MAX_OUTCNT) {
+ if (!vf->priv->out_cnt || vf->priv->out_cnt > FILTER_MAX_OUTCNT) {
mp_msg(MSGT_VFILTER, MSGL_ERR,
"filter config wants to yield zero or too many output frames\n");
return 0;
}
- int i;
- for (i = 0; i < vf->priv->filter.out_cnt; ++i) {
- vf->priv->outpic[i] =
- alloc_mpi(vf->priv->filter.out_width, vf->priv->filter.out_height,
- vf->priv->outfmt);
- set_imgprop(&vf->priv->filter.outpic[i], vf->priv->outpic[i]);
+ if (vf->priv->out_cnt >= 2) {
+ int i;
+ for (i = 0; i < vf->priv->out_cnt; ++i) {
+ vf->priv->outpic[i] =
+ alloc_mpi(vf->priv->out_width, vf->priv->out_height,
+ vf->priv->outfmt);
+ set_imgprop(&vf->priv->filter.outpic[i], vf->priv->outpic[i]);
+ }
}
- return vf_next_config(vf, vf->priv->filter.out_width,
- vf->priv->filter.out_height,
+ return vf_next_config(vf, vf->priv->out_width,
+ vf->priv->out_height,
vf->priv->filter.out_d_width,
vf->priv->filter.out_d_height,
flags, vf->priv->outfmt);
@@ -166,10 +175,12 @@ static void uninit(struct vf_instance *vf)
DLLClose(vf->priv->dll);
vf->priv->dll = NULL;
}
- int i;
- for (i = 0; i < vf->priv->filter.out_cnt; ++i) {
- free_mp_image(vf->priv->outpic[i]);
- vf->priv->outpic[i] = NULL;
+ if (vf->priv->out_cnt >= 2) {
+ int i;
+ for (i = 0; i < vf->priv->out_cnt; ++i) {
+ free_mp_image(vf->priv->outpic[i]);
+ vf->priv->outpic[i] = NULL;
+ }
}
if (vf->priv->qbuffer) {
free(vf->priv->qbuffer);
@@ -177,6 +188,7 @@ static void uninit(struct vf_instance *vf)
}
}
+// NOTE: only called if (vf->priv->out_cnt >= 2) {
static int continue_put_image(struct vf_instance *vf)
{
int k;
@@ -235,14 +247,34 @@ static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
}
vf->priv->filter.inpic.pts = pts;
- int ret = vf->priv->filter.put_image(&vf->priv->filter);
- if (ret <= 0)
- return ret;
+ if (vf->priv->out_cnt >= 2) {
+ // more than one out pic
+ int ret = vf->priv->filter.put_image(&vf->priv->filter);
+ if (ret <= 0)
+ return ret;
- vf->priv->outbuffermpi = mpi;
- vf->priv->outbufferlen = ret;
- vf->priv->outbufferpos = 0;
- return continue_put_image(vf);
+ vf->priv->outbuffermpi = mpi;
+ vf->priv->outbufferlen = ret;
+ vf->priv->outbufferpos = 0;
+ return continue_put_image(vf);
+ } else {
+ // efficient case: exactly one out pic
+ mp_image_t *dmpi =
+ vf_get_image(vf->next, vf->priv->outfmt,
+ MP_IMGTYPE_TEMP,
+ MP_IMGFLAG_ACCEPT_STRIDE | MP_IMGFLAG_PREFER_ALIGNED_STRIDE,
+ vf->priv->out_width, vf->priv->out_height);
+ set_imgprop(&vf->priv->filter.outpic[0], dmpi);
+
+ int ret = vf->priv->filter.put_image(&vf->priv->filter);
+ if (ret <= 0)
+ return ret;
+
+ // pass through qscale if we can
+ vf_clone_mpi_attributes(dmpi, mpi);
+
+ return vf_next_put_image(vf, dmpi, vf->priv->filter.outpic[0].pts);
+ }
}
//===========================================================================//