summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--DOCS/man/options.rst11
-rw-r--r--test/scale_zimg.c1
-rw-r--r--video/zimg.c128
-rw-r--r--video/zimg.h3
4 files changed, 123 insertions, 20 deletions
diff --git a/DOCS/man/options.rst b/DOCS/man/options.rst
index b85b67c991..95bec1c3d3 100644
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@@ -4283,6 +4283,17 @@ Software Scaler
``--zimg-dither=<no|ordered|random|error-diffusion>``
Dithering (default: random).
+``--zimg-threads=<auto|integer>``
+ Set the maximum number of threads to use for scaling (default: auto).
+ ``auto`` uses the number of logical cores on the current machine. Note that
+ the scaler may use fewer threads (or even just 1 thread) depending on factors
+ such as the output image height. Passing a value of 1 disables threading and
+ always scales the image in a single operation. Higher thread counts consume
+ more resources, but typically make scaling faster.
+
+ Note that some zimg git versions have bugs that corrupt the output if
+ threads are used.
+
``--zimg-fast=<yes|no>``
Allow optimizations that help with performance, but reduce quality (default:
yes). Currently, this may simplify gamma conversion operations.
diff --git a/test/scale_zimg.c b/test/scale_zimg.c
index 5fcea20d45..f5957067c2 100644
--- a/test/scale_zimg.c
+++ b/test/scale_zimg.c
@@ -24,6 +24,7 @@ static const struct scale_test_fns fns = {
static void run(struct test_ctx *ctx)
{
struct mp_zimg_context *zimg = mp_zimg_alloc();
+ zimg->opts.threads = 1;
struct scale_test *stest = talloc_zero(NULL, struct scale_test);
stest->fns = &fns;
diff --git a/video/zimg.c b/video/zimg.c
index fb0fc23c74..a73293c8c6 100644
--- a/video/zimg.c
+++ b/video/zimg.c
@@ -17,12 +17,13 @@
#include <math.h>
-#include <libavutil/bswap.h>
-#include <libavutil/pixfmt.h>
+#include <libavutil/cpu.h>
#include "common/common.h"
#include "common/msg.h"
#include "csputils.h"
+#include "misc/thread_pool.h"
+#include "misc/thread_tools.h"
#include "options/m_config.h"
#include "options/m_option.h"
#include "repack.h"
@@ -70,6 +71,7 @@ const struct m_sub_options zimg_conf = {
{"random", ZIMG_DITHER_RANDOM},
{"error-diffusion", ZIMG_DITHER_ERROR_DIFFUSION})},
{"fast", OPT_FLAG(fast)},
+ {"threads", OPT_CHOICE(threads, {"auto", 0}), M_RANGE(1, 64)},
{0}
},
.size = sizeof(struct zimg_opts),
@@ -82,6 +84,9 @@ struct mp_zimg_state {
void *tmp_alloc;
struct mp_zimg_repack *src;
struct mp_zimg_repack *dst;
+ int slice_y, slice_h; // y start position, height of target slice
+ double scale_y;
+ struct mp_waiter thread_waiter;
};
struct mp_zimg_repack {
@@ -102,6 +107,7 @@ struct mp_zimg_repack {
// Temporary memory for zimg buffer.
zimg_image_buffer zbuf;
+ struct mp_image cropped_tmp;
int real_w, real_h; // aligned size
};
@@ -198,6 +204,7 @@ static void free_mp_zimg(void *p)
struct mp_zimg_context *ctx = p;
destroy_zimg(ctx);
+ TA_FREEP(&ctx->tp);
}
struct mp_zimg_context *mp_zimg_alloc(void)
@@ -242,11 +249,19 @@ static int repack_entrypoint(void *user, unsigned i, unsigned x0, unsigned x1)
return 0;
}
-static bool wrap_buffer(struct mp_zimg_repack *r, struct mp_image *mpi)
+static bool wrap_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r,
+ struct mp_image *a_mpi)
{
zimg_image_buffer *buf = &r->zbuf;
*buf = (zimg_image_buffer){ZIMG_API_VERSION};
+ struct mp_image *mpi = a_mpi;
+ if (r->pack) {
+ mpi = &r->cropped_tmp;
+ *mpi = *a_mpi;
+ mp_image_crop(mpi, 0, st->slice_y, mpi->w, st->slice_y + st->slice_h);
+ }
+
bool direct[MP_MAX_PLANES] = {0};
for (int p = 0; p < mpi->num_planes; p++) {
@@ -354,16 +369,27 @@ static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r,
r->num_planes = desc.num_planes;
+ // Take care of input/output size, including slicing.
// Note: formats with subsampled chroma may have odd width or height in
// mpv and FFmpeg. This is because the width/height is actually a cropping
// rectangle. Reconstruct the image allocation size and set the cropping.
zfmt->width = r->real_w = MP_ALIGN_UP(fmt.w, 1 << desc.chroma_xs);
zfmt->height = r->real_h = MP_ALIGN_UP(fmt.h, 1 << desc.chroma_ys);
- if (!r->pack && st) {
- // Relies on st->dst being initialized first.
- struct mp_zimg_repack *dst = st->dst;
- zfmt->active_region.width = dst->real_w * (double)fmt.w / dst->fmt.w;
- zfmt->active_region.height = dst->real_h * (double)fmt.h / dst->fmt.h;
+ if (st) {
+ if (r->pack) {
+ zfmt->height = r->real_h = st->slice_h =
+ MPMIN(st->slice_y + st->slice_h, r->real_h) - st->slice_y;
+
+ assert(MP_IS_ALIGNED(r->real_h, 1 << desc.chroma_ys));
+ } else {
+ // Relies on st->dst being initialized first.
+ struct mp_zimg_repack *dst = st->dst;
+
+ zfmt->active_region.width = dst->real_w * (double)fmt.w / dst->fmt.w;
+ zfmt->active_region.height = dst->real_h * st->scale_y;
+
+ zfmt->active_region.top = st->slice_y * st->scale_y;
+ }
}
zfmt->subsample_w = desc.chroma_xs;
@@ -440,13 +466,13 @@ static bool allocate_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r)
// Either ZIMG_BUFFER_MAX, or a power-of-2 slice buffer.
assert(r->zmask[0] == ZIMG_BUFFER_MAX || MP_IS_POWER_OF_2(r->zmask[0] + 1));
- int h = r->zmask[0] == ZIMG_BUFFER_MAX ? r->fmt.h : r->zmask[0] + 1;
- if (h >= r->fmt.h) {
- h = r->fmt.h;
+ int h = r->zmask[0] == ZIMG_BUFFER_MAX ? r->real_h : r->zmask[0] + 1;
+ if (h >= r->real_h) {
+ h = r->real_h;
r->zmask[0] = ZIMG_BUFFER_MAX;
}
- r->tmp = mp_image_alloc(r->zimgfmt, r->fmt.w, h);
+ r->tmp = mp_image_alloc(r->zimgfmt, r->real_w, h);
talloc_steal(r, r->tmp);
if (!r->tmp)
@@ -465,13 +491,18 @@ static bool allocate_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r)
}
static bool mp_zimg_state_init(struct mp_zimg_context *ctx,
- struct mp_zimg_state *st)
+ struct mp_zimg_state *st,
+ int slice_y, int slice_h)
{
struct zimg_opts *opts = &ctx->opts;
st->src = talloc_zero(NULL, struct mp_zimg_repack);
st->dst = talloc_zero(NULL, struct mp_zimg_repack);
+ st->scale_y = ctx->src.h / (double)ctx->dst.h;
+ st->slice_y = slice_y;
+ st->slice_h = slice_h;
+
zimg_image_format src_fmt, dst_fmt;
// Note: do dst first, because src uses fields from dst.
@@ -532,15 +563,49 @@ bool mp_zimg_config(struct mp_zimg_context *ctx)
if (ctx->opts_cache)
mp_zimg_update_from_cmdline(ctx);
- struct mp_zimg_state *st = talloc_zero(NULL, struct mp_zimg_state);
- MP_TARRAY_APPEND(ctx, ctx->states, ctx->num_states, st);
+ int slices = ctx->opts.threads;
+ if (slices < 1)
+ slices = av_cpu_count();
+ slices = MPCLAMP(slices, 1, 64);
+
+ struct mp_imgfmt_desc dstfmt = mp_imgfmt_get_desc(ctx->dst.imgfmt);
+ if (!dstfmt.align_y)
+ goto fail;
+ int full_h = MP_ALIGN_UP(ctx->dst.h, dstfmt.align_y);
+ int slice_h = (full_h + slices - 1) / slices;
+ slice_h = MP_ALIGN_UP(slice_h, dstfmt.align_y);
+ slice_h = MP_ALIGN_UP(slice_h, 64); // for dithering and minimum slice size
+ slices = (full_h + slice_h - 1) / slice_h;
+
+ int threads = slices - 1;
+ if (threads != ctx->current_thread_count) {
+ // Just destroy and recreate all - dumb and costly, but rarely happens.
+ TA_FREEP(&ctx->tp);
+ ctx->current_thread_count = 0;
+ if (threads) {
+ MP_VERBOSE(ctx, "using %d threads for scaling\n", threads);
+ ctx->tp = mp_thread_pool_create(NULL, threads, threads, threads);
+ if (!ctx->tp)
+ goto fail;
+ ctx->current_thread_count = threads;
+ }
+ }
- if (!mp_zimg_state_init(ctx, st)) {
- destroy_zimg(ctx);
- return false;
+ for (int n = 0; n < slices; n++) {
+ struct mp_zimg_state *st = talloc_zero(NULL, struct mp_zimg_state);
+ MP_TARRAY_APPEND(ctx, ctx->states, ctx->num_states, st);
+
+ if (!mp_zimg_state_init(ctx, st, n * slice_h, slice_h))
+ goto fail;
}
+ assert(ctx->num_states == slices);
+
return true;
+
+fail:
+ destroy_zimg(ctx);
+ return false;
}
bool mp_zimg_config_image_params(struct mp_zimg_context *ctx)
@@ -577,6 +642,14 @@ static void do_convert(struct mp_zimg_state *st)
repack_entrypoint, st->dst);
}
+static void do_convert_thread(void *ptr) // Work item passed to mp_thread_pool_run(); ptr is the struct mp_zimg_state of one slice.
+{
+ struct mp_zimg_state *st = ptr;
+
+ do_convert(st); // Same conversion routine mp_zimg_convert() calls directly for states[0].
+ mp_waiter_wakeup(&st->thread_waiter, 0); // Wakes the mp_waiter_wait() on this state in mp_zimg_convert().
+}
+
bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst,
struct mp_image *src)
{
@@ -591,15 +664,30 @@ bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst,
for (int n = 0; n < ctx->num_states; n++) {
struct mp_zimg_state *st = ctx->states[n];
- if (!wrap_buffer(st->src, src) || !wrap_buffer(st->dst, dst)) {
+ if (!wrap_buffer(st, st->src, src) || !wrap_buffer(st, st->dst, dst)) {
MP_ERR(ctx, "zimg repacker initialization failed.\n");
return false;
}
}
- assert(ctx->num_states == 1);
+ for (int n = 1; n < ctx->num_states; n++) {
+ struct mp_zimg_state *st = ctx->states[n];
+
+ st->thread_waiter = (struct mp_waiter)MP_WAITER_INITIALIZER;
+
+ bool r = mp_thread_pool_run(ctx->tp, do_convert_thread, st);
+ // This is guaranteed by the API; and unrolling would be inconvenient.
+ assert(r);
+ }
+
do_convert(ctx->states[0]);
+ for (int n = 1; n < ctx->num_states; n++) {
+ struct mp_zimg_state *st = ctx->states[n];
+
+ mp_waiter_wait(&st->thread_waiter);
+ }
+
return true;
}
diff --git a/video/zimg.h b/video/zimg.h
index 01e731cc36..a4f1954402 100644
--- a/video/zimg.h
+++ b/video/zimg.h
@@ -20,6 +20,7 @@ struct zimg_opts {
double scaler_chroma_params[2];
int dither;
int fast;
+ int threads;
};
extern const struct zimg_opts zimg_opts_defaults;
@@ -42,6 +43,8 @@ struct mp_zimg_context {
struct m_config_cache *opts_cache;
struct mp_zimg_state **states;
int num_states;
+ struct mp_thread_pool *tp;
+ int current_thread_count;
};
// Allocate a zimg context. Always succeeds. Returns a talloc pointer (use