summary | refs | log | tree | commit | diff | stats
path: root/video/out
diff options
context:
space:
mode:
author Philip Langdale <philipl@overt.org> 2021-12-17 15:55:55 -0800
committer sfan5 <sfan5@live.de> 2021-12-19 01:51:54 +0100
commit fd63bf398af14c95ee7df18815dc3c9d1b4d9b49 (patch)
tree e3da78ba264bdb4a2bca721b9d27da6aab10a423 /video/out
parent c8b6dc489baf21b3945fabfb069be884c53e138a (diff)
downloadmpv-fd63bf398af14c95ee7df18815dc3c9d1b4d9b49.tar.bz2
mpv-fd63bf398af14c95ee7df18815dc3c9d1b4d9b49.tar.xz
vo_gpu: stop hard-coding max compute group threads
We've been assuming that the maximum number of compute group threads is never less than the 1024 defined by the desktop GL spec. Given that we haven't had working compute shaders for GLES, and that I guess the Vulkan spec defines at least as high a value, we've gotten away with it so far. But we should really look the value up and respect it.
Diffstat (limited to 'video/out')
-rw-r--r--video/out/d3d11/ra_d3d11.c2
-rw-r--r--video/out/gpu/ra.h4
-rw-r--r--video/out/gpu/video.c9
-rw-r--r--video/out/opengl/gl_headers.h1
-rw-r--r--video/out/opengl/ra_gl.c2
-rw-r--r--video/out/placebo/ra_pl.c1
6 files changed, 13 insertions, 6 deletions
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c
index 13a48a114c..b4d4c7fdb8 100644
--- a/video/out/d3d11/ra_d3d11.c
+++ b/video/out/d3d11/ra_d3d11.c
@@ -2386,6 +2386,8 @@ struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW;
ra->max_shmem = 32 * 1024;
+ ra->max_compute_group_threads =
+ D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
}
if (p->fl >= D3D_FEATURE_LEVEL_11_1) {
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index cc7fd570f1..85e293d84b 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -26,6 +26,10 @@ struct ra {
// time.
size_t max_shmem;
+ // Maximum number of threads in a compute work group. Set by the RA backend
+ // at init time.
+ size_t max_compute_group_threads;
+
// Maximum push constant size. Set by the RA backend at init time.
size_t max_pushc_size;
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 035619891b..841cf36db4 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2698,12 +2698,9 @@ static void pass_dither(struct gl_video *p)
struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components);
- // Ensure the block size doesn't exceed the minimum defined by the
- // specification (1024 in desktop GL, 128 in GLES).
- // TODO: Look up the actual maximum block size for the
- // implementation using:
- // glGetIntegerv(MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &value);
- int block_size = MPMIN(p->ra->glsl_es ? 128 : 1024, o_h);
+ // Ensure the block size doesn't exceed the maximum of the
+ // implementation.
+ int block_size = MPMIN(p->ra->max_compute_group_threads, o_h);
pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)",
kernel->name, dst_depth);
diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h
index ad276b3c7b..5c367185bd 100644
--- a/video/out/opengl/gl_headers.h
+++ b/video/out/opengl/gl_headers.h
@@ -138,6 +138,7 @@ typedef uint64_t GLuint64;
#define GL_COMPUTE_SHADER 0x91B9
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
// --- GL 4.3 or GL_ARB_shader_storage_buffer_object
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index e08f5ed9bf..8eddb5fabc 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -224,6 +224,8 @@ static int ra_init_gl(struct ra *ra, GL *gl)
if (ra->caps & RA_CAP_COMPUTE) {
gl->GetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &ival);
ra->max_shmem = ival;
+ gl->GetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &ival);
+ ra->max_compute_group_threads = ival;
}
gl->Disable(GL_DITHER);
diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c
index ccff9df499..e1baf09a2f 100644
--- a/video/out/placebo/ra_pl.c
+++ b/video/out/placebo/ra_pl.c
@@ -67,6 +67,7 @@ struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
ra->max_texture_wh = gpu->limits.max_tex_2d_dim;
ra->max_shmem = gpu->limits.max_shmem_size;
ra->max_pushc_size = gpu->limits.max_pushc_size;
+ ra->max_compute_group_threads = gpu->limits.max_group_threads;
// Set up format wrappers
for (int i = 0; i < gpu->num_formats; i++) {