summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- video/out/d3d11/ra_d3d11.c 2
-rw-r--r-- video/out/gpu/ra.h 4
-rw-r--r-- video/out/gpu/video.c 9
-rw-r--r-- video/out/opengl/gl_headers.h 1
-rw-r--r-- video/out/opengl/ra_gl.c 2
-rw-r--r-- video/out/placebo/ra_pl.c 1
6 files changed, 13 insertions, 6 deletions
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c
index 13a48a114c..b4d4c7fdb8 100644
--- a/video/out/d3d11/ra_d3d11.c
+++ b/video/out/d3d11/ra_d3d11.c
@@ -2386,6 +2386,8 @@ struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW;
ra->max_shmem = 32 * 1024;
+ ra->max_compute_group_threads =
+ D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
}
if (p->fl >= D3D_FEATURE_LEVEL_11_1) {
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index cc7fd570f1..85e293d84b 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -26,6 +26,10 @@ struct ra {
// time.
size_t max_shmem;
+ // Maximum number of threads in a compute work group. Set by the RA backend
+ // at init time.
+ size_t max_compute_group_threads;
+
// Maximum push constant size. Set by the RA backend at init time.
size_t max_pushc_size;
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 035619891b..841cf36db4 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2698,12 +2698,9 @@ static void pass_dither(struct gl_video *p)
struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components);
- // Ensure the block size doesn't exceed the minimum defined by the
- // specification (1024 in desktop GL, 128 in GLES).
- // TODO: Look up the actual maximum block size for the
- // implementation using:
- // glGetIntegerv(MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &value);
- int block_size = MPMIN(p->ra->glsl_es ? 128 : 1024, o_h);
+ // Ensure the block size doesn't exceed the maximum of the
+ // implementation.
+ int block_size = MPMIN(p->ra->max_compute_group_threads, o_h);
pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)",
kernel->name, dst_depth);
diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h
index ad276b3c7b..5c367185bd 100644
--- a/video/out/opengl/gl_headers.h
+++ b/video/out/opengl/gl_headers.h
@@ -138,6 +138,7 @@ typedef uint64_t GLuint64;
#define GL_COMPUTE_SHADER 0x91B9
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
// --- GL 4.3 or GL_ARB_shader_storage_buffer_object
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index e08f5ed9bf..8eddb5fabc 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -224,6 +224,8 @@ static int ra_init_gl(struct ra *ra, GL *gl)
if (ra->caps & RA_CAP_COMPUTE) {
gl->GetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &ival);
ra->max_shmem = ival;
+ gl->GetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &ival);
+ ra->max_compute_group_threads = ival;
}
gl->Disable(GL_DITHER);
diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c
index ccff9df499..e1baf09a2f 100644
--- a/video/out/placebo/ra_pl.c
+++ b/video/out/placebo/ra_pl.c
@@ -67,6 +67,7 @@ struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
ra->max_texture_wh = gpu->limits.max_tex_2d_dim;
ra->max_shmem = gpu->limits.max_shmem_size;
ra->max_pushc_size = gpu->limits.max_pushc_size;
+ ra->max_compute_group_threads = gpu->limits.max_group_threads;
// Set up format wrappers
for (int i = 0; i < gpu->num_formats; i++) {