vo_gpu: stop hard-coding max compute group threads

We've been assuming that the maximum number of compute group threads is never less than the 1024 defined by the desktop GL spec. Given that we haven't had working compute shaders for GLES, and that the Vulkan spec presumably defines at least as high a value, we've gotten away with it so far. But we should really look the value up and respect it.
author: Philip Langdale <philipl@overt.org> 2021-12-17 15:55:55 -0800
committer: sfan5 <sfan5@live.de> 2021-12-19 01:51:54 +0100
commit: fd63bf398af14c95ee7df18815dc3c9d1b4d9b49 (patch)
tree: e3da78ba264bdb4a2bca721b9d27da6aab10a423 /video/out
parent: c8b6dc489baf21b3945fabfb069be884c53e138a (diff)
download: mpv-fd63bf398af14c95ee7df18815dc3c9d1b4d9b49.tar.bz2
mpv-fd63bf398af14c95ee7df18815dc3c9d1b4d9b49.tar.xz
6 files changed, 13 insertions, 6 deletions
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c
index 13a48a114c..b4d4c7fdb8 100644
--- a/video/out/d3d11/ra_d3d11.c
+++ b/video/out/d3d11/ra_d3d11.c
@@ -2386,6 +2386,8 @@ struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
     if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
         ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW;
         ra->max_shmem = 32 * 1024;
+        ra->max_compute_group_threads =
+            D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
     }
 
     if (p->fl >= D3D_FEATURE_LEVEL_11_1) {
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index cc7fd570f1..85e293d84b 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -26,6 +26,10 @@ struct ra {
     // time.
     size_t max_shmem;
 
+    // Maximum number of threads in a compute work group. Set by the RA backend
+    // at init time.
+    size_t max_compute_group_threads;
+
     // Maximum push constant size. Set by the RA backend at init time.
     size_t max_pushc_size;
 
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 035619891b..841cf36db4 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -2698,12 +2698,9 @@ static void pass_dither(struct gl_video *p)
 
             struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components);
 
-            // Ensure the block size doesn't exceed the minimum defined by the
-            // specification (1024 in desktop GL, 128 in GLES).
-            // TODO: Look up the actual maximum block size for the
-            // implementation using:
-            //     glGetIntegerv(MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &value);
-            int block_size = MPMIN(p->ra->glsl_es ? 128 : 1024, o_h);
+            // Ensure the block size doesn't exceed the maximum of the
+            // implementation.
+            int block_size = MPMIN(p->ra->max_compute_group_threads, o_h);
 
             pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)",
                              kernel->name, dst_depth);
diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h
index ad276b3c7b..5c367185bd 100644
--- a/video/out/opengl/gl_headers.h
+++ b/video/out/opengl/gl_headers.h
@@ -138,6 +138,7 @@ typedef uint64_t GLuint64;
 
 #define GL_COMPUTE_SHADER                 0x91B9
 #define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
 
 // --- GL 4.3 or GL_ARB_shader_storage_buffer_object
 
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
index e08f5ed9bf..8eddb5fabc 100644
--- a/video/out/opengl/ra_gl.c
+++ b/video/out/opengl/ra_gl.c
@@ -224,6 +224,8 @@ static int ra_init_gl(struct ra *ra, GL *gl)
     if (ra->caps & RA_CAP_COMPUTE) {
         gl->GetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &ival);
         ra->max_shmem = ival;
+        gl->GetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &ival);
+        ra->max_compute_group_threads = ival;
     }
 
     gl->Disable(GL_DITHER);
diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c
index ccff9df499..e1baf09a2f 100644
--- a/video/out/placebo/ra_pl.c
+++ b/video/out/placebo/ra_pl.c
@@ -67,6 +67,7 @@ struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log)
     ra->max_texture_wh = gpu->limits.max_tex_2d_dim;
     ra->max_shmem = gpu->limits.max_shmem_size;
     ra->max_pushc_size = gpu->limits.max_pushc_size;
+    ra->max_compute_group_threads = gpu->limits.max_group_threads;
 
     // Set up format wrappers
     for (int i = 0; i < gpu->num_formats; i++) {
author	Philip Langdale <philipl@overt.org>	2021-12-17 15:55:55 -0800
committer	sfan5 <sfan5@live.de>	2021-12-19 01:51:54 +0100
commit	fd63bf398af14c95ee7df18815dc3c9d1b4d9b49 (patch)
tree	e3da78ba264bdb4a2bca721b9d27da6aab10a423 /video/out
parent	c8b6dc489baf21b3945fabfb069be884c53e138a (diff)
download	mpv-fd63bf398af14c95ee7df18815dc3c9d1b4d9b49.tar.bz2 mpv-fd63bf398af14c95ee7df18815dc3c9d1b4d9b49.tar.xz