diff options
-rw-r--r-- | video/out/d3d11/ra_d3d11.c | 2 | ||||
-rw-r--r-- | video/out/gpu/ra.h | 4 | ||||
-rw-r--r-- | video/out/gpu/video.c | 9 | ||||
-rw-r--r-- | video/out/opengl/gl_headers.h | 1 | ||||
-rw-r--r-- | video/out/opengl/ra_gl.c | 2 | ||||
-rw-r--r-- | video/out/placebo/ra_pl.c | 1 |
6 files changed, 13 insertions, 6 deletions
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c index 13a48a114c..b4d4c7fdb8 100644 --- a/video/out/d3d11/ra_d3d11.c +++ b/video/out/d3d11/ra_d3d11.c @@ -2386,6 +2386,8 @@ struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log, if (p->fl >= D3D_FEATURE_LEVEL_11_0) { ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW; ra->max_shmem = 32 * 1024; + ra->max_compute_group_threads = + D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; } if (p->fl >= D3D_FEATURE_LEVEL_11_1) { diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h index cc7fd570f1..85e293d84b 100644 --- a/video/out/gpu/ra.h +++ b/video/out/gpu/ra.h @@ -26,6 +26,10 @@ struct ra { // time. size_t max_shmem; + // Maximum number of threads in a compute work group. Set by the RA backend + // at init time. + size_t max_compute_group_threads; + // Maximum push constant size. Set by the RA backend at init time. size_t max_pushc_size; diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index 035619891b..841cf36db4 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -2698,12 +2698,9 @@ static void pass_dither(struct gl_video *p) struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components); - // Ensure the block size doesn't exceed the minimum defined by the - // specification (1024 in desktop GL, 128 in GLES). - // TODO: Look up the actual maximum block size for the - // implementation using: - // glGetIntegerv(MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &value); - int block_size = MPMIN(p->ra->glsl_es ? 128 : 1024, o_h); + // Ensure the block size doesn't exceed the maximum of the + // implementation. + int block_size = MPMIN(p->ra->max_compute_group_threads, o_h); pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)", kernel->name, dst_depth); diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h index ad276b3c7b..5c367185bd 100644 --- a/video/out/opengl/gl_headers.h +++ b/video/out/opengl/gl_headers.h @@ -138,6 +138,7 @@ typedef uint64_t GLuint64; #define GL_COMPUTE_SHADER 0x91B9 #define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262 +#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB // --- GL 4.3 or GL_ARB_shader_storage_buffer_object diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c index e08f5ed9bf..8eddb5fabc 100644 --- a/video/out/opengl/ra_gl.c +++ b/video/out/opengl/ra_gl.c @@ -224,6 +224,8 @@ static int ra_init_gl(struct ra *ra, GL *gl) if (ra->caps & RA_CAP_COMPUTE) { gl->GetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &ival); ra->max_shmem = ival; + gl->GetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &ival); + ra->max_compute_group_threads = ival; } gl->Disable(GL_DITHER); diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c index ccff9df499..e1baf09a2f 100644 --- a/video/out/placebo/ra_pl.c +++ b/video/out/placebo/ra_pl.c @@ -67,6 +67,7 @@ struct ra *ra_create_pl(const struct pl_gpu *gpu, struct mp_log *log) ra->max_texture_wh = gpu->limits.max_tex_2d_dim; ra->max_shmem = gpu->limits.max_shmem_size; ra->max_pushc_size = gpu->limits.max_pushc_size; + ra->max_compute_group_threads = gpu->limits.max_group_threads; // Set up format wrappers for (int i = 0; i < gpu->num_formats; i++) { |