summaryrefslogtreecommitdiffstats
path: root/video
diff options
context:
space:
mode:
Diffstat (limited to 'video')
-rw-r--r--video/out/gpu/ra.h1
-rw-r--r--video/out/gpu/video.c5
-rw-r--r--video/out/vulkan/ra_vk.c7
3 files changed, 12 insertions, 1 deletion
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
index ffb010960a..08ccdaee70 100644
--- a/video/out/gpu/ra.h
+++ b/video/out/gpu/ra.h
@@ -53,6 +53,7 @@ enum {
RA_CAP_GLOBAL_UNIFORM = 1 << 8, // supports using "naked" uniforms (not UBO)
RA_CAP_GATHER = 1 << 9, // supports textureGather in GLSL
RA_CAP_FRAGCOORD = 1 << 10, // supports reading from gl_FragCoord
+ RA_CAP_PARALLEL_COMPUTE = 1 << 11, // supports parallel compute shaders
};
enum ra_ctype {
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 3f0959931d..1b50166dc4 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -1237,6 +1237,11 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
return;
}
+ // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
+ // over fragment shaders wherever possible.
+ if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
+ pass_is_compute(p, 16, 16);
+
if (p->pass_compute.active) {
gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
if (!p->pass_compute.directly_writes)
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
index 905fc89596..f0353629e6 100644
--- a/video/out/vulkan/ra_vk.c
+++ b/video/out/vulkan/ra_vk.c
@@ -208,8 +208,13 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
ra->max_pushc_size = vk->limits.maxPushConstantsSize;
- if (vk->pool_compute)
+ if (vk->pool_compute) {
ra->caps |= RA_CAP_COMPUTE;
+ // If we have more compute queues than graphics queues, we probably
+ // want to be using them. (This seems mostly relevant for AMD)
+ if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues)
+ ra->caps |= RA_CAP_PARALLEL_COMPUTE;
+ }
if (!vk_setup_formats(ra))
goto error;