vo_gpu: aggressively prefer async compute

On AMD devices, we only get one graphics pipe but several compute pipes, which can (in theory) run independently. As such, we should prefer compute shaders over fragment shaders in scenarios where we expect them to be better for parallelism. This is amusingly trivial to do, and it actually improves performance even in a single-queue scenario.
author: Niklas Haas <git@haasn.xyz> 2017-09-24 15:21:37 +0200
committer: Martin Herkt <652892+lachs0r@users.noreply.github.com> 2017-12-25 00:47:53 +0100
commit: dcda8bd36aa8eb0003c301b4564cd01f7870fe34 (patch)
tree: e24046b6e4c85c7dcabf32f6d69159f50f2d2dbf /video/out/gpu/video.c
parent: bded247fb53558dd5cba26560d1f24e9234ae24e (diff)
download: mpv-dcda8bd36aa8eb0003c301b4564cd01f7870fe34.tar.bz2 mpv-dcda8bd36aa8eb0003c301b4564cd01f7870fe34.tar.xz
1 file changed, 5 insertions, 0 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 3f0959931d..1b50166dc4 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -1237,6 +1237,11 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
         return;
     }
 
+    // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
+    // over fragment shaders wherever possible.
+    if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
+        pass_is_compute(p, 16, 16);
+
     if (p->pass_compute.active) {
         gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
         if (!p->pass_compute.directly_writes)
author	Niklas Haas <git@haasn.xyz>	2017-09-24 15:21:37 +0200
committer	Martin Herkt <652892+lachs0r@users.noreply.github.com>	2017-12-25 00:47:53 +0100
commit	dcda8bd36aa8eb0003c301b4564cd01f7870fe34 (patch)
tree	e24046b6e4c85c7dcabf32f6d69159f50f2d2dbf /video/out/gpu/video.c
parent	bded247fb53558dd5cba26560d1f24e9234ae24e (diff)
download	mpv-dcda8bd36aa8eb0003c301b4564cd01f7870fe34.tar.bz2 mpv-dcda8bd36aa8eb0003c301b4564cd01f7870fe34.tar.xz