summary | refs | log | tree | commit | diff | stats
path: root/video/out/gpu
diff options
context:
space:
mode:
author: Niklas Haas <git@haasn.xyz> 2018-08-26 09:55:12 +0200
committer: sfan5 <sfan5@live.de> 2018-08-26 12:32:20 +0200
commit: 1890ca024e541e1d07bdee242b75b24391b4434a (patch)
tree: 5ab58a54c5e5fe99328a17fe7439890d3801875e /video/out/gpu
parent: a10754f038b7a69436bef536d9ed7cc4755ba0d1 (diff)
download: mpv-1890ca024e541e1d07bdee242b75b24391b4434a.tar.bz2
mpv-1890ca024e541e1d07bdee242b75b24391b4434a.tar.xz
vo_gpu: avoid overwriting compute shader block sizes
When using multiple compute shaders as part of the same pass, there can be a conflict in the block sizes. In the problematic case, the HDR detection shader can collide with the polar sampling shader. In this case, the solution is clear - the passes that can handle any size should "give in" and not overwrite the block sizes. Fixes #6083.
Diffstat (limited to 'video/out/gpu')
-rw-r--r-- video/out/gpu/video.c | 14
1 files changed, 10 insertions, 4 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 7594c2b04a..ebb63cc02d 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -1103,8 +1103,14 @@ static void cleanup_binds(struct gl_video *p)
// Sets the appropriate compute shader metadata for an implicit compute pass
// bw/bh: block size
-static void pass_is_compute(struct gl_video *p, int bw, int bh)
+static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible)
{
+ if (p->pass_compute.active && flexible) {
+ // Avoid overwriting existing block sizes when using a flexible pass
+ bw = p->pass_compute.block_w;
+ bh = p->pass_compute.block_h;
+ }
+
p->pass_compute = (struct compute_info){
.active = true,
.block_w = bw,
@@ -1248,7 +1254,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
// If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
// over fragment shaders wherever possible.
if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
- pass_is_compute(p, 16, 16);
+ pass_is_compute(p, 16, 16, true);
if (p->pass_compute.active) {
gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
@@ -1744,7 +1750,7 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler
if (shmem_req > p->ra->max_shmem)
goto fallback;
- pass_is_compute(p, bw, bh);
+ pass_is_compute(p, bw, bh, false);
pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih);
return;
@@ -2485,7 +2491,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
if (detect_peak) {
pass_describe(p, "detect HDR peak");
- pass_is_compute(p, 8, 8); // 8x8 is good for performance
+ pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
"uint counter;"
"uint frame_idx;"