summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Niklas Haas <git@haasn.xyz> 2018-08-26 09:55:12 +0200
committer: Jan Ekström <jeebjp@gmail.com> 2018-09-29 23:06:49 +0300
commit: 312b88d360a4a6956a27cdd5f71a335ceed38b00 (patch)
tree: 565b5af6572a2e3bed7106c12e1e271c3c5d195e
parent: b97f172faad933abdc6fef04c802ba59032e4550 (diff)
download: mpv-312b88d360a4a6956a27cdd5f71a335ceed38b00.tar.bz2
download: mpv-312b88d360a4a6956a27cdd5f71a335ceed38b00.tar.xz
vo_gpu: avoid overwriting compute shader block sizes
When using multiple compute shaders as part of the same pass, there can be a conflict in the block sizes. In the problematic case, the HDR detection shader can collide with the polar sampling shader.

In this case, the solution is clear: the passes that can handle any block size should "give in" and not overwrite block sizes that have already been set.

Fixes #6083.

(cherry picked from commit 1890ca024e541e1d07bdee242b75b24391b4434a)
-rw-r--r-- video/out/gpu/video.c | 14
1 files changed, 10 insertions, 4 deletions
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
index 7594c2b04a..ebb63cc02d 100644
--- a/video/out/gpu/video.c
+++ b/video/out/gpu/video.c
@@ -1103,8 +1103,14 @@ static void cleanup_binds(struct gl_video *p)
// Sets the appropriate compute shader metadata for an implicit compute pass
// bw/bh: block size
-static void pass_is_compute(struct gl_video *p, int bw, int bh)
+static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible)
{
+ if (p->pass_compute.active && flexible) {
+ // Avoid overwriting existing block sizes when using a flexible pass
+ bw = p->pass_compute.block_w;
+ bh = p->pass_compute.block_h;
+ }
+
p->pass_compute = (struct compute_info){
.active = true,
.block_w = bw,
@@ -1248,7 +1254,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
// If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
// over fragment shaders wherever possible.
if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE))
- pass_is_compute(p, 16, 16);
+ pass_is_compute(p, 16, 16, true);
if (p->pass_compute.active) {
gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
@@ -1744,7 +1750,7 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler
if (shmem_req > p->ra->max_shmem)
goto fallback;
- pass_is_compute(p, bw, bh);
+ pass_is_compute(p, bw, bh, false);
pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih);
return;
@@ -2485,7 +2491,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
if (detect_peak) {
pass_describe(p, "detect HDR peak");
- pass_is_compute(p, 8, 8); // 8x8 is good for performance
+ pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
"uint counter;"
"uint frame_idx;"