summary | refs | log | tree | commit | diff | stats
path: root/video/out/opengl/video.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/opengl/video.c')
-rw-r--r-- video/out/opengl/video.c 57
1 files changed, 32 insertions, 25 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index b6be230b53..811c7b717b 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -262,9 +262,9 @@ struct gl_video {
// temporary during rendering
struct img_tex pass_tex[TEXUNIT_VIDEO_NUM];
+ struct compute_info pass_compute; // compute shader metadata for this pass
int pass_tex_num;
int texture_w, texture_h;
- int compute_w, compute_h; // presence indicates the use of a compute shader
struct gl_transform texture_offset; // texture transform without rotation
int components;
bool use_linear;
@@ -1132,26 +1132,28 @@ static void pass_prepare_src_tex(struct gl_video *p)
}
}
-// Update the compute work group size requirements for the current shader.
-// Since we assume that all shaders can work with bigger working groups, just
-// never smaller ones, this effectively becomes the maximum of all size
-// requirements
-static void compute_size_minimum(struct gl_video *p, int bw, int bh)
+// Sets the appropriate compute shader metadata for an implicit compute pass
+// bw/bh: block size
+static void pass_is_compute(struct gl_video *p, int bw, int bh)
{
- p->compute_w = MPMAX(p->compute_w, bw);
- p->compute_h = MPMAX(p->compute_h, bh);
+ p->pass_compute = (struct compute_info){
+ .active = true,
+ .block_w = bw,
+ .block_h = bh,
+ };
}
// w/h: the width/height of the compute shader's operating domain (e.g. the
// target texture that needs to be written, or the source texture that needs to
// be reduced)
-// bw/bh: the width/height of the block (working group), which is tiled over
-// w/h as necessary
-static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
+static void dispatch_compute(struct gl_video *p, int w, int h,
+ struct compute_info info)
{
GL *gl = p->gl;
- PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", bw, bh);
+ PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n",
+ info.threads_w > 0 ? info.threads_w : info.block_w,
+ info.threads_h > 0 ? info.threads_h : info.block_h);
pass_prepare_src_tex(p);
gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
@@ -1188,8 +1190,8 @@ static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
// always round up when dividing to make sure we don't leave off a part of
// the image
- int num_x = (w + bw - 1) / bw,
- num_y = (h + bh - 1) / bh;
+ int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1,
+ num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1;
gl->DispatchCompute(num_x, num_y, 1);
gl_sc_reset(p->sc);
@@ -1263,18 +1265,19 @@ static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
{
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
- if (p->compute_w > 0 && p->compute_h > 0) {
+ if (p->pass_compute.active) {
gl_sc_uniform_image2D(p->sc, "out_image", dst_fbo->texture,
dst_fbo->iformat, GL_WRITE_ONLY);
- GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)
- dispatch_compute(p, w, h, p->compute_w, p->compute_h);
+ if (!p->pass_compute.directly_writes)
+ GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)
+
+ dispatch_compute(p, w, h, p->pass_compute);
p->gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
+ p->pass_compute = (struct compute_info){0};
} else {
finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
&(struct mp_rect){0, 0, w, h});
}
-
- p->compute_w = p->compute_h = 0;
}
static const char *get_tex_swizzle(struct img_tex *img)
@@ -1756,7 +1759,7 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler
if (shmem_req > gl->max_shmem)
goto fallback;
- compute_size_minimum(p, bw, bh);
+ pass_is_compute(p, bw, bh);
pass_compute_polar(p->sc, scaler, tex.components, bw, bh, iw, ih);
return;
@@ -1923,13 +1926,17 @@ static void user_hook(struct gl_video *p, struct img_tex tex,
{
struct gl_user_shader_hook *shader = priv;
assert(shader);
+ load_shader(p, shader->pass_body);
pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc),
plane_names[tex.type]);
- compute_size_minimum(p, shader->compute_w, shader->compute_h);
- load_shader(p, shader->pass_body);
- GLSLF("color = hook();\n");
+ if (shader->compute.active) {
+ p->pass_compute = shader->compute;
+ GLSLF("hook();\n");
+ } else {
+ GLSLF("color = hook();\n");
+ }
// Make sure we at least create a legal FBO on failure, since it's better
// to do this and display an error message than just crash OpenGL
@@ -2487,7 +2494,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma);
if (detect_peak) {
pass_describe(p, "detect HDR peak");
- compute_size_minimum(p, 8, 8); // 8x8 is good for performance
+ pass_is_compute(p, 8, 8); // 8x8 is good for performance
if (!p->hdr_peak_ssbo) {
struct {
@@ -2808,7 +2815,7 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo)
// Since finish_pass_direct doesn't work with compute shaders, and neither
// does the checkerboard/dither code, we may need an indirection via
// p->screen_fbo here.
- if (p->compute_w > 0 && p->compute_h > 0) {
+ if (p->pass_compute.active) {
int o_w = p->dst_rect.x1 - p->dst_rect.x0,
o_h = p->dst_rect.y1 - p->dst_rect.y0;
finish_pass_fbo(p, &p->screen_fbo, o_w, o_h, FBOTEX_FUZZY);