diff options
Diffstat (limited to 'video/out/opengl/video.c')
-rw-r--r-- | video/out/opengl/video.c | 57 |
1 files changed, 32 insertions, 25 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index b6be230b53..811c7b717b 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -262,9 +262,9 @@ struct gl_video { // temporary during rendering struct img_tex pass_tex[TEXUNIT_VIDEO_NUM]; + struct compute_info pass_compute; // compute shader metadata for this pass int pass_tex_num; int texture_w, texture_h; - int compute_w, compute_h; // presence indicates the use of a compute shader struct gl_transform texture_offset; // texture transform without rotation int components; bool use_linear; @@ -1132,26 +1132,28 @@ static void pass_prepare_src_tex(struct gl_video *p) } } -// Update the compute work group size requirements for the current shader. -// Since we assume that all shaders can work with bigger working groups, just -// never smaller ones, this effectively becomes the maximum of all size -// requirements -static void compute_size_minimum(struct gl_video *p, int bw, int bh) +// Sets the appropriate compute shader metadata for an implicit compute pass +// bw/bh: block size +static void pass_is_compute(struct gl_video *p, int bw, int bh) { - p->compute_w = MPMAX(p->compute_w, bw); - p->compute_h = MPMAX(p->compute_h, bh); + p->pass_compute = (struct compute_info){ + .active = true, + .block_w = bw, + .block_h = bh, + }; } // w/h: the width/height of the compute shader's operating domain (e.g. the // target target that needs to be written, or the source texture that needs to // be reduced) -// bw/bh: the width/height of the block (working group), which is tiled over -// w/h as necessary -static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh) +static void dispatch_compute(struct gl_video *p, int w, int h, + struct compute_info info) { GL *gl = p->gl; - PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", bw, bh); + PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", + info.threads_w > 0 ? info.threads_w : info.block_w, + info.threads_h > 0 ? info.threads_h : info.block_h); pass_prepare_src_tex(p); gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex)); @@ -1188,8 +1190,8 @@ static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh) // always round up when dividing to make sure we don't leave off a part of // the image - int num_x = (w + bw - 1) / bw, - num_y = (h + bh - 1) / bh; + int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1, + num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1; gl->DispatchCompute(num_x, num_y, 1); gl_sc_reset(p->sc); @@ -1263,18 +1265,19 @@ static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, { fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags); - if (p->compute_w > 0 && p->compute_h > 0) { + if (p->pass_compute.active) { gl_sc_uniform_image2D(p->sc, "out_image", dst_fbo->texture, dst_fbo->iformat, GL_WRITE_ONLY); - GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);) - dispatch_compute(p, w, h, p->compute_w, p->compute_h); + if (!p->pass_compute.directly_writes) + GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);) + + dispatch_compute(p, w, h, p->pass_compute); p->gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); + p->pass_compute = (struct compute_info){0}; } else { finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh, &(struct mp_rect){0, 0, w, h}); } - - p->compute_w = p->compute_h = 0; } static const char *get_tex_swizzle(struct img_tex *img) @@ -1756,7 +1759,7 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler if (shmem_req > gl->max_shmem) goto fallback; - compute_size_minimum(p, bw, bh); + pass_is_compute(p, bw, bh); pass_compute_polar(p->sc, scaler, tex.components, bw, bh, iw, ih); return; @@ -1923,13 +1926,17 @@ static void user_hook(struct gl_video *p, struct img_tex tex, { struct gl_user_shader_hook *shader = priv; assert(shader); + load_shader(p, shader->pass_body); pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc), plane_names[tex.type]); - compute_size_minimum(p, shader->compute_w, shader->compute_h); - load_shader(p, shader->pass_body); - GLSLF("color = hook();\n"); + if (shader->compute.active) { + p->pass_compute = shader->compute; + GLSLF("hook();\n"); + } else { + GLSLF("color = hook();\n"); + } // Make sure we at least create a legal FBO on failure, since it's better // to do this and display an error message than just crash OpenGL @@ -2487,7 +2494,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma); if (detect_peak) { pass_describe(p, "detect HDR peak"); - compute_size_minimum(p, 8, 8); // 8x8 is good for performance + pass_is_compute(p, 8, 8); // 8x8 is good for performance if (!p->hdr_peak_ssbo) { struct { @@ -2808,7 +2815,7 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo) // Since finish_pass_direct doesn't work with compute shaders, and neither // does the checkerboard/dither code, we may need an indirection via // p->screen_fbo here. - if (p->compute_w > 0 && p->compute_h > 0) { + if (p->pass_compute.active) { int o_w = p->dst_rect.x1 - p->dst_rect.x0, o_h = p->dst_rect.y1 - p->dst_rect.y0; finish_pass_fbo(p, &p->screen_fbo, o_w, o_h, FBOTEX_FUZZY); |