summary | refs | log | tree | commit | diff | stats
path: root/video/out/opengl/video.c
diff options
context:
space:
mode:
author Niklas Haas <git@haasn.xyz> 2017-07-29 20:41:50 +0200
committer Niklas Haas <git@haasn.xyz> 2017-08-03 18:27:36 +0200
commit 83f39103989ba6bd289a2ed83abe8177a3ba2f93 (patch)
tree 1c516d66cad01a09d3837bb48916d4bb0a336975 /video/out/opengl/video.c
parent e7d31d12bea0463ff2d8f43067d40634d1be6b40 (diff)
download mpv-83f39103989ba6bd289a2ed83abe8177a3ba2f93.tar.bz2
mpv-83f39103989ba6bd289a2ed83abe8177a3ba2f93.tar.xz
vo_opengl: make compute shaders more flexible
This allows users to do their own custom sample writing, mainly meant to address use cases such as RAVU. Also clean up the compute shader code a bit.
Diffstat (limited to 'video/out/opengl/video.c')
-rw-r--r-- video/out/opengl/video.c 57
1 files changed, 32 insertions, 25 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index b6be230b53..811c7b717b 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -262,9 +262,9 @@ struct gl_video {
// temporary during rendering
struct img_tex pass_tex[TEXUNIT_VIDEO_NUM];
+ struct compute_info pass_compute; // compute shader metadata for this pass
int pass_tex_num;
int texture_w, texture_h;
- int compute_w, compute_h; // presence indicates the use of a compute shader
struct gl_transform texture_offset; // texture transform without rotation
int components;
bool use_linear;
@@ -1132,26 +1132,28 @@ static void pass_prepare_src_tex(struct gl_video *p)
}
}
-// Update the compute work group size requirements for the current shader.
-// Since we assume that all shaders can work with bigger working groups, just
-// never smaller ones, this effectively becomes the maximum of all size
-// requirements
-static void compute_size_minimum(struct gl_video *p, int bw, int bh)
+// Sets the appropriate compute shader metadata for an implicit compute pass
+// bw/bh: block size
+static void pass_is_compute(struct gl_video *p, int bw, int bh)
{
- p->compute_w = MPMAX(p->compute_w, bw);
- p->compute_h = MPMAX(p->compute_h, bh);
+ p->pass_compute = (struct compute_info){
+ .active = true,
+ .block_w = bw,
+ .block_h = bh,
+ };
}
// w/h: the width/height of the compute shader's operating domain (e.g. the
// target that needs to be written, or the source texture that needs to
// be reduced)
-// bw/bh: the width/height of the block (working group), which is tiled over
-// w/h as necessary
-static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
+static void dispatch_compute(struct gl_video *p, int w, int h,
+ struct compute_info info)
{
GL *gl = p->gl;
- PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", bw, bh);
+ PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n",
+ info.threads_w > 0 ? info.threads_w : info.block_w,
+ info.threads_h > 0 ? info.threads_h : info.block_h);
pass_prepare_src_tex(p);
gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
@@ -1188,8 +1190,8 @@ static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
// always round up when dividing to make sure we don't leave off a part of
// the image
- int num_x = (w + bw - 1) / bw,
- num_y = (h + bh - 1) / bh;
+ int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1,
+ num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1;
gl->DispatchCompute(num_x, num_y, 1);
gl_sc_reset(p->sc);
@@ -1263,18 +1265,19 @@ static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
{
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
- if (p->compute_w > 0 && p->compute_h > 0) {
+ if (p->pass_compute.active) {
gl_sc_uniform_image2D(p->sc, "out_image", dst_fbo->texture,
dst_fbo->iformat, GL_WRITE_ONLY);
- GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)
- dispatch_compute(p, w, h, p->compute_w, p->compute_h);
+ if (!p->pass_compute.directly_writes)
+ GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)
+
+ dispatch_compute(p, w, h, p->pass_compute);
p->gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
+ p->pass_compute = (struct compute_info){0};
} else {
finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
&(struct mp_rect){0, 0, w, h});
}
-
- p->compute_w = p->compute_h = 0;
}
static const char *get_tex_swizzle(struct img_tex *img)
@@ -1756,7 +1759,7 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler
if (shmem_req > gl->max_shmem)
goto fallback;
- compute_size_minimum(p, bw, bh);
+ pass_is_compute(p, bw, bh);
pass_compute_polar(p->sc, scaler, tex.components, bw, bh, iw, ih);
return;
@@ -1923,13 +1926,17 @@ static void user_hook(struct gl_video *p, struct img_tex tex,
{
struct gl_user_shader_hook *shader = priv;
assert(shader);
+ load_shader(p, shader->pass_body);
pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc),
plane_names[tex.type]);
- compute_size_minimum(p, shader->compute_w, shader->compute_h);
- load_shader(p, shader->pass_body);
- GLSLF("color = hook();\n");
+ if (shader->compute.active) {
+ p->pass_compute = shader->compute;
+ GLSLF("hook();\n");
+ } else {
+ GLSLF("color = hook();\n");
+ }
// Make sure we at least create a legal FBO on failure, since it's better
// to do this and display an error message than just crash OpenGL
@@ -2487,7 +2494,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool
bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma);
if (detect_peak) {
pass_describe(p, "detect HDR peak");
- compute_size_minimum(p, 8, 8); // 8x8 is good for performance
+ pass_is_compute(p, 8, 8); // 8x8 is good for performance
if (!p->hdr_peak_ssbo) {
struct {
@@ -2808,7 +2815,7 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo)
// Since finish_pass_direct doesn't work with compute shaders, and neither
// does the checkerboard/dither code, we may need an indirection via
// p->screen_fbo here.
- if (p->compute_w > 0 && p->compute_h > 0) {
+ if (p->pass_compute.active) {
int o_w = p->dst_rect.x1 - p->dst_rect.x0,
o_h = p->dst_rect.y1 - p->dst_rect.y0;
finish_pass_fbo(p, &p->screen_fbo, o_w, o_h, FBOTEX_FUZZY);