summaryrefslogtreecommitdiffstats
path: root/video/out/opengl/video.c
diff options
context:
space:
mode:
Diffstat (limited to 'video/out/opengl/video.c')
-rw-r--r--video/out/opengl/video.c93
1 files changed, 89 insertions, 4 deletions
diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c
index 65b1d95849..ab8f311191 100644
--- a/video/out/opengl/video.c
+++ b/video/out/opengl/video.c
@@ -260,6 +260,7 @@ struct gl_video {
struct img_tex pass_tex[TEXUNIT_VIDEO_NUM];
int pass_tex_num;
int texture_w, texture_h;
+ int compute_w, compute_h; // presence indicates the use of a compute shader
struct gl_transform texture_offset; // texture transform without rotation
int components;
bool use_linear;
@@ -446,6 +447,7 @@ static void gl_video_setup_hooks(struct gl_video *p);
#define GLSL(x) gl_sc_add(p->sc, #x "\n");
#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
+#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__)
static struct bstr load_cached_file(struct gl_video *p, const char *path)
{
@@ -1107,6 +1109,7 @@ static void pass_prepare_src_tex(struct gl_video *p)
char *texture_name = mp_tprintf(32, "texture%d", n);
char *texture_size = mp_tprintf(32, "texture_size%d", n);
char *texture_rot = mp_tprintf(32, "texture_rot%d", n);
+ char *texture_off = mp_tprintf(32, "texture_off%d", n);
char *pixel_size = mp_tprintf(32, "pixel_size%d", n);
if (gl_is_integer_format(s->gl_format)) {
@@ -1121,11 +1124,80 @@ static void pass_prepare_src_tex(struct gl_video *p)
}
gl_sc_uniform_vec2(sc, texture_size, f);
gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m);
+ gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t);
gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0],
1.0f / f[1]});
}
}
+// Record a minimum compute work group size (bw x bh) for the current shader.
+// Since we assume that all shaders can work with bigger working groups, just
+// never smaller ones, the stored p->compute_w/compute_h effectively becomes
+// the per-axis maximum of all size requirements registered so far.
+static void compute_size_minimum(struct gl_video *p, int bw, int bh)
+{
+ p->compute_w = MPMAX(p->compute_w, bw); // only ever grows the recorded width
+ p->compute_h = MPMAX(p->compute_h, bh); // only ever grows the recorded height
+}
+
+// Generate the current pass as a compute shader and dispatch it over w x h.
+// w/h: the width/height of the compute shader's operating domain (e.g. the
+// target that needs to be written, or the source texture that needs to be
+// reduced)
+// bw/bh: the width/height of the block (working group), tiled over w/h
+static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
+{
+ GL *gl = p->gl;
+
+ PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", bw, bh); // block size becomes the local work group size
+
+ pass_prepare_src_tex(p); // sets the texture_rot%d / texture_off%d / pixel_size%d uniforms used by the macros below
+ gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
+
+ // Since we don't actually have vertices, we pretend for convenience
+ // reasons that we do and calculate the right texture coordinates based on
+ // the output sample ID
+ gl_sc_uniform_vec2(p->sc, "out_scale", (GLfloat[2]){ 1.0 / w, 1.0 / h });
+ PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n"); // i.e. (id + 0.5) / (w, h)
+
+ for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) {
+ struct img_tex *s = &p->pass_tex[n];
+ if (!s->gl_tex)
+ continue; // no texture in this slot, so no texcoord%d is emitted for it
+
+ // We need to rescale the coordinates to the true texture size (w / tex_w, h / tex_h)
+ char tex_scale[32];
+ snprintf(tex_scale, sizeof(tex_scale), "tex_scale%d", n);
+ gl_sc_uniform_vec2(p->sc, tex_scale, (GLfloat[2]){
+ (float)s->w / s->tex_w,
+ (float)s->h / s->tex_h,
+ });
+
+ PRELUDE("#define texcoord%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n);
+ PRELUDE("#define texcoord%d_rot(id) (texture_rot%d * texcoord%d_raw(id) + "
+ "pixel_size%d * texture_off%d)\n", n, n, n, n, n);
+ // Clamp the texture coordinates to prevent sampling out-of-bounds in
+ // threads that exceed the requested width/height (upper bound only)
+ PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n);
+ PRELUDE("const vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n);
+ }
+
+ pass_record(p, gl_sc_generate(p->sc, GL_COMPUTE_SHADER));
+
+ // always round up when dividing to make sure we don't leave off a part of
+ // the image (integer ceil-division; bw and bh must be non-zero)
+ int num_x = (w + bw - 1) / bw,
+ num_y = (h + bh - 1) / bh;
+
+ gl->DispatchCompute(num_x, num_y, 1); // NOTE(review): no memory barrier is issued in this function after the dispatch - confirm readers of the output are synchronized
+ gl_sc_reset(p->sc);
+
+ debug_check_gl(p, "after dispatching compute shader");
+
+ memset(&p->pass_tex, 0, sizeof(p->pass_tex)); // clear the source texture slots used by this pass
+ p->pass_tex_num = 0;
+}
+
static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h,
const struct mp_rect *dst)
{
@@ -1169,7 +1241,7 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
GL *gl = p->gl;
pass_prepare_src_tex(p);
gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex));
- pass_record(p, gl_sc_generate(p->sc));
+ pass_record(p, gl_sc_generate(p->sc, GL_FRAGMENT_SHADER));
gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
render_pass_quad(p, vp_w, vp_h, dst);
gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
@@ -1187,10 +1259,23 @@ static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h
static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo,
int w, int h, int flags)
{
+ bool use_compute = p->compute_w > 0 && p->compute_h > 0; // a non-zero size on both axes selects the compute path
+ if (use_compute)
+ flags |= FBOTEX_COMPUTE;
+
fbotex_change(dst_fbo, p->gl, p->log, w, h, p->opts.fbo_format, flags);
- finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
- &(struct mp_rect){0, 0, w, h});
+ if (use_compute) {
+ gl_sc_uniform_image2D(p->sc, "out_image", dst_fbo->texture,
+ dst_fbo->iformat, GL_WRITE_ONLY);
+ GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);) // each invocation stores its own result at its invocation ID
+ dispatch_compute(p, w, h, p->compute_w, p->compute_h);
+ } else {
+ finish_pass_direct(p, dst_fbo->fbo, dst_fbo->rw, dst_fbo->rh,
+ &(struct mp_rect){0, 0, w, h});
+ }
+
+ p->compute_w = p->compute_h = 0; // clear the request so it does not carry over to the next pass
}
static const char *get_tex_swizzle(struct img_tex *img)
@@ -2479,7 +2564,7 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts,
pass_colormanage(p, csp_srgb, true);
}
- pass_record(p, gl_sc_generate(p->sc));
+ pass_record(p, gl_sc_generate(p->sc, GL_FRAGMENT_SHADER));
mpgl_osd_draw_finish(p->osd, vp_w, vp_h, n, p->sc);
gl_sc_reset(p->sc);
}